basic_tests.c revision 41687f09
13f012e29Smrg/* 23f012e29Smrg * Copyright 2014 Advanced Micro Devices, Inc. 33f012e29Smrg * 43f012e29Smrg * Permission is hereby granted, free of charge, to any person obtaining a 53f012e29Smrg * copy of this software and associated documentation files (the "Software"), 63f012e29Smrg * to deal in the Software without restriction, including without limitation 73f012e29Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 83f012e29Smrg * and/or sell copies of the Software, and to permit persons to whom the 93f012e29Smrg * Software is furnished to do so, subject to the following conditions: 103f012e29Smrg * 113f012e29Smrg * The above copyright notice and this permission notice shall be included in 123f012e29Smrg * all copies or substantial portions of the Software. 133f012e29Smrg * 143f012e29Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 153f012e29Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 163f012e29Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 173f012e29Smrg * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 183f012e29Smrg * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 193f012e29Smrg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 203f012e29Smrg * OTHER DEALINGS IN THE SOFTWARE. 
213f012e29Smrg * 223f012e29Smrg*/ 233f012e29Smrg 243f012e29Smrg#include <stdio.h> 253f012e29Smrg#include <stdlib.h> 263f012e29Smrg#include <unistd.h> 2788f8a8d2Smrg#include <sys/types.h> 2888f8a8d2Smrg#ifdef MAJOR_IN_SYSMACROS 2988f8a8d2Smrg#include <sys/sysmacros.h> 3088f8a8d2Smrg#endif 3188f8a8d2Smrg#include <sys/stat.h> 3288f8a8d2Smrg#include <fcntl.h> 339bd392adSmrg#if HAVE_ALLOCA_H 343f012e29Smrg# include <alloca.h> 353f012e29Smrg#endif 3600a23bdaSmrg#include <sys/wait.h> 373f012e29Smrg 383f012e29Smrg#include "CUnit/Basic.h" 393f012e29Smrg 403f012e29Smrg#include "amdgpu_test.h" 413f012e29Smrg#include "amdgpu_drm.h" 4241687f09Smrg#include "amdgpu_internal.h" 437cdc0497Smrg#include "util_math.h" 443f012e29Smrg 453f012e29Smrgstatic amdgpu_device_handle device_handle; 463f012e29Smrgstatic uint32_t major_version; 473f012e29Smrgstatic uint32_t minor_version; 48d8807b2fSmrgstatic uint32_t family_id; 493f012e29Smrg 503f012e29Smrgstatic void amdgpu_query_info_test(void); 513f012e29Smrgstatic void amdgpu_command_submission_gfx(void); 523f012e29Smrgstatic void amdgpu_command_submission_compute(void); 53d8807b2fSmrgstatic void amdgpu_command_submission_multi_fence(void); 543f012e29Smrgstatic void amdgpu_command_submission_sdma(void); 553f012e29Smrgstatic void amdgpu_userptr_test(void); 563f012e29Smrgstatic void amdgpu_semaphore_test(void); 5700a23bdaSmrgstatic void amdgpu_sync_dependency_test(void); 5800a23bdaSmrgstatic void amdgpu_bo_eviction_test(void); 5988f8a8d2Smrgstatic void amdgpu_compute_dispatch_test(void); 6088f8a8d2Smrgstatic void amdgpu_gfx_dispatch_test(void); 615324fb0dSmrgstatic void amdgpu_draw_test(void); 6288f8a8d2Smrgstatic void amdgpu_gpu_reset_test(void); 633f012e29Smrg 643f012e29Smrgstatic void amdgpu_command_submission_write_linear_helper(unsigned ip_type); 653f012e29Smrgstatic void amdgpu_command_submission_const_fill_helper(unsigned ip_type); 663f012e29Smrgstatic void amdgpu_command_submission_copy_linear_helper(unsigned ip_type); 
6700a23bdaSmrgstatic void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle, 6800a23bdaSmrg unsigned ip_type, 6900a23bdaSmrg int instance, int pm4_dw, uint32_t *pm4_src, 7000a23bdaSmrg int res_cnt, amdgpu_bo_handle *resources, 7100a23bdaSmrg struct amdgpu_cs_ib_info *ib_info, 7200a23bdaSmrg struct amdgpu_cs_request *ibs_request); 7341687f09Smrg 743f012e29SmrgCU_TestInfo basic_tests[] = { 753f012e29Smrg { "Query Info Test", amdgpu_query_info_test }, 763f012e29Smrg { "Userptr Test", amdgpu_userptr_test }, 7700a23bdaSmrg { "bo eviction Test", amdgpu_bo_eviction_test }, 783f012e29Smrg { "Command submission Test (GFX)", amdgpu_command_submission_gfx }, 793f012e29Smrg { "Command submission Test (Compute)", amdgpu_command_submission_compute }, 80d8807b2fSmrg { "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence }, 813f012e29Smrg { "Command submission Test (SDMA)", amdgpu_command_submission_sdma }, 823f012e29Smrg { "SW semaphore Test", amdgpu_semaphore_test }, 8300a23bdaSmrg { "Sync dependency Test", amdgpu_sync_dependency_test }, 8488f8a8d2Smrg { "Dispatch Test (Compute)", amdgpu_compute_dispatch_test }, 8588f8a8d2Smrg { "Dispatch Test (GFX)", amdgpu_gfx_dispatch_test }, 865324fb0dSmrg { "Draw Test", amdgpu_draw_test }, 8788f8a8d2Smrg { "GPU reset Test", amdgpu_gpu_reset_test }, 883f012e29Smrg CU_TEST_INFO_NULL, 893f012e29Smrg}; 909bd392adSmrg#define BUFFER_SIZE (MAX2(8 * 1024, getpagesize())) 913f012e29Smrg#define SDMA_PKT_HEADER_op_offset 0 923f012e29Smrg#define SDMA_PKT_HEADER_op_mask 0x000000FF 933f012e29Smrg#define SDMA_PKT_HEADER_op_shift 0 943f012e29Smrg#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift) 953f012e29Smrg#define SDMA_OPCODE_CONSTANT_FILL 11 963f012e29Smrg# define SDMA_CONSTANT_FILL_EXTRA_SIZE(x) ((x) << 14) 973f012e29Smrg /* 0 = byte fill 983f012e29Smrg * 2 = DW fill 993f012e29Smrg */ 1003f012e29Smrg#define SDMA_PACKET(op, sub_op, e) ((((e) & 0xFFFF) << 16) | \ 
1013f012e29Smrg (((sub_op) & 0xFF) << 8) | \ 1023f012e29Smrg (((op) & 0xFF) << 0)) 1033f012e29Smrg#define SDMA_OPCODE_WRITE 2 1043f012e29Smrg# define SDMA_WRITE_SUB_OPCODE_LINEAR 0 1053f012e29Smrg# define SDMA_WRTIE_SUB_OPCODE_TILED 1 1063f012e29Smrg 1073f012e29Smrg#define SDMA_OPCODE_COPY 1 1083f012e29Smrg# define SDMA_COPY_SUB_OPCODE_LINEAR 0 1093f012e29Smrg 11041687f09Smrg#define SDMA_OPCODE_ATOMIC 10 11141687f09Smrg# define SDMA_ATOMIC_LOOP(x) ((x) << 0) 11241687f09Smrg /* 0 - single_pass_atomic. 11341687f09Smrg * 1 - loop_until_compare_satisfied. 11441687f09Smrg */ 11541687f09Smrg# define SDMA_ATOMIC_TMZ(x) ((x) << 2) 11641687f09Smrg /* 0 - non-TMZ. 11741687f09Smrg * 1 - TMZ. 11841687f09Smrg */ 11941687f09Smrg# define SDMA_ATOMIC_OPCODE(x) ((x) << 9) 12041687f09Smrg /* TC_OP_ATOMIC_CMPSWAP_RTN_32 0x00000008 12141687f09Smrg * same as Packet 3 12241687f09Smrg */ 12341687f09Smrg 1243f012e29Smrg#define GFX_COMPUTE_NOP 0xffff1000 1253f012e29Smrg#define SDMA_NOP 0x0 1263f012e29Smrg 1273f012e29Smrg/* PM4 */ 1283f012e29Smrg#define PACKET_TYPE0 0 1293f012e29Smrg#define PACKET_TYPE1 1 1303f012e29Smrg#define PACKET_TYPE2 2 1313f012e29Smrg#define PACKET_TYPE3 3 1323f012e29Smrg 1333f012e29Smrg#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3) 1343f012e29Smrg#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF) 1353f012e29Smrg#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF) 1363f012e29Smrg#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF) 1373f012e29Smrg#define PACKET0(reg, n) ((PACKET_TYPE0 << 30) | \ 1383f012e29Smrg ((reg) & 0xFFFF) | \ 1393f012e29Smrg ((n) & 0x3FFF) << 16) 1403f012e29Smrg#define CP_PACKET2 0x80000000 1413f012e29Smrg#define PACKET2_PAD_SHIFT 0 1423f012e29Smrg#define PACKET2_PAD_MASK (0x3fffffff << 0) 1433f012e29Smrg 1443f012e29Smrg#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v))) 1453f012e29Smrg 1463f012e29Smrg#define PACKET3(op, n) ((PACKET_TYPE3 << 30) | \ 1473f012e29Smrg (((op) & 0xFF) << 8) | \ 1483f012e29Smrg ((n) & 0x3FFF) << 16) 
1495324fb0dSmrg#define PACKET3_COMPUTE(op, n) PACKET3(op, n) | (1 << 1) 1503f012e29Smrg 1513f012e29Smrg/* Packet 3 types */ 1523f012e29Smrg#define PACKET3_NOP 0x10 1533f012e29Smrg 1543f012e29Smrg#define PACKET3_WRITE_DATA 0x37 1553f012e29Smrg#define WRITE_DATA_DST_SEL(x) ((x) << 8) 1563f012e29Smrg /* 0 - register 1573f012e29Smrg * 1 - memory (sync - via GRBM) 1583f012e29Smrg * 2 - gl2 1593f012e29Smrg * 3 - gds 1603f012e29Smrg * 4 - reserved 1613f012e29Smrg * 5 - memory (async - direct) 1623f012e29Smrg */ 1633f012e29Smrg#define WR_ONE_ADDR (1 << 16) 1643f012e29Smrg#define WR_CONFIRM (1 << 20) 1653f012e29Smrg#define WRITE_DATA_CACHE_POLICY(x) ((x) << 25) 1663f012e29Smrg /* 0 - LRU 1673f012e29Smrg * 1 - Stream 1683f012e29Smrg */ 1693f012e29Smrg#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30) 1703f012e29Smrg /* 0 - me 1713f012e29Smrg * 1 - pfp 1723f012e29Smrg * 2 - ce 1733f012e29Smrg */ 1743f012e29Smrg 17541687f09Smrg#define PACKET3_ATOMIC_MEM 0x1E 17641687f09Smrg#define TC_OP_ATOMIC_CMPSWAP_RTN_32 0x00000008 17741687f09Smrg#define ATOMIC_MEM_COMMAND(x) ((x) << 8) 17841687f09Smrg /* 0 - single_pass_atomic. 17941687f09Smrg * 1 - loop_until_compare_satisfied. 18041687f09Smrg */ 18141687f09Smrg#define ATOMIC_MEM_CACHEPOLICAY(x) ((x) << 25) 18241687f09Smrg /* 0 - lru. 18341687f09Smrg * 1 - stream. 18441687f09Smrg */ 18541687f09Smrg#define ATOMIC_MEM_ENGINESEL(x) ((x) << 30) 18641687f09Smrg /* 0 - micro_engine. 18741687f09Smrg */ 18841687f09Smrg 1893f012e29Smrg#define PACKET3_DMA_DATA 0x50 1903f012e29Smrg/* 1. header 1913f012e29Smrg * 2. CONTROL 1923f012e29Smrg * 3. SRC_ADDR_LO or DATA [31:0] 1933f012e29Smrg * 4. SRC_ADDR_HI [31:0] 1943f012e29Smrg * 5. DST_ADDR_LO [31:0] 1953f012e29Smrg * 6. DST_ADDR_HI [7:0] 1963f012e29Smrg * 7. 
COMMAND [30:21] | BYTE_COUNT [20:0] 1973f012e29Smrg */ 1983f012e29Smrg/* CONTROL */ 1993f012e29Smrg# define PACKET3_DMA_DATA_ENGINE(x) ((x) << 0) 2003f012e29Smrg /* 0 - ME 2013f012e29Smrg * 1 - PFP 2023f012e29Smrg */ 2033f012e29Smrg# define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13) 2043f012e29Smrg /* 0 - LRU 2053f012e29Smrg * 1 - Stream 2063f012e29Smrg * 2 - Bypass 2073f012e29Smrg */ 2083f012e29Smrg# define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15) 2093f012e29Smrg# define PACKET3_DMA_DATA_DST_SEL(x) ((x) << 20) 2103f012e29Smrg /* 0 - DST_ADDR using DAS 2113f012e29Smrg * 1 - GDS 2123f012e29Smrg * 3 - DST_ADDR using L2 2133f012e29Smrg */ 2143f012e29Smrg# define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25) 2153f012e29Smrg /* 0 - LRU 2163f012e29Smrg * 1 - Stream 2173f012e29Smrg * 2 - Bypass 2183f012e29Smrg */ 2193f012e29Smrg# define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27) 2203f012e29Smrg# define PACKET3_DMA_DATA_SRC_SEL(x) ((x) << 29) 2213f012e29Smrg /* 0 - SRC_ADDR using SAS 2223f012e29Smrg * 1 - GDS 2233f012e29Smrg * 2 - DATA 2243f012e29Smrg * 3 - SRC_ADDR using L2 2253f012e29Smrg */ 2263f012e29Smrg# define PACKET3_DMA_DATA_CP_SYNC (1 << 31) 2273f012e29Smrg/* COMMAND */ 2283f012e29Smrg# define PACKET3_DMA_DATA_DIS_WC (1 << 21) 2293f012e29Smrg# define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22) 2303f012e29Smrg /* 0 - none 2313f012e29Smrg * 1 - 8 in 16 2323f012e29Smrg * 2 - 8 in 32 2333f012e29Smrg * 3 - 8 in 64 2343f012e29Smrg */ 2353f012e29Smrg# define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24) 2363f012e29Smrg /* 0 - none 2373f012e29Smrg * 1 - 8 in 16 2383f012e29Smrg * 2 - 8 in 32 2393f012e29Smrg * 3 - 8 in 64 2403f012e29Smrg */ 2413f012e29Smrg# define PACKET3_DMA_DATA_CMD_SAS (1 << 26) 2423f012e29Smrg /* 0 - memory 2433f012e29Smrg * 1 - register 2443f012e29Smrg */ 2453f012e29Smrg# define PACKET3_DMA_DATA_CMD_DAS (1 << 27) 2463f012e29Smrg /* 0 - memory 2473f012e29Smrg * 1 - register 2483f012e29Smrg */ 2493f012e29Smrg# define PACKET3_DMA_DATA_CMD_SAIC (1 << 
28) 2503f012e29Smrg# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29) 2513f012e29Smrg# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30) 2523f012e29Smrg 253d8807b2fSmrg#define SDMA_PACKET_SI(op, b, t, s, cnt) ((((op) & 0xF) << 28) | \ 254d8807b2fSmrg (((b) & 0x1) << 26) | \ 255d8807b2fSmrg (((t) & 0x1) << 23) | \ 256d8807b2fSmrg (((s) & 0x1) << 22) | \ 257d8807b2fSmrg (((cnt) & 0xFFFFF) << 0)) 258d8807b2fSmrg#define SDMA_OPCODE_COPY_SI 3 259d8807b2fSmrg#define SDMA_OPCODE_CONSTANT_FILL_SI 13 260d8807b2fSmrg#define SDMA_NOP_SI 0xf 261d8807b2fSmrg#define GFX_COMPUTE_NOP_SI 0x80000000 262d8807b2fSmrg#define PACKET3_DMA_DATA_SI 0x41 263d8807b2fSmrg# define PACKET3_DMA_DATA_SI_ENGINE(x) ((x) << 27) 264d8807b2fSmrg /* 0 - ME 265d8807b2fSmrg * 1 - PFP 266d8807b2fSmrg */ 267d8807b2fSmrg# define PACKET3_DMA_DATA_SI_DST_SEL(x) ((x) << 20) 268d8807b2fSmrg /* 0 - DST_ADDR using DAS 269d8807b2fSmrg * 1 - GDS 270d8807b2fSmrg * 3 - DST_ADDR using L2 271d8807b2fSmrg */ 272d8807b2fSmrg# define PACKET3_DMA_DATA_SI_SRC_SEL(x) ((x) << 29) 273d8807b2fSmrg /* 0 - SRC_ADDR using SAS 274d8807b2fSmrg * 1 - GDS 275d8807b2fSmrg * 2 - DATA 276d8807b2fSmrg * 3 - SRC_ADDR using L2 277d8807b2fSmrg */ 278d8807b2fSmrg# define PACKET3_DMA_DATA_SI_CP_SYNC (1 << 31) 279d8807b2fSmrg 28000a23bdaSmrg 28100a23bdaSmrg#define PKT3_CONTEXT_CONTROL 0x28 28200a23bdaSmrg#define CONTEXT_CONTROL_LOAD_ENABLE(x) (((unsigned)(x) & 0x1) << 31) 28300a23bdaSmrg#define CONTEXT_CONTROL_LOAD_CE_RAM(x) (((unsigned)(x) & 0x1) << 28) 28400a23bdaSmrg#define CONTEXT_CONTROL_SHADOW_ENABLE(x) (((unsigned)(x) & 0x1) << 31) 28500a23bdaSmrg 28600a23bdaSmrg#define PKT3_CLEAR_STATE 0x12 28700a23bdaSmrg 28800a23bdaSmrg#define PKT3_SET_SH_REG 0x76 28900a23bdaSmrg#define PACKET3_SET_SH_REG_START 0x00002c00 29000a23bdaSmrg 29100a23bdaSmrg#define PACKET3_DISPATCH_DIRECT 0x15 2925324fb0dSmrg#define PACKET3_EVENT_WRITE 0x46 2935324fb0dSmrg#define PACKET3_ACQUIRE_MEM 0x58 2945324fb0dSmrg#define PACKET3_SET_CONTEXT_REG 0x69 2955324fb0dSmrg#define 
PACKET3_SET_UCONFIG_REG 0x79 2965324fb0dSmrg#define PACKET3_DRAW_INDEX_AUTO 0x2D 29700a23bdaSmrg/* gfx 8 */ 29800a23bdaSmrg#define mmCOMPUTE_PGM_LO 0x2e0c 29900a23bdaSmrg#define mmCOMPUTE_PGM_RSRC1 0x2e12 30000a23bdaSmrg#define mmCOMPUTE_TMPRING_SIZE 0x2e18 30100a23bdaSmrg#define mmCOMPUTE_USER_DATA_0 0x2e40 30200a23bdaSmrg#define mmCOMPUTE_USER_DATA_1 0x2e41 30300a23bdaSmrg#define mmCOMPUTE_RESOURCE_LIMITS 0x2e15 30400a23bdaSmrg#define mmCOMPUTE_NUM_THREAD_X 0x2e07 30500a23bdaSmrg 30600a23bdaSmrg 30700a23bdaSmrg 30800a23bdaSmrg#define SWAP_32(num) (((num & 0xff000000) >> 24) | \ 30900a23bdaSmrg ((num & 0x0000ff00) << 8) | \ 31000a23bdaSmrg ((num & 0x00ff0000) >> 8) | \ 31100a23bdaSmrg ((num & 0x000000ff) << 24)) 31200a23bdaSmrg 31300a23bdaSmrg 31400a23bdaSmrg/* Shader code 31500a23bdaSmrg * void main() 31600a23bdaSmrg{ 31700a23bdaSmrg 31800a23bdaSmrg float x = some_input; 31900a23bdaSmrg for (unsigned i = 0; i < 1000000; i++) 32000a23bdaSmrg x = sin(x); 32100a23bdaSmrg 32200a23bdaSmrg u[0] = 42u; 32300a23bdaSmrg} 32400a23bdaSmrg*/ 32500a23bdaSmrg 32600a23bdaSmrgstatic uint32_t shader_bin[] = { 32700a23bdaSmrg SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf), 32800a23bdaSmrg SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf), 32900a23bdaSmrg SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e), 33000a23bdaSmrg SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf) 33100a23bdaSmrg}; 33200a23bdaSmrg 33300a23bdaSmrg#define CODE_OFFSET 512 33400a23bdaSmrg#define DATA_OFFSET 1024 33500a23bdaSmrg 3365324fb0dSmrgenum cs_type { 3375324fb0dSmrg CS_BUFFERCLEAR, 3389bd392adSmrg CS_BUFFERCOPY, 3399bd392adSmrg CS_HANG, 3409bd392adSmrg CS_HANG_SLOW 3415324fb0dSmrg}; 3425324fb0dSmrg 3435324fb0dSmrgstatic const uint32_t bufferclear_cs_shader_gfx9[] = { 3445324fb0dSmrg 0xD1FD0000, 0x04010C08, 0x7E020204, 0x7E040205, 3455324fb0dSmrg 0x7E060206, 0x7E080207, 0xE01C2000, 0x80000100, 
3465324fb0dSmrg 0xBF810000 3475324fb0dSmrg}; 3485324fb0dSmrg 3495324fb0dSmrgstatic const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = { 3505324fb0dSmrg {0x2e12, 0x000C0041}, //{ mmCOMPUTE_PGM_RSRC1, 0x000C0041 }, 3515324fb0dSmrg {0x2e13, 0x00000090}, //{ mmCOMPUTE_PGM_RSRC2, 0x00000090 }, 3525324fb0dSmrg {0x2e07, 0x00000040}, //{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 }, 3535324fb0dSmrg {0x2e08, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 }, 3545324fb0dSmrg {0x2e09, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 } 3555324fb0dSmrg}; 3565324fb0dSmrg 3575324fb0dSmrgstatic const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5; 3585324fb0dSmrg 3595324fb0dSmrgstatic const uint32_t buffercopy_cs_shader_gfx9[] = { 3605324fb0dSmrg 0xD1FD0000, 0x04010C08, 0xE00C2000, 0x80000100, 3615324fb0dSmrg 0xBF8C0F70, 0xE01C2000, 0x80010100, 0xBF810000 3625324fb0dSmrg}; 3635324fb0dSmrg 3645324fb0dSmrgstatic const uint32_t preamblecache_gfx9[] = { 3655324fb0dSmrg 0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0, 3665324fb0dSmrg 0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000, 3675324fb0dSmrg 0xc0026900, 0xb4, 0x0, 0x3f800000, 0xc0016900, 0x103, 0x0, 3685324fb0dSmrg 0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0, 3695324fb0dSmrg 0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0, 3705324fb0dSmrg 0xc0016900, 0x2d5, 0x10000, 0xc0016900, 0x2dc, 0x0, 3715324fb0dSmrg 0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0, 3725324fb0dSmrg 0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 37388f8a8d2Smrg 0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20, 3745324fb0dSmrg 0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0, 3755324fb0dSmrg 0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0, 3765324fb0dSmrg 0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0, 3775324fb0dSmrg 0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
0x0, 3785324fb0dSmrg 0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0, 3795324fb0dSmrg 0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff, 38088f8a8d2Smrg 0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0, 38188f8a8d2Smrg 0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1, 3825324fb0dSmrg 0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0, 3835324fb0dSmrg 0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0, 3845324fb0dSmrg 0xc0017900, 0x24b, 0x0 3855324fb0dSmrg}; 3865324fb0dSmrg 3875324fb0dSmrgenum ps_type { 3885324fb0dSmrg PS_CONST, 3899bd392adSmrg PS_TEX, 3909bd392adSmrg PS_HANG, 3919bd392adSmrg PS_HANG_SLOW 3925324fb0dSmrg}; 3935324fb0dSmrg 3945324fb0dSmrgstatic const uint32_t ps_const_shader_gfx9[] = { 3955324fb0dSmrg 0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203, 3965324fb0dSmrg 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 3975324fb0dSmrg 0xC4001C0F, 0x00000100, 0xBF810000 3985324fb0dSmrg}; 3995324fb0dSmrg 4005324fb0dSmrgstatic const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6; 4015324fb0dSmrg 4025324fb0dSmrgstatic const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = { 4035324fb0dSmrg {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 }, 4045324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 }, 4055324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 }, 4065324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 }, 4075324fb0dSmrg { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 }, 4085324fb0dSmrg { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 }, 4095324fb0dSmrg { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 }, 4105324fb0dSmrg { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 }, 4115324fb0dSmrg { 0xD2980000, 0x00020300, 0xD2980001, 
0x00020702, 0xC4001C0F, 0x00000100 }, 4125324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 } 4135324fb0dSmrg } 4145324fb0dSmrg}; 4155324fb0dSmrg 4165324fb0dSmrgstatic const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = { 4175324fb0dSmrg 0x00000004 4185324fb0dSmrg}; 4195324fb0dSmrg 4205324fb0dSmrgstatic const uint32_t ps_num_sh_registers_gfx9 = 2; 4215324fb0dSmrg 4225324fb0dSmrgstatic const uint32_t ps_const_sh_registers_gfx9[][2] = { 4235324fb0dSmrg {0x2C0A, 0x000C0040},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 }, 4245324fb0dSmrg {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 } 4255324fb0dSmrg}; 4265324fb0dSmrg 4275324fb0dSmrgstatic const uint32_t ps_num_context_registers_gfx9 = 7; 4285324fb0dSmrg 4295324fb0dSmrgstatic const uint32_t ps_const_context_reg_gfx9[][2] = { 4305324fb0dSmrg {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR, 0x00000002 }, 4315324fb0dSmrg {0xA1B6, 0x00000000}, //{ mmSPI_PS_IN_CONTROL, 0x00000000 }, 4325324fb0dSmrg {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK, 0x0000000F }, 4335324fb0dSmrg {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL, 0x00000010 }, 4345324fb0dSmrg {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT, 0x00000000 }, 4355324fb0dSmrg {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */}, 4365324fb0dSmrg {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT, 0x00000004 } 4375324fb0dSmrg}; 4385324fb0dSmrg 4395324fb0dSmrgstatic const uint32_t ps_tex_shader_gfx9[] = { 4405324fb0dSmrg 0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000, 4415324fb0dSmrg 0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00, 4425324fb0dSmrg 0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000, 4435324fb0dSmrg 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 4445324fb0dSmrg 0x00000100, 0xBF810000 4455324fb0dSmrg}; 4465324fb0dSmrg 4475324fb0dSmrgstatic const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = { 4485324fb0dSmrg 0x0000000B 4495324fb0dSmrg}; 4505324fb0dSmrg 4515324fb0dSmrgstatic const uint32_t 
ps_tex_shader_patchinfo_code_size_gfx9 = 6; 4525324fb0dSmrg 4535324fb0dSmrgstatic const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = { 4545324fb0dSmrg {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 }, 4555324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 }, 4565324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 }, 4575324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 }, 4585324fb0dSmrg { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 4595324fb0dSmrg { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 4605324fb0dSmrg { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 4615324fb0dSmrg { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 4625324fb0dSmrg { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 4635324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 } 4645324fb0dSmrg } 4655324fb0dSmrg}; 4665324fb0dSmrg 4675324fb0dSmrgstatic const uint32_t ps_tex_sh_registers_gfx9[][2] = { 4685324fb0dSmrg {0x2C0A, 0x000C0081},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 }, 4695324fb0dSmrg {0x2C0B, 0x00000018}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 } 4705324fb0dSmrg}; 4715324fb0dSmrg 4725324fb0dSmrgstatic const uint32_t ps_tex_context_reg_gfx9[][2] = { 4735324fb0dSmrg {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR, 0x00000002 }, 4745324fb0dSmrg {0xA1B6, 0x00000001}, //{ mmSPI_PS_IN_CONTROL, 0x00000001 }, 4755324fb0dSmrg {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK, 0x0000000F }, 4765324fb0dSmrg {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL, 0x00000010 }, 4775324fb0dSmrg {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT, 0x00000000 }, 4785324fb0dSmrg {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */}, 4795324fb0dSmrg {0xA1C5, 0x00000004}, //{ 
mmSPI_SHADER_COL_FORMAT, 0x00000004 } 4805324fb0dSmrg}; 4815324fb0dSmrg 4825324fb0dSmrgstatic const uint32_t vs_RectPosTexFast_shader_gfx9[] = { 4835324fb0dSmrg 0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100, 4845324fb0dSmrg 0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206, 4855324fb0dSmrg 0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080, 4865324fb0dSmrg 0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003, 4875324fb0dSmrg 0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101, 4885324fb0dSmrg 0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903, 4895324fb0dSmrg 0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100, 4905324fb0dSmrg 0xC400020F, 0x05060403, 0xBF810000 4915324fb0dSmrg}; 4925324fb0dSmrg 4935324fb0dSmrgstatic const uint32_t cached_cmd_gfx9[] = { 4945324fb0dSmrg 0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0, 4955324fb0dSmrg 0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020, 4965324fb0dSmrg 0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf, 4979bd392adSmrg 0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x12, 4985324fb0dSmrg 0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0, 4995324fb0dSmrg 0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011, 5005324fb0dSmrg 0xc0026900, 0x292, 0x20, 0x60201b8, 5015324fb0dSmrg 0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0 5025324fb0dSmrg}; 50300a23bdaSmrg 5049bd392adSmrgunsigned int memcpy_ps_hang[] = { 5059bd392adSmrg 0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100, 5069bd392adSmrg 0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001, 5079bd392adSmrg 0xC80D0101, 0xBF8C007F, 0xF0800F00, 0x00010002, 5089bd392adSmrg 0xBEFE040C, 0xBF8C0F70, 0xBF800000, 0xBF800000, 5099bd392adSmrg 0xF800180F, 0x03020100, 0xBF810000 5109bd392adSmrg}; 5119bd392adSmrg 5129bd392adSmrgstruct amdgpu_test_shader { 5139bd392adSmrg uint32_t *shader; 5149bd392adSmrg uint32_t header_length; 5159bd392adSmrg uint32_t body_length; 5169bd392adSmrg uint32_t foot_length; 5179bd392adSmrg}; 5189bd392adSmrg 5199bd392adSmrgunsigned int memcpy_cs_hang_slow_ai_codes[] = { 5209bd392adSmrg 0xd1fd0000, 0x04010c08, 
0xe00c2000, 0x80000100, 5219bd392adSmrg 0xbf8c0f70, 0xe01c2000, 0x80010100, 0xbf810000 5229bd392adSmrg}; 5239bd392adSmrg 5249bd392adSmrgstruct amdgpu_test_shader memcpy_cs_hang_slow_ai = { 5259bd392adSmrg memcpy_cs_hang_slow_ai_codes, 5269bd392adSmrg 4, 5279bd392adSmrg 3, 5289bd392adSmrg 1 5299bd392adSmrg}; 5309bd392adSmrg 5319bd392adSmrgunsigned int memcpy_cs_hang_slow_rv_codes[] = { 5329bd392adSmrg 0x8e00860c, 0x32000000, 0xe00c2000, 0x80010100, 5339bd392adSmrg 0xbf8c0f70, 0xe01c2000, 0x80020100, 0xbf810000 5349bd392adSmrg}; 5359bd392adSmrg 5369bd392adSmrgstruct amdgpu_test_shader memcpy_cs_hang_slow_rv = { 5379bd392adSmrg memcpy_cs_hang_slow_rv_codes, 5389bd392adSmrg 4, 5399bd392adSmrg 3, 5409bd392adSmrg 1 5419bd392adSmrg}; 5429bd392adSmrg 5439bd392adSmrgunsigned int memcpy_ps_hang_slow_ai_codes[] = { 5449bd392adSmrg 0xbefc000c, 0xbe8e017e, 0xbefe077e, 0xd4080000, 5459bd392adSmrg 0xd4090001, 0xd40c0100, 0xd40d0101, 0xf0800f00, 5469bd392adSmrg 0x00400002, 0xbefe010e, 0xbf8c0f70, 0xbf800000, 5479bd392adSmrg 0xbf800000, 0xbf800000, 0xbf800000, 0xc400180f, 5489bd392adSmrg 0x03020100, 0xbf810000 5499bd392adSmrg}; 5509bd392adSmrg 5519bd392adSmrgstruct amdgpu_test_shader memcpy_ps_hang_slow_ai = { 5529bd392adSmrg memcpy_ps_hang_slow_ai_codes, 5539bd392adSmrg 7, 5549bd392adSmrg 2, 5559bd392adSmrg 9 5569bd392adSmrg}; 5579bd392adSmrg 5587cdc0497Smrgint amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size, 5597cdc0497Smrg unsigned alignment, unsigned heap, uint64_t alloc_flags, 5607cdc0497Smrg uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu, 5617cdc0497Smrg uint64_t *mc_address, 5627cdc0497Smrg amdgpu_va_handle *va_handle) 5637cdc0497Smrg{ 5647cdc0497Smrg struct amdgpu_bo_alloc_request request = {}; 5657cdc0497Smrg amdgpu_bo_handle buf_handle; 5667cdc0497Smrg amdgpu_va_handle handle; 5677cdc0497Smrg uint64_t vmc_addr; 5687cdc0497Smrg int r; 5697cdc0497Smrg 5707cdc0497Smrg request.alloc_size = size; 5717cdc0497Smrg request.phys_alignment = alignment; 
5727cdc0497Smrg request.preferred_heap = heap; 5737cdc0497Smrg request.flags = alloc_flags; 5747cdc0497Smrg 5757cdc0497Smrg r = amdgpu_bo_alloc(dev, &request, &buf_handle); 5767cdc0497Smrg if (r) 5777cdc0497Smrg return r; 5787cdc0497Smrg 5797cdc0497Smrg r = amdgpu_va_range_alloc(dev, 5807cdc0497Smrg amdgpu_gpu_va_range_general, 5817cdc0497Smrg size, alignment, 0, &vmc_addr, 5827cdc0497Smrg &handle, 0); 5837cdc0497Smrg if (r) 5847cdc0497Smrg goto error_va_alloc; 5857cdc0497Smrg 5867cdc0497Smrg r = amdgpu_bo_va_op_raw(dev, buf_handle, 0, ALIGN(size, getpagesize()), vmc_addr, 5877cdc0497Smrg AMDGPU_VM_PAGE_READABLE | 5887cdc0497Smrg AMDGPU_VM_PAGE_WRITEABLE | 5897cdc0497Smrg AMDGPU_VM_PAGE_EXECUTABLE | 5907cdc0497Smrg mapping_flags, 5917cdc0497Smrg AMDGPU_VA_OP_MAP); 5927cdc0497Smrg if (r) 5937cdc0497Smrg goto error_va_map; 5947cdc0497Smrg 5957cdc0497Smrg r = amdgpu_bo_cpu_map(buf_handle, cpu); 5967cdc0497Smrg if (r) 5977cdc0497Smrg goto error_cpu_map; 5987cdc0497Smrg 5997cdc0497Smrg *bo = buf_handle; 6007cdc0497Smrg *mc_address = vmc_addr; 6017cdc0497Smrg *va_handle = handle; 6027cdc0497Smrg 6037cdc0497Smrg return 0; 6047cdc0497Smrg 6057cdc0497Smrg error_cpu_map: 6067cdc0497Smrg amdgpu_bo_cpu_unmap(buf_handle); 6077cdc0497Smrg 6087cdc0497Smrg error_va_map: 6097cdc0497Smrg amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP); 6107cdc0497Smrg 6117cdc0497Smrg error_va_alloc: 6127cdc0497Smrg amdgpu_bo_free(buf_handle); 6137cdc0497Smrg return r; 6147cdc0497Smrg} 6157cdc0497Smrg 6167cdc0497Smrg 6177cdc0497Smrg 61841687f09SmrgCU_BOOL suite_basic_tests_enable(void) 61941687f09Smrg{ 62041687f09Smrg uint32_t asic_id; 62141687f09Smrg 62241687f09Smrg if (amdgpu_device_initialize(drm_amdgpu[0], &major_version, 62341687f09Smrg &minor_version, &device_handle)) 62441687f09Smrg return CU_FALSE; 62541687f09Smrg 62641687f09Smrg asic_id = device_handle->info.asic_id; 62741687f09Smrg 62841687f09Smrg if (amdgpu_device_deinitialize(device_handle)) 62941687f09Smrg return 
CU_FALSE; 63041687f09Smrg 63141687f09Smrg /* disable gfx engine basic test cases for Arturus due to no CPG */ 63241687f09Smrg if (asic_is_arcturus(asic_id)) { 63341687f09Smrg if (amdgpu_set_test_active("Basic Tests", 63441687f09Smrg "Command submission Test (GFX)", 63541687f09Smrg CU_FALSE)) 63641687f09Smrg fprintf(stderr, "test deactivation failed - %s\n", 63741687f09Smrg CU_get_error_msg()); 63841687f09Smrg 63941687f09Smrg if (amdgpu_set_test_active("Basic Tests", 64041687f09Smrg "Command submission Test (Multi-Fence)", 64141687f09Smrg CU_FALSE)) 64241687f09Smrg fprintf(stderr, "test deactivation failed - %s\n", 64341687f09Smrg CU_get_error_msg()); 64441687f09Smrg 64541687f09Smrg if (amdgpu_set_test_active("Basic Tests", 64641687f09Smrg "Sync dependency Test", 64741687f09Smrg CU_FALSE)) 64841687f09Smrg fprintf(stderr, "test deactivation failed - %s\n", 64941687f09Smrg CU_get_error_msg()); 65041687f09Smrg } 65141687f09Smrg 65241687f09Smrg return CU_TRUE; 65341687f09Smrg} 65441687f09Smrg

/*
 * CUnit suite initializer for "Basic Tests".
 *
 * Opens the amdgpu device on drm_amdgpu[0] into the file-scope
 * device_handle (also filling major_version/minor_version) and caches the
 * GPU family in family_id, which the tests below use to choose packet
 * formats.
 *
 * Returns CUE_SUCCESS, or CUE_SINIT_FAILED when the device cannot be opened
 * or its GPU info cannot be queried.
 */
int suite_basic_tests_init(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	int r;

	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
				     &minor_version, &device_handle);

	if (r) {
		if ((r == -EACCES) && (errno == EACCES))
			printf("\n\nError:%s. "
				"Hint:Try to run this test program as root.",
				strerror(errno));
		return CUE_SINIT_FAILED;
	}

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	if (r) {
		/*
		 * The device was opened above; release it here because a
		 * failing initializer is the last place in this file that
		 * still owns the handle on this path.
		 */
		amdgpu_device_deinitialize(device_handle);
		return CUE_SINIT_FAILED;
	}

	family_id = gpu_info.family_id;

	return CUE_SUCCESS;
}

/*
 * CUnit suite cleanup: closes the device opened by suite_basic_tests_init().
 *
 * Returns CUE_SUCCESS when amdgpu_device_deinitialize() reports 0,
 * CUE_SCLEAN_FAILED otherwise.
 */
int suite_basic_tests_clean(void)
{
	int r = amdgpu_device_deinitialize(device_handle);

	if (r == 0)
		return CUE_SUCCESS;
	else
		return CUE_SCLEAN_FAILED;
}

/*
 * Checks that the GPU info query and the VCE firmware version query both
 * succeed. The returned values themselves are not inspected.
 */
static void amdgpu_query_info_test(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	uint32_t version, feature;
	int r;

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
					  0, &version, &feature);
	CU_ASSERT_EQUAL(r, 0);
}

/*
 * Submits one GFX command submission made of two IBs that live in two
 * separate 4 KiB GTT buffers: a CE IB (flagged AMDGPU_IB_FLAG_CE) and a
 * second IB, then waits for the fence and releases everything.
 */
static void amdgpu_command_submission_gfx_separate_ibs(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, sizeof(ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	/* every family except SI gets one extra two-dword packet first */
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = i;	/* size is counted in dwords written above */
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);

	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

/*
 * Same two-IB GFX submission as the separate-IB variant, but both IBs share
 * one 4 KiB GTT buffer: the CE IB starts at offset 0 and the second IB at
 * byte offset 16 (dword 4).
 */
static void amdgpu_command_submission_gfx_shared_ib(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
			       &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, sizeof(ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_cpu;
	/* every family except SI gets one extra two-dword packet first */
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_mc_address;
	ib_info[0].size = i;	/* at most 4 dwords, so it ends before dword 4 */
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* second IB: placed 4 dwords (16 bytes) into the same buffer */
	ptr = (uint32_t *)ib_result_cpu + 4;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);

	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

/* Runs the write-linear helper on the GFX IP block. */
static void amdgpu_command_submission_gfx_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
}

/* Runs the const-fill helper on the GFX IP block. */
static void amdgpu_command_submission_gfx_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
}

/* Runs the copy-linear helper on the GFX IP block. */
static void amdgpu_command_submission_gfx_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
}

/*
 * SDMA copy test run while two maximum-size VRAM buffers and two
 * maximum-size GTT buffers are part of the submission's resource list.
 *
 * For each of the 2 x 2 combinations of GTT creation flags (default, USWC)
 * a source buffer is filled with 0xaa, a destination buffer is cleared, a
 * linear SDMA copy is submitted, and the destination is checked byte by
 * byte.
 */
static void amdgpu_bo_eviction_test(void)
{
	const int sdma_write_length = 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo1_mc, bo2_mc;
	volatile unsigned char *bo1_cpu, *bo2_cpu;
	int i, r, loop1, loop2;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
	struct amdgpu_heap_info vram_info, gtt_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	/* prepare resource */
	resources = calloc(4, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	/*
	 * CU_ASSERT_* only records a failure and keeps going, so stop here
	 * rather than dereference a NULL buffer further down.
	 */
	if (!pm4 || !ib_info || !ibs_request || !resources)
		goto out_free;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
				   0, &vram_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
				   0, &gtt_info);
	CU_ASSERT_EQUAL(r, 0);

	/* cap both allocation sizes at one third of the GTT heap */
	if (vram_info.max_allocation > gtt_info.heap_size/3) {
		vram_info.max_allocation = gtt_info.heap_size/3;
		gtt_info.max_allocation = vram_info.max_allocation;
	}

	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	/* run 4 rounds (2 x 2) to test every mapping-flag combination */
	for (loop1 = 0; loop1 < 2; loop1++) {
		for (loop2 = 0; loop2 < 2; loop2++) {
			/* allocate bo1 (copy source) for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop1], &bo1,
						    (void**)&bo1_cpu, &bo1_mc,
						    &bo1_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* set bo1 */
			memset((void*)bo1_cpu, 0xaa, sdma_write_length);

			/* allocate bo2 (copy destination) for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop2], &bo2,
						    (void**)&bo2_cpu, &bo2_mc,
						    &bo2_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo2 */
			memset((void*)bo2_cpu, 0, sdma_write_length);

			resources[0] = bo1;
			resources[1] = bo2;
			resources[2] = vram_max[loop2];
			resources[3] = gtt_max[loop2];

			/* fill in PM4: test DMA copy linear */
			i = 0;
			if (family_id == AMDGPU_FAMILY_SI) {
				/* SI layout: dst low, src low, dst high, src high */
				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
							  sdma_write_length);
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
			} else {
				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
				/* AI and newer families take the byte count minus one */
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else
					pm4[i++] = sdma_write_length;
				pm4[i++] = 0;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   AMDGPU_HW_IP_DMA, 0,
						   i, pm4,
						   4, resources,
						   ib_info, ibs_request);

			/* verify that the SDMA result matches the source pattern */
			i = 0;
			while(i < sdma_write_length) {
				CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
			}
			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
		}
	}
	amdgpu_bo_free(vram_max[0]);
	amdgpu_bo_free(vram_max[1]);
	amdgpu_bo_free(gtt_max[0]);
	amdgpu_bo_free(gtt_max[1]);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);

out_free:
	/* clean resources; free(NULL) is a no-op */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);
}


/* GFX command-submission test: runs the five GFX sub-tests in sequence. */
static void amdgpu_command_submission_gfx(void)
{
	/* write data using the CP */
	amdgpu_command_submission_gfx_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_gfx_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_gfx_cp_copy_data();
	/* separate IB buffers for multi-IB submission */
	amdgpu_command_submission_gfx_separate_ibs();
	/* shared IB buffer for multi-IB submission */
	amdgpu_command_submission_gfx_shared_ib();
}

/*
 * Semaphore test in two parts, each submitting a NOP, signalling the
 * semaphore, waiting on it elsewhere, submitting a second NOP and checking
 * that its fence expires:
 *   1. same context, different engines (DMA signals, GFX waits);
 *   2. same engine (GFX), different contexts.
 */
static void amdgpu_semaphore_test(void)
{
	amdgpu_context_handle context_handle[2];
	amdgpu_semaphore_handle sem;
	amdgpu_bo_handle ib_result_handle[2];
	void *ib_result_cpu[2];
	uint64_t ib_result_mc_address[2];
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2] = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	uint32_t sdma_nop, gfx_nop;
	amdgpu_bo_list_handle bo_list[2];
	amdgpu_va_handle va_handle[2];
	int r, i;

	/* SI uses its own NOP encodings for both engines */
	if (family_id == AMDGPU_FAMILY_SI) {
		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
		gfx_nop = GFX_COMPUTE_NOP_SI;
	} else {
		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
		gfx_nop = GFX_COMPUTE_NOP;
	}

	r = amdgpu_cs_create_semaphore(&sem);
	CU_ASSERT_EQUAL(r, 0);
	for (i = 0; i < 2; i++) {
		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle[i], &ib_result_cpu[i],
					    &ib_result_mc_address[i], &va_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
				       NULL, &bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	/* 1. same context different engine */
	ptr = ib_result_cpu[0];
	ptr[0] = sdma_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[0];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* 2. same engine different context */
	ptr = ib_result_cpu[0];
	ptr[0] = gfx_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[1], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request[1], 1);

	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[1];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	for (i = 0; i < 2; i++) {
		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
					     ib_result_mc_address[i], 4096);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_cs_ctx_free(context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_destroy_semaphore(sem);
	CU_ASSERT_EQUAL(r, 0);
}

/*
 * Submits a 16-dword NOP IB on each compute ring, walking the ring bits of
 * available_rings upward from bit 0 until the first clear bit, and waits
 * for every submission's fence.
 */
static void amdgpu_command_submission_compute_nop(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request;
	struct amdgpu_cs_ib_info ib_info;
	struct amdgpu_cs_fence fence_status;
	uint32_t *ptr;
	uint32_t expired;
	int r, instance;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip info;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	for (instance = 0; (1 << instance) & info.available_rings; instance++) {
		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle, &ib_result_cpu,
					    &ib_result_mc_address, &va_handle);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
				       &bo_list);
		CU_ASSERT_EQUAL(r, 0);

		ptr = ib_result_cpu;
		/*
		 * Clear all 16 dwords that ib_info.size hands to the ring;
		 * a byte count of 16 would cover only the first four.
		 */
		memset(ptr, 0, 16 * sizeof(*ptr));
		ptr[0]=PACKET3(PACKET3_NOP, 14);

		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
		ib_info.ib_mc_address = ib_result_mc_address;
		ib_info.size = 16;

		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
		ibs_request.ring = instance;
		ibs_request.number_of_ibs = 1;
		ibs_request.ibs = &ib_info;
		ibs_request.resources = bo_list;
		ibs_request.fence_info.handle = NULL;

		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
		r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
		CU_ASSERT_EQUAL(r, 0);

		fence_status.context = context_handle;
		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
		fence_status.ip_instance = 0;
		fence_status.ring = instance;
		fence_status.fence = ibs_request.seq_no;

		r = amdgpu_cs_query_fence_status(&fence_status,
						 AMDGPU_TIMEOUT_INFINITE,
						 0, &expired);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
					     ib_result_mc_address, 4096);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

/* Runs the write-linear helper on the COMPUTE IP block. */
static void amdgpu_command_submission_compute_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

/* Runs the const-fill helper on the COMPUTE IP block. */
static void amdgpu_command_submission_compute_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
}

/* Runs the copy-linear helper on the COMPUTE IP block. */
static void amdgpu_command_submission_compute_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

/* COMPUTE command-submission test: runs the four compute sub-tests. */
static void amdgpu_command_submission_compute(void)
{
	/* write data using the CP */
	amdgpu_command_submission_compute_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_compute_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_compute_cp_copy_data();
	/* nop test */
	amdgpu_command_submission_compute_nop();
}

/*
 * Copies a caller-built PM4 stream into a freshly allocated 4 KiB IB,
 * submits it as a single-IB command submission, waits for the fence and
 * frees the IB.
 *
 * The caller creates and releases pm4_src, resources, ib_info and
 * ibs_request; this function fills ib_info/ibs_request in and leaves the
 * sequence number in ibs_request->seq_no.
 *
 *   device_handle  device used for the IB and the BO list
 *   context_handle context the submission is made on
 *   ip_type        AMDGPU_HW_IP_* block to submit to
 *   instance       ring index within that block
 *   pm4_dw         number of dwords in pm4_src (0..1024, the IB capacity)
 *   pm4_src        packet stream to execute
 *   res_cnt        number of entries in resources
 *   resources      buffers the packets reference; the IB is appended
 *   secure         when true, AMDGPU_IB_FLAGS_SECURE is OR-ed into
 *                  ib_info->flags
 *
 * Invalid arguments and allocation failures are reported through CUnit
 * assertions and make the function return without submitting.
 */
void
amdgpu_test_exec_cs_helper_raw(amdgpu_device_handle device_handle,
			       amdgpu_context_handle context_handle,
			       unsigned ip_type, int instance, int pm4_dw,
			       uint32_t *pm4_src, int res_cnt,
			       amdgpu_bo_handle *resources,
			       struct amdgpu_cs_ib_info *ib_info,
			       struct amdgpu_cs_request *ibs_request,
			       bool secure)
{
	int r;
	uint32_t expired;
	uint32_t *ring_ptr;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_fence fence_status = {0};
	amdgpu_bo_handle *all_res;
	amdgpu_va_handle va_handle;

	/* prepare CS */
	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
	CU_ASSERT_NOT_EQUAL(resources, NULL);
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
	CU_ASSERT_TRUE(pm4_dw <= 1024);

	/*
	 * The assertions above only record a failure. Bail out explicitly so
	 * a bad argument cannot overrun the 4096-byte IB or the alloca below.
	 */
	if (!pm4_src || !resources || !ib_info || !ibs_request ||
	    pm4_dw < 0 || pm4_dw > 1024 || res_cnt < 0)
		return;

	/* caller's resources plus one slot for the IB itself */
	all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));

	/* allocate IB */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);
	if (r)
		return;	/* no mapping to copy the packets into */

	/* copy PM4 packet to ring from caller */
	ring_ptr = ib_result_cpu;
	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));

	ib_info->ib_mc_address = ib_result_mc_address;
	ib_info->size = pm4_dw;
	if (secure)
		ib_info->flags |= AMDGPU_IB_FLAGS_SECURE;

	ibs_request->ip_type = ip_type;
	ibs_request->ring = instance;
	ibs_request->number_of_ibs = 1;
	ibs_request->ibs = ib_info;
	ibs_request->fence_info.handle = NULL;

	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
	all_res[res_cnt] = ib_result_handle;

	r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res,
				  NULL, &ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);
	if (r) {
		/* nothing valid to submit with; release the IB and stop */
		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
					     ib_result_mc_address, 4096);
		CU_ASSERT_EQUAL(r, 0);
		return;
	}

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ibs_request->ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request->seq_no;

	/* wait for IB accomplished */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);
}

/*
 * Convenience wrapper around amdgpu_test_exec_cs_helper_raw() that uses the
 * suite's file-scope device_handle and a non-secure submission.
 */
static void
amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
			   unsigned ip_type, int instance, int pm4_dw,
			   uint32_t *pm4_src, int res_cnt,
			   amdgpu_bo_handle *resources,
			   struct amdgpu_cs_ib_info *ib_info,
			   struct amdgpu_cs_request *ibs_request)
{
	amdgpu_test_exec_cs_helper_raw(device_handle, context_handle,
				       ip_type, instance, pm4_dw, pm4_src,
				       res_cnt, resources, ib_info,
				       ibs_request, false);
}

140341687f09Smrgvoid 140441687f09Smrgamdgpu_command_submission_write_linear_helper_with_secure(amdgpu_device_handle 140541687f09Smrg device, unsigned 140641687f09Smrg ip_type, bool secure) 14073f012e29Smrg{ 14083f012e29Smrg const int sdma_write_length = 128; 14093f012e29Smrg const int pm4_dw = 256; 14103f012e29Smrg amdgpu_context_handle context_handle; 14113f012e29Smrg amdgpu_bo_handle bo; 14123f012e29Smrg amdgpu_bo_handle *resources; 14133f012e29Smrg uint32_t *pm4; 14143f012e29Smrg struct amdgpu_cs_ib_info *ib_info; 14153f012e29Smrg struct amdgpu_cs_request *ibs_request; 14163f012e29Smrg uint64_t bo_mc; 14173f012e29Smrg volatile uint32_t *bo_cpu; 141841687f09Smrg uint32_t bo_cpu_origin; 141900a23bdaSmrg int i, j, r, loop, ring_id; 14203f012e29Smrg uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 14213f012e29Smrg amdgpu_va_handle va_handle; 142200a23bdaSmrg struct drm_amdgpu_info_hw_ip hw_ip_info; 14233f012e29Smrg 14243f012e29Smrg pm4 = calloc(pm4_dw, sizeof(*pm4)); 14253f012e29Smrg CU_ASSERT_NOT_EQUAL(pm4, NULL); 14263f012e29Smrg 14273f012e29Smrg ib_info = calloc(1, sizeof(*ib_info)); 14283f012e29Smrg CU_ASSERT_NOT_EQUAL(ib_info, NULL); 14293f012e29Smrg 14303f012e29Smrg ibs_request = calloc(1, sizeof(*ibs_request)); 14313f012e29Smrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 14323f012e29Smrg 143341687f09Smrg r = amdgpu_query_hw_ip_info(device, ip_type, 0, &hw_ip_info); 143400a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 143500a23bdaSmrg 143641687f09Smrg for (i = 0; secure && (i < 2); i++) 143741687f09Smrg gtt_flags[i] |= AMDGPU_GEM_CREATE_ENCRYPTED; 143841687f09Smrg 143941687f09Smrg r = amdgpu_cs_ctx_create(device, &context_handle); 144041687f09Smrg 14413f012e29Smrg CU_ASSERT_EQUAL(r, 0); 14423f012e29Smrg 14433f012e29Smrg /* prepare resource */ 14443f012e29Smrg resources = calloc(1, 
sizeof(amdgpu_bo_handle)); 14453f012e29Smrg CU_ASSERT_NOT_EQUAL(resources, NULL); 14463f012e29Smrg 144700a23bdaSmrg for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) { 144800a23bdaSmrg loop = 0; 144900a23bdaSmrg while(loop < 2) { 145000a23bdaSmrg /* allocate UC bo for sDMA use */ 145141687f09Smrg r = amdgpu_bo_alloc_and_map(device, 145200a23bdaSmrg sdma_write_length * sizeof(uint32_t), 145300a23bdaSmrg 4096, AMDGPU_GEM_DOMAIN_GTT, 145400a23bdaSmrg gtt_flags[loop], &bo, (void**)&bo_cpu, 145500a23bdaSmrg &bo_mc, &va_handle); 145600a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 14573f012e29Smrg 145800a23bdaSmrg /* clear bo */ 145900a23bdaSmrg memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t)); 14603f012e29Smrg 146100a23bdaSmrg resources[0] = bo; 14623f012e29Smrg 146300a23bdaSmrg /* fulfill PM4: test DMA write-linear */ 146400a23bdaSmrg i = j = 0; 146500a23bdaSmrg if (ip_type == AMDGPU_HW_IP_DMA) { 146600a23bdaSmrg if (family_id == AMDGPU_FAMILY_SI) 146700a23bdaSmrg pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0, 146800a23bdaSmrg sdma_write_length); 146900a23bdaSmrg else 147000a23bdaSmrg pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE, 147141687f09Smrg SDMA_WRITE_SUB_OPCODE_LINEAR, 147241687f09Smrg secure ? 
SDMA_ATOMIC_TMZ(1) : 0); 147341687f09Smrg pm4[i++] = 0xfffffffc & bo_mc; 147400a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 147500a23bdaSmrg if (family_id >= AMDGPU_FAMILY_AI) 147600a23bdaSmrg pm4[i++] = sdma_write_length - 1; 147700a23bdaSmrg else if (family_id != AMDGPU_FAMILY_SI) 147800a23bdaSmrg pm4[i++] = sdma_write_length; 147900a23bdaSmrg while(j++ < sdma_write_length) 148000a23bdaSmrg pm4[i++] = 0xdeadbeaf; 148100a23bdaSmrg } else if ((ip_type == AMDGPU_HW_IP_GFX) || 148200a23bdaSmrg (ip_type == AMDGPU_HW_IP_COMPUTE)) { 148300a23bdaSmrg pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length); 148400a23bdaSmrg pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 148500a23bdaSmrg pm4[i++] = 0xfffffffc & bo_mc; 148600a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 148700a23bdaSmrg while(j++ < sdma_write_length) 148800a23bdaSmrg pm4[i++] = 0xdeadbeaf; 148900a23bdaSmrg } 14903f012e29Smrg 149141687f09Smrg amdgpu_test_exec_cs_helper_raw(device, context_handle, 149241687f09Smrg ip_type, ring_id, i, pm4, 149341687f09Smrg 1, resources, ib_info, 149441687f09Smrg ibs_request, secure); 14953f012e29Smrg 149600a23bdaSmrg /* verify if SDMA test result meets with expected */ 149700a23bdaSmrg i = 0; 149841687f09Smrg if (!secure) { 149941687f09Smrg while(i < sdma_write_length) { 150041687f09Smrg CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf); 150141687f09Smrg } 150241687f09Smrg } else if (ip_type == AMDGPU_HW_IP_GFX) { 150341687f09Smrg memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t)); 150441687f09Smrg pm4[i++] = PACKET3(PACKET3_ATOMIC_MEM, 7); 150541687f09Smrg /* atomic opcode for 32b w/ RTN and ATOMIC_SWAPCMP_RTN 150641687f09Smrg * command, 1-loop_until_compare_satisfied. 
150741687f09Smrg * single_pass_atomic, 0-lru 150841687f09Smrg * engine_sel, 0-micro_engine 150941687f09Smrg */ 151041687f09Smrg pm4[i++] = (TC_OP_ATOMIC_CMPSWAP_RTN_32 | 151141687f09Smrg ATOMIC_MEM_COMMAND(1) | 151241687f09Smrg ATOMIC_MEM_CACHEPOLICAY(0) | 151341687f09Smrg ATOMIC_MEM_ENGINESEL(0)); 151441687f09Smrg pm4[i++] = 0xfffffffc & bo_mc; 151541687f09Smrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 151641687f09Smrg pm4[i++] = 0x12345678; 151741687f09Smrg pm4[i++] = 0x0; 151841687f09Smrg pm4[i++] = 0xdeadbeaf; 151941687f09Smrg pm4[i++] = 0x0; 152041687f09Smrg pm4[i++] = 0x100; 152141687f09Smrg amdgpu_test_exec_cs_helper_raw(device, context_handle, 152241687f09Smrg ip_type, ring_id, i, pm4, 152341687f09Smrg 1, resources, ib_info, 152441687f09Smrg ibs_request, true); 152541687f09Smrg } else if (ip_type == AMDGPU_HW_IP_DMA) { 152641687f09Smrg /* restore the bo_cpu to compare */ 152741687f09Smrg bo_cpu_origin = bo_cpu[0]; 152841687f09Smrg memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t)); 152941687f09Smrg /* atomic opcode for 32b w/ RTN and ATOMIC_SWAPCMP_RTN 153041687f09Smrg * loop, 1-loop_until_compare_satisfied. 
153141687f09Smrg * single_pass_atomic, 0-lru 153241687f09Smrg */ 153341687f09Smrg pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC, 153441687f09Smrg 0, 153541687f09Smrg SDMA_ATOMIC_LOOP(1) | 153641687f09Smrg SDMA_ATOMIC_TMZ(1) | 153741687f09Smrg SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32)); 153841687f09Smrg pm4[i++] = 0xfffffffc & bo_mc; 153941687f09Smrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 154041687f09Smrg pm4[i++] = 0x12345678; 154141687f09Smrg pm4[i++] = 0x0; 154241687f09Smrg pm4[i++] = 0xdeadbeaf; 154341687f09Smrg pm4[i++] = 0x0; 154441687f09Smrg pm4[i++] = 0x100; 154541687f09Smrg amdgpu_test_exec_cs_helper_raw(device, context_handle, 154641687f09Smrg ip_type, ring_id, i, pm4, 154741687f09Smrg 1, resources, ib_info, 154841687f09Smrg ibs_request, true); 154941687f09Smrg /* DMA's atomic behavir is unlike GFX 155041687f09Smrg * If the comparing data is not equal to destination data, 155141687f09Smrg * For GFX, loop again till gfx timeout(system hang). 155241687f09Smrg * For DMA, loop again till timer expired and then send interrupt. 155341687f09Smrg * So testcase can't use interrupt mechanism. 155441687f09Smrg * We take another way to verify. When the comparing data is not 155541687f09Smrg * equal to destination data, overwrite the source data to the destination 155641687f09Smrg * buffer. Otherwise, original destination data unchanged. 155741687f09Smrg * So if the bo_cpu data is overwritten, the result is passed. 
155841687f09Smrg */ 155941687f09Smrg CU_ASSERT_NOT_EQUAL(bo_cpu[0], bo_cpu_origin); 156041687f09Smrg 156141687f09Smrg /* compare again for the case of dest_data != cmp_data */ 156241687f09Smrg i = 0; 156341687f09Smrg /* restore again, here dest_data should be */ 156441687f09Smrg bo_cpu_origin = bo_cpu[0]; 156541687f09Smrg memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t)); 156641687f09Smrg pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC, 156741687f09Smrg 0, 156841687f09Smrg SDMA_ATOMIC_LOOP(1) | 156941687f09Smrg SDMA_ATOMIC_TMZ(1) | 157041687f09Smrg SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32)); 157141687f09Smrg pm4[i++] = 0xfffffffc & bo_mc; 157241687f09Smrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 157341687f09Smrg pm4[i++] = 0x87654321; 157441687f09Smrg pm4[i++] = 0x0; 157541687f09Smrg pm4[i++] = 0xdeadbeaf; 157641687f09Smrg pm4[i++] = 0x0; 157741687f09Smrg pm4[i++] = 0x100; 157841687f09Smrg amdgpu_test_exec_cs_helper_raw(device, context_handle, 157941687f09Smrg ip_type, ring_id, i, pm4, 158041687f09Smrg 1, resources, ib_info, 158141687f09Smrg ibs_request, true); 158241687f09Smrg /* here bo_cpu[0] should be unchanged, still is 0x12345678, otherwise failed*/ 158341687f09Smrg CU_ASSERT_EQUAL(bo_cpu[0], bo_cpu_origin); 158400a23bdaSmrg } 15853f012e29Smrg 158600a23bdaSmrg r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc, 158700a23bdaSmrg sdma_write_length * sizeof(uint32_t)); 158800a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 158900a23bdaSmrg loop++; 15903f012e29Smrg } 15913f012e29Smrg } 15923f012e29Smrg /* clean resources */ 15933f012e29Smrg free(resources); 15943f012e29Smrg free(ibs_request); 15953f012e29Smrg free(ib_info); 15963f012e29Smrg free(pm4); 15973f012e29Smrg 15983f012e29Smrg /* end of test */ 15993f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 16003f012e29Smrg CU_ASSERT_EQUAL(r, 0); 16013f012e29Smrg} 16023f012e29Smrg 160341687f09Smrgstatic void amdgpu_command_submission_write_linear_helper(unsigned ip_type) 160441687f09Smrg{ 160541687f09Smrg 
amdgpu_command_submission_write_linear_helper_with_secure(device_handle, 160641687f09Smrg ip_type, 160741687f09Smrg false); 160841687f09Smrg} 160941687f09Smrg 16103f012e29Smrgstatic void amdgpu_command_submission_sdma_write_linear(void) 16113f012e29Smrg{ 16123f012e29Smrg amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA); 16133f012e29Smrg} 16143f012e29Smrg 16153f012e29Smrgstatic void amdgpu_command_submission_const_fill_helper(unsigned ip_type) 16163f012e29Smrg{ 16173f012e29Smrg const int sdma_write_length = 1024 * 1024; 16183f012e29Smrg const int pm4_dw = 256; 16193f012e29Smrg amdgpu_context_handle context_handle; 16203f012e29Smrg amdgpu_bo_handle bo; 16213f012e29Smrg amdgpu_bo_handle *resources; 16223f012e29Smrg uint32_t *pm4; 16233f012e29Smrg struct amdgpu_cs_ib_info *ib_info; 16243f012e29Smrg struct amdgpu_cs_request *ibs_request; 16253f012e29Smrg uint64_t bo_mc; 16263f012e29Smrg volatile uint32_t *bo_cpu; 162700a23bdaSmrg int i, j, r, loop, ring_id; 16283f012e29Smrg uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 16293f012e29Smrg amdgpu_va_handle va_handle; 163000a23bdaSmrg struct drm_amdgpu_info_hw_ip hw_ip_info; 16313f012e29Smrg 16323f012e29Smrg pm4 = calloc(pm4_dw, sizeof(*pm4)); 16333f012e29Smrg CU_ASSERT_NOT_EQUAL(pm4, NULL); 16343f012e29Smrg 16353f012e29Smrg ib_info = calloc(1, sizeof(*ib_info)); 16363f012e29Smrg CU_ASSERT_NOT_EQUAL(ib_info, NULL); 16373f012e29Smrg 16383f012e29Smrg ibs_request = calloc(1, sizeof(*ibs_request)); 16393f012e29Smrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 16403f012e29Smrg 164100a23bdaSmrg r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info); 164200a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 164300a23bdaSmrg 16443f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 16453f012e29Smrg CU_ASSERT_EQUAL(r, 0); 16463f012e29Smrg 16473f012e29Smrg /* prepare resource */ 16483f012e29Smrg resources = calloc(1, sizeof(amdgpu_bo_handle)); 16493f012e29Smrg CU_ASSERT_NOT_EQUAL(resources, NULL); 
16503f012e29Smrg 165100a23bdaSmrg for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) { 165200a23bdaSmrg loop = 0; 165300a23bdaSmrg while(loop < 2) { 165400a23bdaSmrg /* allocate UC bo for sDMA use */ 165500a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 165600a23bdaSmrg sdma_write_length, 4096, 165700a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 165800a23bdaSmrg gtt_flags[loop], &bo, (void**)&bo_cpu, 165900a23bdaSmrg &bo_mc, &va_handle); 166000a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 16613f012e29Smrg 166200a23bdaSmrg /* clear bo */ 166300a23bdaSmrg memset((void*)bo_cpu, 0, sdma_write_length); 16643f012e29Smrg 166500a23bdaSmrg resources[0] = bo; 16663f012e29Smrg 166700a23bdaSmrg /* fulfill PM4: test DMA const fill */ 166800a23bdaSmrg i = j = 0; 166900a23bdaSmrg if (ip_type == AMDGPU_HW_IP_DMA) { 167000a23bdaSmrg if (family_id == AMDGPU_FAMILY_SI) { 167100a23bdaSmrg pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI, 167200a23bdaSmrg 0, 0, 0, 167300a23bdaSmrg sdma_write_length / 4); 167400a23bdaSmrg pm4[i++] = 0xfffffffc & bo_mc; 167500a23bdaSmrg pm4[i++] = 0xdeadbeaf; 167600a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16; 167700a23bdaSmrg } else { 167800a23bdaSmrg pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0, 167900a23bdaSmrg SDMA_CONSTANT_FILL_EXTRA_SIZE(2)); 168000a23bdaSmrg pm4[i++] = 0xffffffff & bo_mc; 168100a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 168200a23bdaSmrg pm4[i++] = 0xdeadbeaf; 168300a23bdaSmrg if (family_id >= AMDGPU_FAMILY_AI) 168400a23bdaSmrg pm4[i++] = sdma_write_length - 1; 168500a23bdaSmrg else 168600a23bdaSmrg pm4[i++] = sdma_write_length; 168700a23bdaSmrg } 168800a23bdaSmrg } else if ((ip_type == AMDGPU_HW_IP_GFX) || 168900a23bdaSmrg (ip_type == AMDGPU_HW_IP_COMPUTE)) { 169000a23bdaSmrg if (family_id == AMDGPU_FAMILY_SI) { 169100a23bdaSmrg pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4); 169200a23bdaSmrg pm4[i++] = 0xdeadbeaf; 169300a23bdaSmrg pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) | 169400a23bdaSmrg 
PACKET3_DMA_DATA_SI_DST_SEL(0) | 169500a23bdaSmrg PACKET3_DMA_DATA_SI_SRC_SEL(2) | 169600a23bdaSmrg PACKET3_DMA_DATA_SI_CP_SYNC; 169700a23bdaSmrg pm4[i++] = 0xffffffff & bo_mc; 169800a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1699d8807b2fSmrg pm4[i++] = sdma_write_length; 170000a23bdaSmrg } else { 170100a23bdaSmrg pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5); 170200a23bdaSmrg pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) | 170300a23bdaSmrg PACKET3_DMA_DATA_DST_SEL(0) | 170400a23bdaSmrg PACKET3_DMA_DATA_SRC_SEL(2) | 170500a23bdaSmrg PACKET3_DMA_DATA_CP_SYNC; 170600a23bdaSmrg pm4[i++] = 0xdeadbeaf; 170700a23bdaSmrg pm4[i++] = 0; 170800a23bdaSmrg pm4[i++] = 0xfffffffc & bo_mc; 170900a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 171000a23bdaSmrg pm4[i++] = sdma_write_length; 171100a23bdaSmrg } 1712d8807b2fSmrg } 17133f012e29Smrg 171400a23bdaSmrg amdgpu_test_exec_cs_helper(context_handle, 171500a23bdaSmrg ip_type, ring_id, 171600a23bdaSmrg i, pm4, 171700a23bdaSmrg 1, resources, 171800a23bdaSmrg ib_info, ibs_request); 17193f012e29Smrg 172000a23bdaSmrg /* verify if SDMA test result meets with expected */ 172100a23bdaSmrg i = 0; 172200a23bdaSmrg while(i < (sdma_write_length / 4)) { 172300a23bdaSmrg CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf); 172400a23bdaSmrg } 17253f012e29Smrg 172600a23bdaSmrg r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc, 172700a23bdaSmrg sdma_write_length); 172800a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 172900a23bdaSmrg loop++; 173000a23bdaSmrg } 17313f012e29Smrg } 17323f012e29Smrg /* clean resources */ 17333f012e29Smrg free(resources); 17343f012e29Smrg free(ibs_request); 17353f012e29Smrg free(ib_info); 17363f012e29Smrg free(pm4); 17373f012e29Smrg 17383f012e29Smrg /* end of test */ 17393f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 17403f012e29Smrg CU_ASSERT_EQUAL(r, 0); 17413f012e29Smrg} 17423f012e29Smrg 17433f012e29Smrgstatic void amdgpu_command_submission_sdma_const_fill(void) 17443f012e29Smrg{ 17453f012e29Smrg 
amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA); 17463f012e29Smrg} 17473f012e29Smrg 17483f012e29Smrgstatic void amdgpu_command_submission_copy_linear_helper(unsigned ip_type) 17493f012e29Smrg{ 17503f012e29Smrg const int sdma_write_length = 1024; 17513f012e29Smrg const int pm4_dw = 256; 17523f012e29Smrg amdgpu_context_handle context_handle; 17533f012e29Smrg amdgpu_bo_handle bo1, bo2; 17543f012e29Smrg amdgpu_bo_handle *resources; 17553f012e29Smrg uint32_t *pm4; 17563f012e29Smrg struct amdgpu_cs_ib_info *ib_info; 17573f012e29Smrg struct amdgpu_cs_request *ibs_request; 17583f012e29Smrg uint64_t bo1_mc, bo2_mc; 17593f012e29Smrg volatile unsigned char *bo1_cpu, *bo2_cpu; 176000a23bdaSmrg int i, j, r, loop1, loop2, ring_id; 17613f012e29Smrg uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 17623f012e29Smrg amdgpu_va_handle bo1_va_handle, bo2_va_handle; 176300a23bdaSmrg struct drm_amdgpu_info_hw_ip hw_ip_info; 17643f012e29Smrg 17653f012e29Smrg pm4 = calloc(pm4_dw, sizeof(*pm4)); 17663f012e29Smrg CU_ASSERT_NOT_EQUAL(pm4, NULL); 17673f012e29Smrg 17683f012e29Smrg ib_info = calloc(1, sizeof(*ib_info)); 17693f012e29Smrg CU_ASSERT_NOT_EQUAL(ib_info, NULL); 17703f012e29Smrg 17713f012e29Smrg ibs_request = calloc(1, sizeof(*ibs_request)); 17723f012e29Smrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 17733f012e29Smrg 177400a23bdaSmrg r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info); 177500a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 177600a23bdaSmrg 17773f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 17783f012e29Smrg CU_ASSERT_EQUAL(r, 0); 17793f012e29Smrg 17803f012e29Smrg /* prepare resource */ 17813f012e29Smrg resources = calloc(2, sizeof(amdgpu_bo_handle)); 17823f012e29Smrg CU_ASSERT_NOT_EQUAL(resources, NULL); 17833f012e29Smrg 178400a23bdaSmrg for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) { 178500a23bdaSmrg loop1 = loop2 = 0; 178600a23bdaSmrg /* run 9 circle to test all mapping combination */ 
178700a23bdaSmrg while(loop1 < 2) { 178800a23bdaSmrg while(loop2 < 2) { 178900a23bdaSmrg /* allocate UC bo1for sDMA use */ 179000a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 179100a23bdaSmrg sdma_write_length, 4096, 179200a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 179300a23bdaSmrg gtt_flags[loop1], &bo1, 179400a23bdaSmrg (void**)&bo1_cpu, &bo1_mc, 179500a23bdaSmrg &bo1_va_handle); 179600a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 179700a23bdaSmrg 179800a23bdaSmrg /* set bo1 */ 179900a23bdaSmrg memset((void*)bo1_cpu, 0xaa, sdma_write_length); 180000a23bdaSmrg 180100a23bdaSmrg /* allocate UC bo2 for sDMA use */ 180200a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 180300a23bdaSmrg sdma_write_length, 4096, 180400a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 180500a23bdaSmrg gtt_flags[loop2], &bo2, 180600a23bdaSmrg (void**)&bo2_cpu, &bo2_mc, 180700a23bdaSmrg &bo2_va_handle); 180800a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 180900a23bdaSmrg 181000a23bdaSmrg /* clear bo2 */ 181100a23bdaSmrg memset((void*)bo2_cpu, 0, sdma_write_length); 181200a23bdaSmrg 181300a23bdaSmrg resources[0] = bo1; 181400a23bdaSmrg resources[1] = bo2; 181500a23bdaSmrg 181600a23bdaSmrg /* fulfill PM4: test DMA copy linear */ 181700a23bdaSmrg i = j = 0; 181800a23bdaSmrg if (ip_type == AMDGPU_HW_IP_DMA) { 181900a23bdaSmrg if (family_id == AMDGPU_FAMILY_SI) { 182000a23bdaSmrg pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 182100a23bdaSmrg 0, 0, 0, 182200a23bdaSmrg sdma_write_length); 182300a23bdaSmrg pm4[i++] = 0xffffffff & bo2_mc; 182400a23bdaSmrg pm4[i++] = 0xffffffff & bo1_mc; 182500a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 182600a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 182700a23bdaSmrg } else { 182800a23bdaSmrg pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, 182900a23bdaSmrg SDMA_COPY_SUB_OPCODE_LINEAR, 183000a23bdaSmrg 0); 183100a23bdaSmrg if (family_id >= AMDGPU_FAMILY_AI) 183200a23bdaSmrg pm4[i++] = sdma_write_length - 1; 183300a23bdaSmrg else 183400a23bdaSmrg pm4[i++] = sdma_write_length; 
183500a23bdaSmrg pm4[i++] = 0; 183600a23bdaSmrg pm4[i++] = 0xffffffff & bo1_mc; 183700a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 183800a23bdaSmrg pm4[i++] = 0xffffffff & bo2_mc; 183900a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 184000a23bdaSmrg } 184100a23bdaSmrg } else if ((ip_type == AMDGPU_HW_IP_GFX) || 184200a23bdaSmrg (ip_type == AMDGPU_HW_IP_COMPUTE)) { 184300a23bdaSmrg if (family_id == AMDGPU_FAMILY_SI) { 184400a23bdaSmrg pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4); 184500a23bdaSmrg pm4[i++] = 0xfffffffc & bo1_mc; 184600a23bdaSmrg pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) | 184700a23bdaSmrg PACKET3_DMA_DATA_SI_DST_SEL(0) | 184800a23bdaSmrg PACKET3_DMA_DATA_SI_SRC_SEL(0) | 184900a23bdaSmrg PACKET3_DMA_DATA_SI_CP_SYNC | 185000a23bdaSmrg (0xffff00000000 & bo1_mc) >> 32; 185100a23bdaSmrg pm4[i++] = 0xfffffffc & bo2_mc; 185200a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 1853d8807b2fSmrg pm4[i++] = sdma_write_length; 185400a23bdaSmrg } else { 185500a23bdaSmrg pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5); 185600a23bdaSmrg pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) | 185700a23bdaSmrg PACKET3_DMA_DATA_DST_SEL(0) | 185800a23bdaSmrg PACKET3_DMA_DATA_SRC_SEL(0) | 185900a23bdaSmrg PACKET3_DMA_DATA_CP_SYNC; 186000a23bdaSmrg pm4[i++] = 0xfffffffc & bo1_mc; 186100a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 186200a23bdaSmrg pm4[i++] = 0xfffffffc & bo2_mc; 186300a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 186400a23bdaSmrg pm4[i++] = sdma_write_length; 186500a23bdaSmrg } 1866d8807b2fSmrg } 18673f012e29Smrg 186800a23bdaSmrg amdgpu_test_exec_cs_helper(context_handle, 186900a23bdaSmrg ip_type, ring_id, 187000a23bdaSmrg i, pm4, 187100a23bdaSmrg 2, resources, 187200a23bdaSmrg ib_info, ibs_request); 18733f012e29Smrg 187400a23bdaSmrg /* verify if SDMA test result meets with expected */ 187500a23bdaSmrg i = 0; 187600a23bdaSmrg while(i < sdma_write_length) { 187700a23bdaSmrg CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa); 
187800a23bdaSmrg } 187900a23bdaSmrg r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc, 188000a23bdaSmrg sdma_write_length); 188100a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 188200a23bdaSmrg r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc, 188300a23bdaSmrg sdma_write_length); 188400a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 188500a23bdaSmrg loop2++; 18863f012e29Smrg } 188700a23bdaSmrg loop1++; 18883f012e29Smrg } 18893f012e29Smrg } 18903f012e29Smrg /* clean resources */ 18913f012e29Smrg free(resources); 18923f012e29Smrg free(ibs_request); 18933f012e29Smrg free(ib_info); 18943f012e29Smrg free(pm4); 18953f012e29Smrg 18963f012e29Smrg /* end of test */ 18973f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 18983f012e29Smrg CU_ASSERT_EQUAL(r, 0); 18993f012e29Smrg} 19003f012e29Smrg 19013f012e29Smrgstatic void amdgpu_command_submission_sdma_copy_linear(void) 19023f012e29Smrg{ 19033f012e29Smrg amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA); 19043f012e29Smrg} 19053f012e29Smrg 19063f012e29Smrgstatic void amdgpu_command_submission_sdma(void) 19073f012e29Smrg{ 19083f012e29Smrg amdgpu_command_submission_sdma_write_linear(); 19093f012e29Smrg amdgpu_command_submission_sdma_const_fill(); 19103f012e29Smrg amdgpu_command_submission_sdma_copy_linear(); 19113f012e29Smrg} 19123f012e29Smrg 1913d8807b2fSmrgstatic void amdgpu_command_submission_multi_fence_wait_all(bool wait_all) 1914d8807b2fSmrg{ 1915d8807b2fSmrg amdgpu_context_handle context_handle; 1916d8807b2fSmrg amdgpu_bo_handle ib_result_handle, ib_result_ce_handle; 1917d8807b2fSmrg void *ib_result_cpu, *ib_result_ce_cpu; 1918d8807b2fSmrg uint64_t ib_result_mc_address, ib_result_ce_mc_address; 1919d8807b2fSmrg struct amdgpu_cs_request ibs_request[2] = {0}; 1920d8807b2fSmrg struct amdgpu_cs_ib_info ib_info[2]; 1921d8807b2fSmrg struct amdgpu_cs_fence fence_status[2] = {0}; 1922d8807b2fSmrg uint32_t *ptr; 1923d8807b2fSmrg uint32_t expired; 1924d8807b2fSmrg amdgpu_bo_list_handle bo_list; 1925d8807b2fSmrg amdgpu_va_handle 
va_handle, va_handle_ce; 1926d8807b2fSmrg int r; 1927d8807b2fSmrg int i = 0, ib_cs_num = 2; 1928d8807b2fSmrg 1929d8807b2fSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 1930d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1931d8807b2fSmrg 1932d8807b2fSmrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 1933d8807b2fSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 1934d8807b2fSmrg &ib_result_handle, &ib_result_cpu, 1935d8807b2fSmrg &ib_result_mc_address, &va_handle); 1936d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1937d8807b2fSmrg 1938d8807b2fSmrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 1939d8807b2fSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 1940d8807b2fSmrg &ib_result_ce_handle, &ib_result_ce_cpu, 1941d8807b2fSmrg &ib_result_ce_mc_address, &va_handle_ce); 1942d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1943d8807b2fSmrg 1944d8807b2fSmrg r = amdgpu_get_bo_list(device_handle, ib_result_handle, 1945d8807b2fSmrg ib_result_ce_handle, &bo_list); 1946d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1947d8807b2fSmrg 1948d8807b2fSmrg memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info)); 1949d8807b2fSmrg 1950d8807b2fSmrg /* IT_SET_CE_DE_COUNTERS */ 1951d8807b2fSmrg ptr = ib_result_ce_cpu; 1952d8807b2fSmrg if (family_id != AMDGPU_FAMILY_SI) { 1953d8807b2fSmrg ptr[i++] = 0xc0008900; 1954d8807b2fSmrg ptr[i++] = 0; 1955d8807b2fSmrg } 1956d8807b2fSmrg ptr[i++] = 0xc0008400; 1957d8807b2fSmrg ptr[i++] = 1; 1958d8807b2fSmrg ib_info[0].ib_mc_address = ib_result_ce_mc_address; 1959d8807b2fSmrg ib_info[0].size = i; 1960d8807b2fSmrg ib_info[0].flags = AMDGPU_IB_FLAG_CE; 1961d8807b2fSmrg 1962d8807b2fSmrg /* IT_WAIT_ON_CE_COUNTER */ 1963d8807b2fSmrg ptr = ib_result_cpu; 1964d8807b2fSmrg ptr[0] = 0xc0008600; 1965d8807b2fSmrg ptr[1] = 0x00000001; 1966d8807b2fSmrg ib_info[1].ib_mc_address = ib_result_mc_address; 1967d8807b2fSmrg ib_info[1].size = 2; 1968d8807b2fSmrg 1969d8807b2fSmrg for (i = 0; i < ib_cs_num; i++) { 1970d8807b2fSmrg ibs_request[i].ip_type = AMDGPU_HW_IP_GFX; 1971d8807b2fSmrg ibs_request[i].number_of_ibs = 2; 
1972d8807b2fSmrg ibs_request[i].ibs = ib_info; 1973d8807b2fSmrg ibs_request[i].resources = bo_list; 1974d8807b2fSmrg ibs_request[i].fence_info.handle = NULL; 1975d8807b2fSmrg } 1976d8807b2fSmrg 1977d8807b2fSmrg r = amdgpu_cs_submit(context_handle, 0,ibs_request, ib_cs_num); 1978d8807b2fSmrg 1979d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1980d8807b2fSmrg 1981d8807b2fSmrg for (i = 0; i < ib_cs_num; i++) { 1982d8807b2fSmrg fence_status[i].context = context_handle; 1983d8807b2fSmrg fence_status[i].ip_type = AMDGPU_HW_IP_GFX; 1984d8807b2fSmrg fence_status[i].fence = ibs_request[i].seq_no; 1985d8807b2fSmrg } 1986d8807b2fSmrg 1987d8807b2fSmrg r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all, 1988d8807b2fSmrg AMDGPU_TIMEOUT_INFINITE, 1989d8807b2fSmrg &expired, NULL); 1990d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1991d8807b2fSmrg 1992d8807b2fSmrg r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 1993d8807b2fSmrg ib_result_mc_address, 4096); 1994d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1995d8807b2fSmrg 1996d8807b2fSmrg r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce, 1997d8807b2fSmrg ib_result_ce_mc_address, 4096); 1998d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1999d8807b2fSmrg 2000d8807b2fSmrg r = amdgpu_bo_list_destroy(bo_list); 2001d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 2002d8807b2fSmrg 2003d8807b2fSmrg r = amdgpu_cs_ctx_free(context_handle); 2004d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 2005d8807b2fSmrg} 2006d8807b2fSmrg 2007d8807b2fSmrgstatic void amdgpu_command_submission_multi_fence(void) 2008d8807b2fSmrg{ 2009d8807b2fSmrg amdgpu_command_submission_multi_fence_wait_all(true); 2010d8807b2fSmrg amdgpu_command_submission_multi_fence_wait_all(false); 2011d8807b2fSmrg} 2012d8807b2fSmrg 20133f012e29Smrgstatic void amdgpu_userptr_test(void) 20143f012e29Smrg{ 20153f012e29Smrg int i, r, j; 20163f012e29Smrg uint32_t *pm4 = NULL; 20173f012e29Smrg uint64_t bo_mc; 20183f012e29Smrg void *ptr = NULL; 20193f012e29Smrg int pm4_dw = 256; 20203f012e29Smrg int sdma_write_length = 4; 
20213f012e29Smrg amdgpu_bo_handle handle; 20223f012e29Smrg amdgpu_context_handle context_handle; 20233f012e29Smrg struct amdgpu_cs_ib_info *ib_info; 20243f012e29Smrg struct amdgpu_cs_request *ibs_request; 20253f012e29Smrg amdgpu_bo_handle buf_handle; 20263f012e29Smrg amdgpu_va_handle va_handle; 20273f012e29Smrg 20283f012e29Smrg pm4 = calloc(pm4_dw, sizeof(*pm4)); 20293f012e29Smrg CU_ASSERT_NOT_EQUAL(pm4, NULL); 20303f012e29Smrg 20313f012e29Smrg ib_info = calloc(1, sizeof(*ib_info)); 20323f012e29Smrg CU_ASSERT_NOT_EQUAL(ib_info, NULL); 20333f012e29Smrg 20343f012e29Smrg ibs_request = calloc(1, sizeof(*ibs_request)); 20353f012e29Smrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 20363f012e29Smrg 20373f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 20383f012e29Smrg CU_ASSERT_EQUAL(r, 0); 20393f012e29Smrg 20403f012e29Smrg posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE); 20413f012e29Smrg CU_ASSERT_NOT_EQUAL(ptr, NULL); 20423f012e29Smrg memset(ptr, 0, BUFFER_SIZE); 20433f012e29Smrg 20443f012e29Smrg r = amdgpu_create_bo_from_user_mem(device_handle, 20453f012e29Smrg ptr, BUFFER_SIZE, &buf_handle); 20463f012e29Smrg CU_ASSERT_EQUAL(r, 0); 20473f012e29Smrg 20483f012e29Smrg r = amdgpu_va_range_alloc(device_handle, 20493f012e29Smrg amdgpu_gpu_va_range_general, 20503f012e29Smrg BUFFER_SIZE, 1, 0, &bo_mc, 20513f012e29Smrg &va_handle, 0); 20523f012e29Smrg CU_ASSERT_EQUAL(r, 0); 20533f012e29Smrg 20543f012e29Smrg r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP); 20553f012e29Smrg CU_ASSERT_EQUAL(r, 0); 20563f012e29Smrg 20573f012e29Smrg handle = buf_handle; 20583f012e29Smrg 20593f012e29Smrg j = i = 0; 2060d8807b2fSmrg 2061d8807b2fSmrg if (family_id == AMDGPU_FAMILY_SI) 2062d8807b2fSmrg pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0, 2063d8807b2fSmrg sdma_write_length); 2064d8807b2fSmrg else 2065d8807b2fSmrg pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE, 2066d8807b2fSmrg SDMA_WRITE_SUB_OPCODE_LINEAR, 0); 20673f012e29Smrg pm4[i++] = 
0xffffffff & bo_mc; 20683f012e29Smrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 2069d8807b2fSmrg if (family_id >= AMDGPU_FAMILY_AI) 2070d8807b2fSmrg pm4[i++] = sdma_write_length - 1; 2071d8807b2fSmrg else if (family_id != AMDGPU_FAMILY_SI) 2072d8807b2fSmrg pm4[i++] = sdma_write_length; 20733f012e29Smrg 20743f012e29Smrg while (j++ < sdma_write_length) 20753f012e29Smrg pm4[i++] = 0xdeadbeaf; 20763f012e29Smrg 207700a23bdaSmrg if (!fork()) { 207800a23bdaSmrg pm4[0] = 0x0; 207900a23bdaSmrg exit(0); 208000a23bdaSmrg } 208100a23bdaSmrg 20823f012e29Smrg amdgpu_test_exec_cs_helper(context_handle, 20833f012e29Smrg AMDGPU_HW_IP_DMA, 0, 20843f012e29Smrg i, pm4, 20853f012e29Smrg 1, &handle, 20863f012e29Smrg ib_info, ibs_request); 20873f012e29Smrg i = 0; 20883f012e29Smrg while (i < sdma_write_length) { 20893f012e29Smrg CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf); 20903f012e29Smrg } 20913f012e29Smrg free(ibs_request); 20923f012e29Smrg free(ib_info); 20933f012e29Smrg free(pm4); 20943f012e29Smrg 20953f012e29Smrg r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP); 20963f012e29Smrg CU_ASSERT_EQUAL(r, 0); 20973f012e29Smrg r = amdgpu_va_range_free(va_handle); 20983f012e29Smrg CU_ASSERT_EQUAL(r, 0); 20993f012e29Smrg r = amdgpu_bo_free(buf_handle); 21003f012e29Smrg CU_ASSERT_EQUAL(r, 0); 21013f012e29Smrg free(ptr); 21023f012e29Smrg 21033f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 21043f012e29Smrg CU_ASSERT_EQUAL(r, 0); 210500a23bdaSmrg 210600a23bdaSmrg wait(NULL); 210700a23bdaSmrg} 210800a23bdaSmrg 210900a23bdaSmrgstatic void amdgpu_sync_dependency_test(void) 211000a23bdaSmrg{ 211100a23bdaSmrg amdgpu_context_handle context_handle[2]; 211200a23bdaSmrg amdgpu_bo_handle ib_result_handle; 211300a23bdaSmrg void *ib_result_cpu; 211400a23bdaSmrg uint64_t ib_result_mc_address; 211500a23bdaSmrg struct amdgpu_cs_request ibs_request; 211600a23bdaSmrg struct amdgpu_cs_ib_info ib_info; 211700a23bdaSmrg struct amdgpu_cs_fence fence_status; 211800a23bdaSmrg 
uint32_t expired; 211900a23bdaSmrg int i, j, r; 212000a23bdaSmrg amdgpu_bo_list_handle bo_list; 212100a23bdaSmrg amdgpu_va_handle va_handle; 212200a23bdaSmrg static uint32_t *ptr; 212300a23bdaSmrg uint64_t seq_no; 212400a23bdaSmrg 212500a23bdaSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]); 212600a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 212700a23bdaSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]); 212800a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 212900a23bdaSmrg 213000a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096, 213100a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 213200a23bdaSmrg &ib_result_handle, &ib_result_cpu, 213300a23bdaSmrg &ib_result_mc_address, &va_handle); 213400a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 213500a23bdaSmrg 213600a23bdaSmrg r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, 213700a23bdaSmrg &bo_list); 213800a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 213900a23bdaSmrg 214000a23bdaSmrg ptr = ib_result_cpu; 214100a23bdaSmrg i = 0; 214200a23bdaSmrg 214300a23bdaSmrg memcpy(ptr + CODE_OFFSET , shader_bin, sizeof(shader_bin)); 214400a23bdaSmrg 214500a23bdaSmrg /* Dispatch minimal init config and verify it's executed */ 214600a23bdaSmrg ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); 214700a23bdaSmrg ptr[i++] = 0x80000000; 214800a23bdaSmrg ptr[i++] = 0x80000000; 214900a23bdaSmrg 215000a23bdaSmrg ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0); 215100a23bdaSmrg ptr[i++] = 0x80000000; 215200a23bdaSmrg 215300a23bdaSmrg 215400a23bdaSmrg /* Program compute regs */ 215500a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 215600a23bdaSmrg ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 215700a23bdaSmrg ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8; 215800a23bdaSmrg ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40; 215900a23bdaSmrg 216000a23bdaSmrg 216100a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 216200a23bdaSmrg ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START; 216300a23bdaSmrg /* 216400a23bdaSmrg * 
002c0040 COMPUTE_PGM_RSRC1 <- VGPRS = 0 216500a23bdaSmrg SGPRS = 1 216600a23bdaSmrg PRIORITY = 0 216700a23bdaSmrg FLOAT_MODE = 192 (0xc0) 216800a23bdaSmrg PRIV = 0 216900a23bdaSmrg DX10_CLAMP = 1 217000a23bdaSmrg DEBUG_MODE = 0 217100a23bdaSmrg IEEE_MODE = 0 217200a23bdaSmrg BULKY = 0 217300a23bdaSmrg CDBG_USER = 0 217400a23bdaSmrg * 217500a23bdaSmrg */ 217600a23bdaSmrg ptr[i++] = 0x002c0040; 217700a23bdaSmrg 217800a23bdaSmrg 217900a23bdaSmrg /* 218000a23bdaSmrg * 00000010 COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0 218100a23bdaSmrg USER_SGPR = 8 218200a23bdaSmrg TRAP_PRESENT = 0 218300a23bdaSmrg TGID_X_EN = 0 218400a23bdaSmrg TGID_Y_EN = 0 218500a23bdaSmrg TGID_Z_EN = 0 218600a23bdaSmrg TG_SIZE_EN = 0 218700a23bdaSmrg TIDIG_COMP_CNT = 0 218800a23bdaSmrg EXCP_EN_MSB = 0 218900a23bdaSmrg LDS_SIZE = 0 219000a23bdaSmrg EXCP_EN = 0 219100a23bdaSmrg * 219200a23bdaSmrg */ 219300a23bdaSmrg ptr[i++] = 0x00000010; 219400a23bdaSmrg 219500a23bdaSmrg 219600a23bdaSmrg/* 219700a23bdaSmrg * 00000100 COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100) 219800a23bdaSmrg WAVESIZE = 0 219900a23bdaSmrg * 220000a23bdaSmrg */ 220100a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 220200a23bdaSmrg ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START; 220300a23bdaSmrg ptr[i++] = 0x00000100; 220400a23bdaSmrg 220500a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 220600a23bdaSmrg ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START; 220700a23bdaSmrg ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4); 220800a23bdaSmrg ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32; 220900a23bdaSmrg 221000a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 221100a23bdaSmrg ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START; 221200a23bdaSmrg ptr[i++] = 0; 221300a23bdaSmrg 221400a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3); 221500a23bdaSmrg ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START; 221600a23bdaSmrg ptr[i++] = 1; 
221700a23bdaSmrg ptr[i++] = 1; 221800a23bdaSmrg ptr[i++] = 1; 221900a23bdaSmrg 222000a23bdaSmrg 222100a23bdaSmrg /* Dispatch */ 222200a23bdaSmrg ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 222300a23bdaSmrg ptr[i++] = 1; 222400a23bdaSmrg ptr[i++] = 1; 222500a23bdaSmrg ptr[i++] = 1; 222600a23bdaSmrg ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */ 222700a23bdaSmrg 222800a23bdaSmrg 222900a23bdaSmrg while (i & 7) 223000a23bdaSmrg ptr[i++] = 0xffff1000; /* type3 nop packet */ 223100a23bdaSmrg 223200a23bdaSmrg memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); 223300a23bdaSmrg ib_info.ib_mc_address = ib_result_mc_address; 223400a23bdaSmrg ib_info.size = i; 223500a23bdaSmrg 223600a23bdaSmrg memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); 223700a23bdaSmrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 223800a23bdaSmrg ibs_request.ring = 0; 223900a23bdaSmrg ibs_request.number_of_ibs = 1; 224000a23bdaSmrg ibs_request.ibs = &ib_info; 224100a23bdaSmrg ibs_request.resources = bo_list; 224200a23bdaSmrg ibs_request.fence_info.handle = NULL; 224300a23bdaSmrg 224400a23bdaSmrg r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request, 1); 224500a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 224600a23bdaSmrg seq_no = ibs_request.seq_no; 224700a23bdaSmrg 224800a23bdaSmrg 224900a23bdaSmrg 225000a23bdaSmrg /* Prepare second command with dependency on the first */ 225100a23bdaSmrg j = i; 225200a23bdaSmrg ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3); 225300a23bdaSmrg ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 225400a23bdaSmrg ptr[i++] = 0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4); 225500a23bdaSmrg ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32; 225600a23bdaSmrg ptr[i++] = 99; 225700a23bdaSmrg 225800a23bdaSmrg while (i & 7) 225900a23bdaSmrg ptr[i++] = 0xffff1000; /* type3 nop packet */ 226000a23bdaSmrg 226100a23bdaSmrg memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); 226200a23bdaSmrg ib_info.ib_mc_address = ib_result_mc_address + j * 4; 
226300a23bdaSmrg ib_info.size = i - j; 226400a23bdaSmrg 226500a23bdaSmrg memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); 226600a23bdaSmrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 226700a23bdaSmrg ibs_request.ring = 0; 226800a23bdaSmrg ibs_request.number_of_ibs = 1; 226900a23bdaSmrg ibs_request.ibs = &ib_info; 227000a23bdaSmrg ibs_request.resources = bo_list; 227100a23bdaSmrg ibs_request.fence_info.handle = NULL; 227200a23bdaSmrg 227300a23bdaSmrg ibs_request.number_of_dependencies = 1; 227400a23bdaSmrg 227500a23bdaSmrg ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies)); 227600a23bdaSmrg ibs_request.dependencies[0].context = context_handle[1]; 227700a23bdaSmrg ibs_request.dependencies[0].ip_instance = 0; 227800a23bdaSmrg ibs_request.dependencies[0].ring = 0; 227900a23bdaSmrg ibs_request.dependencies[0].fence = seq_no; 228000a23bdaSmrg 228100a23bdaSmrg 228200a23bdaSmrg r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request, 1); 228300a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 228400a23bdaSmrg 228500a23bdaSmrg 228600a23bdaSmrg memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence)); 228700a23bdaSmrg fence_status.context = context_handle[0]; 228800a23bdaSmrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 228900a23bdaSmrg fence_status.ip_instance = 0; 229000a23bdaSmrg fence_status.ring = 0; 229100a23bdaSmrg fence_status.fence = ibs_request.seq_no; 229200a23bdaSmrg 229300a23bdaSmrg r = amdgpu_cs_query_fence_status(&fence_status, 229400a23bdaSmrg AMDGPU_TIMEOUT_INFINITE,0, &expired); 229500a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 229600a23bdaSmrg 229700a23bdaSmrg /* Expect the second command to wait for shader to complete */ 229800a23bdaSmrg CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99); 229900a23bdaSmrg 230000a23bdaSmrg r = amdgpu_bo_list_destroy(bo_list); 230100a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 230200a23bdaSmrg 230300a23bdaSmrg r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 230400a23bdaSmrg ib_result_mc_address, 4096); 230500a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 
230600a23bdaSmrg 230700a23bdaSmrg r = amdgpu_cs_ctx_free(context_handle[0]); 230800a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 230900a23bdaSmrg r = amdgpu_cs_ctx_free(context_handle[1]); 231000a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 231100a23bdaSmrg 231200a23bdaSmrg free(ibs_request.dependencies); 23133f012e29Smrg} 23145324fb0dSmrg 23159bd392adSmrgstatic int amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t *ptr, int family) 23169bd392adSmrg{ 23179bd392adSmrg struct amdgpu_test_shader *shader; 23189bd392adSmrg int i, loop = 0x10000; 23199bd392adSmrg 23209bd392adSmrg switch (family) { 23219bd392adSmrg case AMDGPU_FAMILY_AI: 23229bd392adSmrg shader = &memcpy_cs_hang_slow_ai; 23239bd392adSmrg break; 23249bd392adSmrg case AMDGPU_FAMILY_RV: 23259bd392adSmrg shader = &memcpy_cs_hang_slow_rv; 23269bd392adSmrg break; 23279bd392adSmrg default: 23289bd392adSmrg return -1; 23299bd392adSmrg break; 23309bd392adSmrg } 23319bd392adSmrg 23329bd392adSmrg memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t)); 23339bd392adSmrg 23349bd392adSmrg for (i = 0; i < loop; i++) 23359bd392adSmrg memcpy(ptr + shader->header_length + shader->body_length * i, 23369bd392adSmrg shader->shader + shader->header_length, 23379bd392adSmrg shader->body_length * sizeof(uint32_t)); 23389bd392adSmrg 23399bd392adSmrg memcpy(ptr + shader->header_length + shader->body_length * loop, 23409bd392adSmrg shader->shader + shader->header_length + shader->body_length, 23419bd392adSmrg shader->foot_length * sizeof(uint32_t)); 23429bd392adSmrg 23439bd392adSmrg return 0; 23449bd392adSmrg} 23459bd392adSmrg 23465324fb0dSmrgstatic int amdgpu_dispatch_load_cs_shader(uint8_t *ptr, 23475324fb0dSmrg int cs_type) 23485324fb0dSmrg{ 23495324fb0dSmrg uint32_t shader_size; 23505324fb0dSmrg const uint32_t *shader; 23515324fb0dSmrg 23525324fb0dSmrg switch (cs_type) { 23535324fb0dSmrg case CS_BUFFERCLEAR: 23545324fb0dSmrg shader = bufferclear_cs_shader_gfx9; 23555324fb0dSmrg shader_size = sizeof(bufferclear_cs_shader_gfx9); 
23565324fb0dSmrg break; 23575324fb0dSmrg case CS_BUFFERCOPY: 23585324fb0dSmrg shader = buffercopy_cs_shader_gfx9; 23595324fb0dSmrg shader_size = sizeof(buffercopy_cs_shader_gfx9); 23605324fb0dSmrg break; 23619bd392adSmrg case CS_HANG: 23629bd392adSmrg shader = memcpy_ps_hang; 23639bd392adSmrg shader_size = sizeof(memcpy_ps_hang); 23649bd392adSmrg break; 23655324fb0dSmrg default: 23665324fb0dSmrg return -1; 23675324fb0dSmrg break; 23685324fb0dSmrg } 23695324fb0dSmrg 23705324fb0dSmrg memcpy(ptr, shader, shader_size); 23715324fb0dSmrg return 0; 23725324fb0dSmrg} 23735324fb0dSmrg 23745324fb0dSmrgstatic int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type) 23755324fb0dSmrg{ 23765324fb0dSmrg int i = 0; 23775324fb0dSmrg 23785324fb0dSmrg /* Write context control and load shadowing register if necessary */ 23795324fb0dSmrg if (ip_type == AMDGPU_HW_IP_GFX) { 23805324fb0dSmrg ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); 23815324fb0dSmrg ptr[i++] = 0x80000000; 23825324fb0dSmrg ptr[i++] = 0x80000000; 23835324fb0dSmrg } 23845324fb0dSmrg 23855324fb0dSmrg /* Issue commands to set default compute state. 
*/ 23865324fb0dSmrg /* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */ 23875324fb0dSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 3); 23885324fb0dSmrg ptr[i++] = 0x204; 23895324fb0dSmrg i += 3; 239088f8a8d2Smrg 23915324fb0dSmrg /* clear mmCOMPUTE_TMPRING_SIZE */ 23925324fb0dSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 23935324fb0dSmrg ptr[i++] = 0x218; 23945324fb0dSmrg ptr[i++] = 0; 23955324fb0dSmrg 23965324fb0dSmrg return i; 23975324fb0dSmrg} 23985324fb0dSmrg 23995324fb0dSmrgstatic int amdgpu_dispatch_write_cumask(uint32_t *ptr) 24005324fb0dSmrg{ 24015324fb0dSmrg int i = 0; 24025324fb0dSmrg 24035324fb0dSmrg /* Issue commands to set cu mask used in current dispatch */ 24045324fb0dSmrg /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */ 24055324fb0dSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); 24065324fb0dSmrg ptr[i++] = 0x216; 24075324fb0dSmrg ptr[i++] = 0xffffffff; 24085324fb0dSmrg ptr[i++] = 0xffffffff; 24095324fb0dSmrg /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */ 24105324fb0dSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); 24115324fb0dSmrg ptr[i++] = 0x219; 24125324fb0dSmrg ptr[i++] = 0xffffffff; 24135324fb0dSmrg ptr[i++] = 0xffffffff; 24145324fb0dSmrg 24155324fb0dSmrg return i; 24165324fb0dSmrg} 24175324fb0dSmrg 24185324fb0dSmrgstatic int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr) 24195324fb0dSmrg{ 24205324fb0dSmrg int i, j; 24215324fb0dSmrg 24225324fb0dSmrg i = 0; 24235324fb0dSmrg 24245324fb0dSmrg /* Writes shader state to HW */ 24255324fb0dSmrg /* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */ 24265324fb0dSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); 24275324fb0dSmrg ptr[i++] = 0x20c; 24285324fb0dSmrg ptr[i++] = (shader_addr >> 8); 24295324fb0dSmrg ptr[i++] = (shader_addr >> 40); 24305324fb0dSmrg /* write sh regs*/ 24315324fb0dSmrg for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) { 24325324fb0dSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 
24335324fb0dSmrg /* - Gfx9ShRegBase */ 24345324fb0dSmrg ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00; 24355324fb0dSmrg ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1]; 24365324fb0dSmrg } 24375324fb0dSmrg 24385324fb0dSmrg return i; 24395324fb0dSmrg} 24405324fb0dSmrg 24415324fb0dSmrgstatic void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle, 24425324fb0dSmrg uint32_t ip_type, 24435324fb0dSmrg uint32_t ring) 24445324fb0dSmrg{ 24455324fb0dSmrg amdgpu_context_handle context_handle; 24465324fb0dSmrg amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3]; 24475324fb0dSmrg volatile unsigned char *ptr_dst; 24485324fb0dSmrg void *ptr_shader; 24495324fb0dSmrg uint32_t *ptr_cmd; 24505324fb0dSmrg uint64_t mc_address_dst, mc_address_shader, mc_address_cmd; 24515324fb0dSmrg amdgpu_va_handle va_dst, va_shader, va_cmd; 24525324fb0dSmrg int i, r; 24535324fb0dSmrg int bo_dst_size = 16384; 24545324fb0dSmrg int bo_shader_size = 4096; 24555324fb0dSmrg int bo_cmd_size = 4096; 24565324fb0dSmrg struct amdgpu_cs_request ibs_request = {0}; 24575324fb0dSmrg struct amdgpu_cs_ib_info ib_info= {0}; 24585324fb0dSmrg amdgpu_bo_list_handle bo_list; 24595324fb0dSmrg struct amdgpu_cs_fence fence_status = {0}; 24605324fb0dSmrg uint32_t expired; 24615324fb0dSmrg 24625324fb0dSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 24635324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 24645324fb0dSmrg 24655324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 24665324fb0dSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 24675324fb0dSmrg &bo_cmd, (void **)&ptr_cmd, 24685324fb0dSmrg &mc_address_cmd, &va_cmd); 24695324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 24705324fb0dSmrg memset(ptr_cmd, 0, bo_cmd_size); 24715324fb0dSmrg 24725324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 24735324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 24745324fb0dSmrg &bo_shader, &ptr_shader, 24755324fb0dSmrg &mc_address_shader, &va_shader); 24765324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 
247788f8a8d2Smrg memset(ptr_shader, 0, bo_shader_size); 24785324fb0dSmrg 24795324fb0dSmrg r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR); 24805324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 24815324fb0dSmrg 24825324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 24835324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 24845324fb0dSmrg &bo_dst, (void **)&ptr_dst, 24855324fb0dSmrg &mc_address_dst, &va_dst); 24865324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 24875324fb0dSmrg 24885324fb0dSmrg i = 0; 24895324fb0dSmrg i += amdgpu_dispatch_init(ptr_cmd + i, ip_type); 24905324fb0dSmrg 24915324fb0dSmrg /* Issue commands to set cu mask used in current dispatch */ 24925324fb0dSmrg i += amdgpu_dispatch_write_cumask(ptr_cmd + i); 24935324fb0dSmrg 24945324fb0dSmrg /* Writes shader state to HW */ 24955324fb0dSmrg i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader); 24965324fb0dSmrg 24975324fb0dSmrg /* Write constant data */ 24985324fb0dSmrg /* Writes the UAV constant data to the SGPRs. */ 24995324fb0dSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 25005324fb0dSmrg ptr_cmd[i++] = 0x240; 25015324fb0dSmrg ptr_cmd[i++] = mc_address_dst; 25025324fb0dSmrg ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000; 25035324fb0dSmrg ptr_cmd[i++] = 0x400; 25045324fb0dSmrg ptr_cmd[i++] = 0x74fac; 25055324fb0dSmrg 25065324fb0dSmrg /* Sets a range of pixel shader constants */ 25075324fb0dSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 25085324fb0dSmrg ptr_cmd[i++] = 0x244; 25095324fb0dSmrg ptr_cmd[i++] = 0x22222222; 25105324fb0dSmrg ptr_cmd[i++] = 0x22222222; 25115324fb0dSmrg ptr_cmd[i++] = 0x22222222; 25125324fb0dSmrg ptr_cmd[i++] = 0x22222222; 25135324fb0dSmrg 251488f8a8d2Smrg /* clear mmCOMPUTE_RESOURCE_LIMITS */ 251588f8a8d2Smrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 251688f8a8d2Smrg ptr_cmd[i++] = 0x215; 251788f8a8d2Smrg ptr_cmd[i++] = 0; 251888f8a8d2Smrg 25195324fb0dSmrg /* dispatch direct command */ 25205324fb0dSmrg ptr_cmd[i++] = 
PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3); 25215324fb0dSmrg ptr_cmd[i++] = 0x10; 25225324fb0dSmrg ptr_cmd[i++] = 1; 25235324fb0dSmrg ptr_cmd[i++] = 1; 25245324fb0dSmrg ptr_cmd[i++] = 1; 25255324fb0dSmrg 25265324fb0dSmrg while (i & 7) 25275324fb0dSmrg ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 25285324fb0dSmrg 25295324fb0dSmrg resources[0] = bo_dst; 25305324fb0dSmrg resources[1] = bo_shader; 25315324fb0dSmrg resources[2] = bo_cmd; 25325324fb0dSmrg r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list); 25335324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 25345324fb0dSmrg 25355324fb0dSmrg ib_info.ib_mc_address = mc_address_cmd; 25365324fb0dSmrg ib_info.size = i; 25375324fb0dSmrg ibs_request.ip_type = ip_type; 25385324fb0dSmrg ibs_request.ring = ring; 25395324fb0dSmrg ibs_request.resources = bo_list; 25405324fb0dSmrg ibs_request.number_of_ibs = 1; 25415324fb0dSmrg ibs_request.ibs = &ib_info; 25425324fb0dSmrg ibs_request.fence_info.handle = NULL; 25435324fb0dSmrg 25445324fb0dSmrg /* submit CS */ 25455324fb0dSmrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 25465324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 25475324fb0dSmrg 25485324fb0dSmrg r = amdgpu_bo_list_destroy(bo_list); 25495324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 25505324fb0dSmrg 25515324fb0dSmrg fence_status.ip_type = ip_type; 25525324fb0dSmrg fence_status.ip_instance = 0; 25535324fb0dSmrg fence_status.ring = ring; 25545324fb0dSmrg fence_status.context = context_handle; 25555324fb0dSmrg fence_status.fence = ibs_request.seq_no; 25565324fb0dSmrg 25575324fb0dSmrg /* wait for IB accomplished */ 25585324fb0dSmrg r = amdgpu_cs_query_fence_status(&fence_status, 25595324fb0dSmrg AMDGPU_TIMEOUT_INFINITE, 25605324fb0dSmrg 0, &expired); 25615324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 25625324fb0dSmrg CU_ASSERT_EQUAL(expired, true); 25635324fb0dSmrg 25645324fb0dSmrg /* verify if memset test result meets with expected */ 25655324fb0dSmrg i = 0; 25665324fb0dSmrg while(i < bo_dst_size) { 25675324fb0dSmrg 
CU_ASSERT_EQUAL(ptr_dst[i++], 0x22); 25685324fb0dSmrg } 25695324fb0dSmrg 25705324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 25715324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 25725324fb0dSmrg 25735324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size); 25745324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 25755324fb0dSmrg 25765324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 25775324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 25785324fb0dSmrg 25795324fb0dSmrg r = amdgpu_cs_ctx_free(context_handle); 25805324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 25815324fb0dSmrg} 25825324fb0dSmrg 25835324fb0dSmrgstatic void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle, 25845324fb0dSmrg uint32_t ip_type, 25859bd392adSmrg uint32_t ring, 25869bd392adSmrg int hang) 25875324fb0dSmrg{ 25885324fb0dSmrg amdgpu_context_handle context_handle; 25895324fb0dSmrg amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4]; 25905324fb0dSmrg volatile unsigned char *ptr_dst; 25915324fb0dSmrg void *ptr_shader; 25925324fb0dSmrg unsigned char *ptr_src; 25935324fb0dSmrg uint32_t *ptr_cmd; 25945324fb0dSmrg uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd; 25955324fb0dSmrg amdgpu_va_handle va_src, va_dst, va_shader, va_cmd; 25965324fb0dSmrg int i, r; 25975324fb0dSmrg int bo_dst_size = 16384; 25985324fb0dSmrg int bo_shader_size = 4096; 25995324fb0dSmrg int bo_cmd_size = 4096; 26005324fb0dSmrg struct amdgpu_cs_request ibs_request = {0}; 26015324fb0dSmrg struct amdgpu_cs_ib_info ib_info= {0}; 26029bd392adSmrg uint32_t expired, hang_state, hangs; 26039bd392adSmrg enum cs_type cs_type; 26045324fb0dSmrg amdgpu_bo_list_handle bo_list; 26055324fb0dSmrg struct amdgpu_cs_fence fence_status = {0}; 26065324fb0dSmrg 26075324fb0dSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 26085324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 26095324fb0dSmrg 26105324fb0dSmrg r = 
amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 26115324fb0dSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 26125324fb0dSmrg &bo_cmd, (void **)&ptr_cmd, 26135324fb0dSmrg &mc_address_cmd, &va_cmd); 26145324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 26155324fb0dSmrg memset(ptr_cmd, 0, bo_cmd_size); 26165324fb0dSmrg 26175324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 26185324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 26195324fb0dSmrg &bo_shader, &ptr_shader, 26205324fb0dSmrg &mc_address_shader, &va_shader); 26215324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 262288f8a8d2Smrg memset(ptr_shader, 0, bo_shader_size); 26235324fb0dSmrg 26249bd392adSmrg cs_type = hang ? CS_HANG : CS_BUFFERCOPY; 26259bd392adSmrg r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type); 26265324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 26275324fb0dSmrg 26285324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 26295324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 26305324fb0dSmrg &bo_src, (void **)&ptr_src, 26315324fb0dSmrg &mc_address_src, &va_src); 26325324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 26335324fb0dSmrg 26345324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 26355324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 26365324fb0dSmrg &bo_dst, (void **)&ptr_dst, 26375324fb0dSmrg &mc_address_dst, &va_dst); 26385324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 26395324fb0dSmrg 26405324fb0dSmrg memset(ptr_src, 0x55, bo_dst_size); 26415324fb0dSmrg 26425324fb0dSmrg i = 0; 26435324fb0dSmrg i += amdgpu_dispatch_init(ptr_cmd + i, ip_type); 26445324fb0dSmrg 26455324fb0dSmrg /* Issue commands to set cu mask used in current dispatch */ 26465324fb0dSmrg i += amdgpu_dispatch_write_cumask(ptr_cmd + i); 26475324fb0dSmrg 26485324fb0dSmrg /* Writes shader state to HW */ 26495324fb0dSmrg i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader); 26505324fb0dSmrg 26515324fb0dSmrg /* Write constant data */ 26525324fb0dSmrg /* Writes the texture resource constants data to the SGPRs */ 26535324fb0dSmrg ptr_cmd[i++] = 
PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 26545324fb0dSmrg ptr_cmd[i++] = 0x240; 26555324fb0dSmrg ptr_cmd[i++] = mc_address_src; 26565324fb0dSmrg ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000; 26575324fb0dSmrg ptr_cmd[i++] = 0x400; 26585324fb0dSmrg ptr_cmd[i++] = 0x74fac; 26595324fb0dSmrg 26605324fb0dSmrg /* Writes the UAV constant data to the SGPRs. */ 26615324fb0dSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 26625324fb0dSmrg ptr_cmd[i++] = 0x244; 26635324fb0dSmrg ptr_cmd[i++] = mc_address_dst; 26645324fb0dSmrg ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000; 26655324fb0dSmrg ptr_cmd[i++] = 0x400; 26665324fb0dSmrg ptr_cmd[i++] = 0x74fac; 26675324fb0dSmrg 266888f8a8d2Smrg /* clear mmCOMPUTE_RESOURCE_LIMITS */ 266988f8a8d2Smrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 267088f8a8d2Smrg ptr_cmd[i++] = 0x215; 267188f8a8d2Smrg ptr_cmd[i++] = 0; 267288f8a8d2Smrg 26735324fb0dSmrg /* dispatch direct command */ 26745324fb0dSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3); 26755324fb0dSmrg ptr_cmd[i++] = 0x10; 26765324fb0dSmrg ptr_cmd[i++] = 1; 26775324fb0dSmrg ptr_cmd[i++] = 1; 26785324fb0dSmrg ptr_cmd[i++] = 1; 26795324fb0dSmrg 26805324fb0dSmrg while (i & 7) 26815324fb0dSmrg ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 26825324fb0dSmrg 26835324fb0dSmrg resources[0] = bo_shader; 26845324fb0dSmrg resources[1] = bo_src; 26855324fb0dSmrg resources[2] = bo_dst; 26865324fb0dSmrg resources[3] = bo_cmd; 26875324fb0dSmrg r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list); 26885324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 26895324fb0dSmrg 26905324fb0dSmrg ib_info.ib_mc_address = mc_address_cmd; 26915324fb0dSmrg ib_info.size = i; 26925324fb0dSmrg ibs_request.ip_type = ip_type; 26935324fb0dSmrg ibs_request.ring = ring; 26945324fb0dSmrg ibs_request.resources = bo_list; 26955324fb0dSmrg ibs_request.number_of_ibs = 1; 26965324fb0dSmrg ibs_request.ibs = &ib_info; 26975324fb0dSmrg ibs_request.fence_info.handle = NULL; 26985324fb0dSmrg r = 
amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 26995324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 27005324fb0dSmrg 27015324fb0dSmrg fence_status.ip_type = ip_type; 27025324fb0dSmrg fence_status.ip_instance = 0; 27035324fb0dSmrg fence_status.ring = ring; 27045324fb0dSmrg fence_status.context = context_handle; 27055324fb0dSmrg fence_status.fence = ibs_request.seq_no; 27065324fb0dSmrg 27075324fb0dSmrg /* wait for IB accomplished */ 27085324fb0dSmrg r = amdgpu_cs_query_fence_status(&fence_status, 27095324fb0dSmrg AMDGPU_TIMEOUT_INFINITE, 27105324fb0dSmrg 0, &expired); 27115324fb0dSmrg 27129bd392adSmrg if (!hang) { 27139bd392adSmrg CU_ASSERT_EQUAL(r, 0); 27149bd392adSmrg CU_ASSERT_EQUAL(expired, true); 27159bd392adSmrg 27169bd392adSmrg /* verify if memcpy test result meets with expected */ 27179bd392adSmrg i = 0; 27189bd392adSmrg while(i < bo_dst_size) { 27199bd392adSmrg CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]); 27209bd392adSmrg i++; 27219bd392adSmrg } 27229bd392adSmrg } else { 27239bd392adSmrg r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); 27249bd392adSmrg CU_ASSERT_EQUAL(r, 0); 27259bd392adSmrg CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); 27265324fb0dSmrg } 27275324fb0dSmrg 27285324fb0dSmrg r = amdgpu_bo_list_destroy(bo_list); 27295324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 27305324fb0dSmrg 27315324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size); 27325324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 27335324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 27345324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 27355324fb0dSmrg 27365324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 27375324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 27385324fb0dSmrg 27395324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size); 27405324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 27415324fb0dSmrg 27425324fb0dSmrg r = amdgpu_cs_ctx_free(context_handle); 27435324fb0dSmrg 
CU_ASSERT_EQUAL(r, 0); 27445324fb0dSmrg} 274588f8a8d2Smrg 274688f8a8d2Smrgstatic void amdgpu_compute_dispatch_test(void) 27475324fb0dSmrg{ 27485324fb0dSmrg int r; 27495324fb0dSmrg struct drm_amdgpu_info_hw_ip info; 27505324fb0dSmrg uint32_t ring_id; 27515324fb0dSmrg 27525324fb0dSmrg r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info); 27535324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 275488f8a8d2Smrg if (!info.available_rings) 275588f8a8d2Smrg printf("SKIP ... as there's no compute ring\n"); 27565324fb0dSmrg 27575324fb0dSmrg for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 27585324fb0dSmrg amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id); 27599bd392adSmrg amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, 0); 27605324fb0dSmrg } 276188f8a8d2Smrg} 276288f8a8d2Smrg 276388f8a8d2Smrgstatic void amdgpu_gfx_dispatch_test(void) 276488f8a8d2Smrg{ 276588f8a8d2Smrg int r; 276688f8a8d2Smrg struct drm_amdgpu_info_hw_ip info; 276788f8a8d2Smrg uint32_t ring_id; 27685324fb0dSmrg 27695324fb0dSmrg r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info); 27705324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 277188f8a8d2Smrg if (!info.available_rings) 277288f8a8d2Smrg printf("SKIP ... 
as there's no graphics ring\n"); 27735324fb0dSmrg 27745324fb0dSmrg for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 27755324fb0dSmrg amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id); 27769bd392adSmrg amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, 0); 27779bd392adSmrg } 27789bd392adSmrg} 27799bd392adSmrg 27809bd392adSmrgvoid amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip_type) 27819bd392adSmrg{ 27829bd392adSmrg int r; 27839bd392adSmrg struct drm_amdgpu_info_hw_ip info; 27849bd392adSmrg uint32_t ring_id; 27859bd392adSmrg 27869bd392adSmrg r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info); 27879bd392adSmrg CU_ASSERT_EQUAL(r, 0); 27889bd392adSmrg if (!info.available_rings) 27899bd392adSmrg printf("SKIP ... as there's no ring for ip %d\n", ip_type); 27909bd392adSmrg 27919bd392adSmrg for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 27929bd392adSmrg amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0); 27939bd392adSmrg amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 1); 27949bd392adSmrg amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0); 27959bd392adSmrg } 27969bd392adSmrg} 27979bd392adSmrg 27989bd392adSmrgstatic void amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_handle, 27999bd392adSmrg uint32_t ip_type, uint32_t ring) 28009bd392adSmrg{ 28019bd392adSmrg amdgpu_context_handle context_handle; 28029bd392adSmrg amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4]; 28039bd392adSmrg volatile unsigned char *ptr_dst; 28049bd392adSmrg void *ptr_shader; 28059bd392adSmrg unsigned char *ptr_src; 28069bd392adSmrg uint32_t *ptr_cmd; 28079bd392adSmrg uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd; 28089bd392adSmrg amdgpu_va_handle va_src, va_dst, va_shader, va_cmd; 28099bd392adSmrg int i, r; 28109bd392adSmrg int bo_dst_size = 0x4000000; 28119bd392adSmrg int 
bo_shader_size = 0x400000; 28129bd392adSmrg int bo_cmd_size = 4096; 28139bd392adSmrg struct amdgpu_cs_request ibs_request = {0}; 28149bd392adSmrg struct amdgpu_cs_ib_info ib_info= {0}; 28159bd392adSmrg uint32_t hang_state, hangs, expired; 28169bd392adSmrg struct amdgpu_gpu_info gpu_info = {0}; 28179bd392adSmrg amdgpu_bo_list_handle bo_list; 28189bd392adSmrg struct amdgpu_cs_fence fence_status = {0}; 28199bd392adSmrg 28209bd392adSmrg r = amdgpu_query_gpu_info(device_handle, &gpu_info); 28219bd392adSmrg CU_ASSERT_EQUAL(r, 0); 28229bd392adSmrg 28239bd392adSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 28249bd392adSmrg CU_ASSERT_EQUAL(r, 0); 28259bd392adSmrg 28269bd392adSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 28279bd392adSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 28289bd392adSmrg &bo_cmd, (void **)&ptr_cmd, 28299bd392adSmrg &mc_address_cmd, &va_cmd); 28309bd392adSmrg CU_ASSERT_EQUAL(r, 0); 28319bd392adSmrg memset(ptr_cmd, 0, bo_cmd_size); 28329bd392adSmrg 28339bd392adSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 28349bd392adSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 28359bd392adSmrg &bo_shader, &ptr_shader, 28369bd392adSmrg &mc_address_shader, &va_shader); 28379bd392adSmrg CU_ASSERT_EQUAL(r, 0); 28389bd392adSmrg memset(ptr_shader, 0, bo_shader_size); 28399bd392adSmrg 28409bd392adSmrg r = amdgpu_dispatch_load_cs_shader_hang_slow(ptr_shader, gpu_info.family_id); 28419bd392adSmrg CU_ASSERT_EQUAL(r, 0); 28429bd392adSmrg 28439bd392adSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 28449bd392adSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 28459bd392adSmrg &bo_src, (void **)&ptr_src, 28469bd392adSmrg &mc_address_src, &va_src); 28479bd392adSmrg CU_ASSERT_EQUAL(r, 0); 28489bd392adSmrg 28499bd392adSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 28509bd392adSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 28519bd392adSmrg &bo_dst, (void **)&ptr_dst, 28529bd392adSmrg &mc_address_dst, &va_dst); 28539bd392adSmrg CU_ASSERT_EQUAL(r, 
0); 28549bd392adSmrg 28559bd392adSmrg memset(ptr_src, 0x55, bo_dst_size); 28569bd392adSmrg 28579bd392adSmrg i = 0; 28589bd392adSmrg i += amdgpu_dispatch_init(ptr_cmd + i, ip_type); 28599bd392adSmrg 28609bd392adSmrg /* Issue commands to set cu mask used in current dispatch */ 28619bd392adSmrg i += amdgpu_dispatch_write_cumask(ptr_cmd + i); 28629bd392adSmrg 28639bd392adSmrg /* Writes shader state to HW */ 28649bd392adSmrg i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader); 28659bd392adSmrg 28669bd392adSmrg /* Write constant data */ 28679bd392adSmrg /* Writes the texture resource constants data to the SGPRs */ 28689bd392adSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 28699bd392adSmrg ptr_cmd[i++] = 0x240; 28709bd392adSmrg ptr_cmd[i++] = mc_address_src; 28719bd392adSmrg ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000; 28729bd392adSmrg ptr_cmd[i++] = 0x400000; 28739bd392adSmrg ptr_cmd[i++] = 0x74fac; 28749bd392adSmrg 28759bd392adSmrg /* Writes the UAV constant data to the SGPRs. 
*/ 28769bd392adSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 28779bd392adSmrg ptr_cmd[i++] = 0x244; 28789bd392adSmrg ptr_cmd[i++] = mc_address_dst; 28799bd392adSmrg ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000; 28809bd392adSmrg ptr_cmd[i++] = 0x400000; 28819bd392adSmrg ptr_cmd[i++] = 0x74fac; 28829bd392adSmrg 28839bd392adSmrg /* clear mmCOMPUTE_RESOURCE_LIMITS */ 28849bd392adSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 28859bd392adSmrg ptr_cmd[i++] = 0x215; 28869bd392adSmrg ptr_cmd[i++] = 0; 28879bd392adSmrg 28889bd392adSmrg /* dispatch direct command */ 28899bd392adSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3); 28909bd392adSmrg ptr_cmd[i++] = 0x10000; 28919bd392adSmrg ptr_cmd[i++] = 1; 28929bd392adSmrg ptr_cmd[i++] = 1; 28939bd392adSmrg ptr_cmd[i++] = 1; 28949bd392adSmrg 28959bd392adSmrg while (i & 7) 28969bd392adSmrg ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 28979bd392adSmrg 28989bd392adSmrg resources[0] = bo_shader; 28999bd392adSmrg resources[1] = bo_src; 29009bd392adSmrg resources[2] = bo_dst; 29019bd392adSmrg resources[3] = bo_cmd; 29029bd392adSmrg r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list); 29039bd392adSmrg CU_ASSERT_EQUAL(r, 0); 29049bd392adSmrg 29059bd392adSmrg ib_info.ib_mc_address = mc_address_cmd; 29069bd392adSmrg ib_info.size = i; 29079bd392adSmrg ibs_request.ip_type = ip_type; 29089bd392adSmrg ibs_request.ring = ring; 29099bd392adSmrg ibs_request.resources = bo_list; 29109bd392adSmrg ibs_request.number_of_ibs = 1; 29119bd392adSmrg ibs_request.ibs = &ib_info; 29129bd392adSmrg ibs_request.fence_info.handle = NULL; 29139bd392adSmrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 29149bd392adSmrg CU_ASSERT_EQUAL(r, 0); 29159bd392adSmrg 29169bd392adSmrg fence_status.ip_type = ip_type; 29179bd392adSmrg fence_status.ip_instance = 0; 29189bd392adSmrg fence_status.ring = ring; 29199bd392adSmrg fence_status.context = context_handle; 29209bd392adSmrg fence_status.fence = 
ibs_request.seq_no; 29219bd392adSmrg 29229bd392adSmrg /* wait for IB accomplished */ 29239bd392adSmrg r = amdgpu_cs_query_fence_status(&fence_status, 29249bd392adSmrg AMDGPU_TIMEOUT_INFINITE, 29259bd392adSmrg 0, &expired); 29269bd392adSmrg 29279bd392adSmrg r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); 29289bd392adSmrg CU_ASSERT_EQUAL(r, 0); 29299bd392adSmrg CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); 29309bd392adSmrg 29319bd392adSmrg r = amdgpu_bo_list_destroy(bo_list); 29329bd392adSmrg CU_ASSERT_EQUAL(r, 0); 29339bd392adSmrg 29349bd392adSmrg r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size); 29359bd392adSmrg CU_ASSERT_EQUAL(r, 0); 29369bd392adSmrg r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 29379bd392adSmrg CU_ASSERT_EQUAL(r, 0); 29389bd392adSmrg 29399bd392adSmrg r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 29409bd392adSmrg CU_ASSERT_EQUAL(r, 0); 29419bd392adSmrg 29429bd392adSmrg r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size); 29439bd392adSmrg CU_ASSERT_EQUAL(r, 0); 29449bd392adSmrg 29459bd392adSmrg r = amdgpu_cs_ctx_free(context_handle); 29469bd392adSmrg CU_ASSERT_EQUAL(r, 0); 29479bd392adSmrg} 29489bd392adSmrg 29499bd392adSmrgvoid amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle, uint32_t ip_type) 29509bd392adSmrg{ 29519bd392adSmrg int r; 29529bd392adSmrg struct drm_amdgpu_info_hw_ip info; 29539bd392adSmrg uint32_t ring_id; 29549bd392adSmrg 29559bd392adSmrg r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info); 29569bd392adSmrg CU_ASSERT_EQUAL(r, 0); 29579bd392adSmrg if (!info.available_rings) 29589bd392adSmrg printf("SKIP ... 
as there's no ring for ip %d\n", ip_type); 29599bd392adSmrg 29609bd392adSmrg for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 29619bd392adSmrg amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0); 29629bd392adSmrg amdgpu_memcpy_dispatch_hang_slow_test(device_handle, ip_type, ring_id); 29639bd392adSmrg amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0); 29649bd392adSmrg } 29659bd392adSmrg} 29669bd392adSmrg 29679bd392adSmrgstatic int amdgpu_draw_load_ps_shader_hang_slow(uint32_t *ptr, int family) 29689bd392adSmrg{ 29699bd392adSmrg struct amdgpu_test_shader *shader; 29709bd392adSmrg int i, loop = 0x40000; 29719bd392adSmrg 29729bd392adSmrg switch (family) { 29739bd392adSmrg case AMDGPU_FAMILY_AI: 29749bd392adSmrg case AMDGPU_FAMILY_RV: 29759bd392adSmrg shader = &memcpy_ps_hang_slow_ai; 29769bd392adSmrg break; 29779bd392adSmrg default: 29789bd392adSmrg return -1; 29799bd392adSmrg break; 29805324fb0dSmrg } 29819bd392adSmrg 29829bd392adSmrg memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t)); 29839bd392adSmrg 29849bd392adSmrg for (i = 0; i < loop; i++) 29859bd392adSmrg memcpy(ptr + shader->header_length + shader->body_length * i, 29869bd392adSmrg shader->shader + shader->header_length, 29879bd392adSmrg shader->body_length * sizeof(uint32_t)); 29889bd392adSmrg 29899bd392adSmrg memcpy(ptr + shader->header_length + shader->body_length * loop, 29909bd392adSmrg shader->shader + shader->header_length + shader->body_length, 29919bd392adSmrg shader->foot_length * sizeof(uint32_t)); 29929bd392adSmrg 29939bd392adSmrg return 0; 29945324fb0dSmrg} 29955324fb0dSmrg 29965324fb0dSmrgstatic int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type) 29975324fb0dSmrg{ 29985324fb0dSmrg int i; 29995324fb0dSmrg uint32_t shader_offset= 256; 30005324fb0dSmrg uint32_t mem_offset, patch_code_offset; 30015324fb0dSmrg uint32_t shader_size, patchinfo_code_size; 30025324fb0dSmrg const uint32_t *shader; 30035324fb0dSmrg const uint32_t 
*patchinfo_code; 30045324fb0dSmrg const uint32_t *patchcode_offset; 30055324fb0dSmrg 30065324fb0dSmrg switch (ps_type) { 30075324fb0dSmrg case PS_CONST: 30085324fb0dSmrg shader = ps_const_shader_gfx9; 30095324fb0dSmrg shader_size = sizeof(ps_const_shader_gfx9); 30105324fb0dSmrg patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9; 30115324fb0dSmrg patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9; 30125324fb0dSmrg patchcode_offset = ps_const_shader_patchinfo_offset_gfx9; 30135324fb0dSmrg break; 30145324fb0dSmrg case PS_TEX: 30155324fb0dSmrg shader = ps_tex_shader_gfx9; 30165324fb0dSmrg shader_size = sizeof(ps_tex_shader_gfx9); 30175324fb0dSmrg patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx9; 30185324fb0dSmrg patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9; 30195324fb0dSmrg patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9; 30205324fb0dSmrg break; 30219bd392adSmrg case PS_HANG: 30229bd392adSmrg shader = memcpy_ps_hang; 30239bd392adSmrg shader_size = sizeof(memcpy_ps_hang); 30249bd392adSmrg 30259bd392adSmrg memcpy(ptr, shader, shader_size); 30269bd392adSmrg return 0; 30275324fb0dSmrg default: 30285324fb0dSmrg return -1; 30295324fb0dSmrg break; 30305324fb0dSmrg } 30315324fb0dSmrg 30325324fb0dSmrg /* write main shader program */ 30335324fb0dSmrg for (i = 0 ; i < 10; i++) { 30345324fb0dSmrg mem_offset = i * shader_offset; 30355324fb0dSmrg memcpy(ptr + mem_offset, shader, shader_size); 30365324fb0dSmrg } 30375324fb0dSmrg 30385324fb0dSmrg /* overwrite patch codes */ 30395324fb0dSmrg for (i = 0 ; i < 10; i++) { 30405324fb0dSmrg mem_offset = i * shader_offset + patchcode_offset[0] * sizeof(uint32_t); 30415324fb0dSmrg patch_code_offset = i * patchinfo_code_size; 30425324fb0dSmrg memcpy(ptr + mem_offset, 30435324fb0dSmrg patchinfo_code + patch_code_offset, 30445324fb0dSmrg patchinfo_code_size * sizeof(uint32_t)); 30455324fb0dSmrg } 30465324fb0dSmrg 30475324fb0dSmrg return 0; 30485324fb0dSmrg} 
30495324fb0dSmrg 30505324fb0dSmrg/* load RectPosTexFast_VS */ 30515324fb0dSmrgstatic int amdgpu_draw_load_vs_shader(uint8_t *ptr) 30525324fb0dSmrg{ 30535324fb0dSmrg const uint32_t *shader; 30545324fb0dSmrg uint32_t shader_size; 30555324fb0dSmrg 30565324fb0dSmrg shader = vs_RectPosTexFast_shader_gfx9; 30575324fb0dSmrg shader_size = sizeof(vs_RectPosTexFast_shader_gfx9); 30585324fb0dSmrg 30595324fb0dSmrg memcpy(ptr, shader, shader_size); 30605324fb0dSmrg 30615324fb0dSmrg return 0; 30625324fb0dSmrg} 30635324fb0dSmrg 30645324fb0dSmrgstatic int amdgpu_draw_init(uint32_t *ptr) 30655324fb0dSmrg{ 30665324fb0dSmrg int i = 0; 30675324fb0dSmrg const uint32_t *preamblecache_ptr; 30685324fb0dSmrg uint32_t preamblecache_size; 30695324fb0dSmrg 30705324fb0dSmrg /* Write context control and load shadowing register if necessary */ 30715324fb0dSmrg ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); 30725324fb0dSmrg ptr[i++] = 0x80000000; 30735324fb0dSmrg ptr[i++] = 0x80000000; 30745324fb0dSmrg 30755324fb0dSmrg preamblecache_ptr = preamblecache_gfx9; 30765324fb0dSmrg preamblecache_size = sizeof(preamblecache_gfx9); 30775324fb0dSmrg 30785324fb0dSmrg memcpy(ptr + i, preamblecache_ptr, preamblecache_size); 30795324fb0dSmrg return i + preamblecache_size/sizeof(uint32_t); 30805324fb0dSmrg} 30815324fb0dSmrg 30825324fb0dSmrgstatic int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr, 30839bd392adSmrg uint64_t dst_addr, 30849bd392adSmrg int hang_slow) 30855324fb0dSmrg{ 30865324fb0dSmrg int i = 0; 30875324fb0dSmrg 30885324fb0dSmrg /* setup color buffer */ 30895324fb0dSmrg /* offset reg 30905324fb0dSmrg 0xA318 CB_COLOR0_BASE 30915324fb0dSmrg 0xA319 CB_COLOR0_BASE_EXT 30925324fb0dSmrg 0xA31A CB_COLOR0_ATTRIB2 30935324fb0dSmrg 0xA31B CB_COLOR0_VIEW 30945324fb0dSmrg 0xA31C CB_COLOR0_INFO 30955324fb0dSmrg 0xA31D CB_COLOR0_ATTRIB 30965324fb0dSmrg 0xA31E CB_COLOR0_DCC_CONTROL 30975324fb0dSmrg 0xA31F CB_COLOR0_CMASK 30985324fb0dSmrg 0xA320 CB_COLOR0_CMASK_BASE_EXT 30995324fb0dSmrg 0xA321 
CB_COLOR0_FMASK 31005324fb0dSmrg 0xA322 CB_COLOR0_FMASK_BASE_EXT 31015324fb0dSmrg 0xA323 CB_COLOR0_CLEAR_WORD0 31025324fb0dSmrg 0xA324 CB_COLOR0_CLEAR_WORD1 31035324fb0dSmrg 0xA325 CB_COLOR0_DCC_BASE 31045324fb0dSmrg 0xA326 CB_COLOR0_DCC_BASE_EXT */ 31055324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15); 31065324fb0dSmrg ptr[i++] = 0x318; 31075324fb0dSmrg ptr[i++] = dst_addr >> 8; 31085324fb0dSmrg ptr[i++] = dst_addr >> 40; 31099bd392adSmrg ptr[i++] = hang_slow ? 0x1ffc7ff : 0x7c01f; 31105324fb0dSmrg ptr[i++] = 0; 31115324fb0dSmrg ptr[i++] = 0x50438; 31125324fb0dSmrg ptr[i++] = 0x10140000; 31135324fb0dSmrg i += 9; 31145324fb0dSmrg 31155324fb0dSmrg /* mmCB_MRT0_EPITCH */ 31165324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 31175324fb0dSmrg ptr[i++] = 0x1e8; 31189bd392adSmrg ptr[i++] = hang_slow ? 0x7ff : 0x1f; 31195324fb0dSmrg 31205324fb0dSmrg /* 0xA32B CB_COLOR1_BASE */ 31215324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 31225324fb0dSmrg ptr[i++] = 0x32b; 31235324fb0dSmrg ptr[i++] = 0; 31245324fb0dSmrg 31255324fb0dSmrg /* 0xA33A CB_COLOR1_BASE */ 31265324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 31275324fb0dSmrg ptr[i++] = 0x33a; 31285324fb0dSmrg ptr[i++] = 0; 31295324fb0dSmrg 31305324fb0dSmrg /* SPI_SHADER_COL_FORMAT */ 31315324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 31325324fb0dSmrg ptr[i++] = 0x1c5; 31335324fb0dSmrg ptr[i++] = 9; 31345324fb0dSmrg 31355324fb0dSmrg /* Setup depth buffer */ 31365324fb0dSmrg /* mmDB_Z_INFO */ 31375324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); 31385324fb0dSmrg ptr[i++] = 0xe; 31395324fb0dSmrg i += 2; 31405324fb0dSmrg 31415324fb0dSmrg return i; 31425324fb0dSmrg} 31435324fb0dSmrg 31449bd392adSmrgstatic int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr, int hang_slow) 31455324fb0dSmrg{ 31465324fb0dSmrg int i = 0; 31475324fb0dSmrg const uint32_t *cached_cmd_ptr; 31485324fb0dSmrg uint32_t cached_cmd_size; 31495324fb0dSmrg 31505324fb0dSmrg /* 
mmPA_SC_TILE_STEERING_OVERRIDE */ 31515324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 31525324fb0dSmrg ptr[i++] = 0xd7; 31535324fb0dSmrg ptr[i++] = 0; 31545324fb0dSmrg 31555324fb0dSmrg ptr[i++] = 0xffff1000; 31565324fb0dSmrg ptr[i++] = 0xc0021000; 31575324fb0dSmrg 31585324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 31595324fb0dSmrg ptr[i++] = 0xd7; 31605324fb0dSmrg ptr[i++] = 1; 31615324fb0dSmrg 31625324fb0dSmrg /* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */ 31635324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16); 31645324fb0dSmrg ptr[i++] = 0x2fe; 31655324fb0dSmrg i += 16; 31665324fb0dSmrg 31675324fb0dSmrg /* mmPA_SC_CENTROID_PRIORITY_0 */ 31685324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); 31695324fb0dSmrg ptr[i++] = 0x2f5; 31705324fb0dSmrg i += 2; 31715324fb0dSmrg 31725324fb0dSmrg cached_cmd_ptr = cached_cmd_gfx9; 31735324fb0dSmrg cached_cmd_size = sizeof(cached_cmd_gfx9); 31745324fb0dSmrg 31755324fb0dSmrg memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size); 31769bd392adSmrg if (hang_slow) 31779bd392adSmrg *(ptr + i + 12) = 0x8000800; 31785324fb0dSmrg i += cached_cmd_size/sizeof(uint32_t); 31795324fb0dSmrg 31805324fb0dSmrg return i; 31815324fb0dSmrg} 31825324fb0dSmrg 31835324fb0dSmrgstatic int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr, 31845324fb0dSmrg int ps_type, 31859bd392adSmrg uint64_t shader_addr, 31869bd392adSmrg int hang_slow) 31875324fb0dSmrg{ 31885324fb0dSmrg int i = 0; 31895324fb0dSmrg 31905324fb0dSmrg /* mmPA_CL_VS_OUT_CNTL */ 31915324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 31925324fb0dSmrg ptr[i++] = 0x207; 31935324fb0dSmrg ptr[i++] = 0; 31945324fb0dSmrg 31955324fb0dSmrg /* mmSPI_SHADER_PGM_RSRC3_VS */ 31965324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 31975324fb0dSmrg ptr[i++] = 0x46; 31985324fb0dSmrg ptr[i++] = 0xffff; 31995324fb0dSmrg 32005324fb0dSmrg /* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */ 32015324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 
32025324fb0dSmrg ptr[i++] = 0x48; 32035324fb0dSmrg ptr[i++] = shader_addr >> 8; 32045324fb0dSmrg ptr[i++] = shader_addr >> 40; 32055324fb0dSmrg 32065324fb0dSmrg /* mmSPI_SHADER_PGM_RSRC1_VS */ 32075324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 32085324fb0dSmrg ptr[i++] = 0x4a; 32095324fb0dSmrg ptr[i++] = 0xc0081; 32105324fb0dSmrg /* mmSPI_SHADER_PGM_RSRC2_VS */ 32115324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 32125324fb0dSmrg ptr[i++] = 0x4b; 32135324fb0dSmrg ptr[i++] = 0x18; 32145324fb0dSmrg 32155324fb0dSmrg /* mmSPI_VS_OUT_CONFIG */ 32165324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 32175324fb0dSmrg ptr[i++] = 0x1b1; 32185324fb0dSmrg ptr[i++] = 2; 32195324fb0dSmrg 32205324fb0dSmrg /* mmSPI_SHADER_POS_FORMAT */ 32215324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 32225324fb0dSmrg ptr[i++] = 0x1c3; 32235324fb0dSmrg ptr[i++] = 4; 32245324fb0dSmrg 32255324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4); 32265324fb0dSmrg ptr[i++] = 0x4c; 32275324fb0dSmrg i += 2; 32289bd392adSmrg ptr[i++] = hang_slow ? 0x45000000 : 0x42000000; 32299bd392adSmrg ptr[i++] = hang_slow ? 
0x45000000 : 0x42000000; 32305324fb0dSmrg 32315324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4); 32325324fb0dSmrg ptr[i++] = 0x50; 32335324fb0dSmrg i += 2; 32345324fb0dSmrg if (ps_type == PS_CONST) { 32355324fb0dSmrg i += 2; 32365324fb0dSmrg } else if (ps_type == PS_TEX) { 32375324fb0dSmrg ptr[i++] = 0x3f800000; 32385324fb0dSmrg ptr[i++] = 0x3f800000; 32395324fb0dSmrg } 32405324fb0dSmrg 32415324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4); 32425324fb0dSmrg ptr[i++] = 0x54; 32435324fb0dSmrg i += 4; 32445324fb0dSmrg 32455324fb0dSmrg return i; 32465324fb0dSmrg} 32475324fb0dSmrg 32485324fb0dSmrgstatic int amdgpu_draw_ps_write2hw(uint32_t *ptr, 32495324fb0dSmrg int ps_type, 32505324fb0dSmrg uint64_t shader_addr) 32515324fb0dSmrg{ 32525324fb0dSmrg int i, j; 32535324fb0dSmrg const uint32_t *sh_registers; 32545324fb0dSmrg const uint32_t *context_registers; 32555324fb0dSmrg uint32_t num_sh_reg, num_context_reg; 32565324fb0dSmrg 32575324fb0dSmrg if (ps_type == PS_CONST) { 32585324fb0dSmrg sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9; 32595324fb0dSmrg context_registers = (const uint32_t *)ps_const_context_reg_gfx9; 32605324fb0dSmrg num_sh_reg = ps_num_sh_registers_gfx9; 32615324fb0dSmrg num_context_reg = ps_num_context_registers_gfx9; 32625324fb0dSmrg } else if (ps_type == PS_TEX) { 32635324fb0dSmrg sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9; 32645324fb0dSmrg context_registers = (const uint32_t *)ps_tex_context_reg_gfx9; 32655324fb0dSmrg num_sh_reg = ps_num_sh_registers_gfx9; 32665324fb0dSmrg num_context_reg = ps_num_context_registers_gfx9; 32675324fb0dSmrg } 32685324fb0dSmrg 32695324fb0dSmrg i = 0; 32705324fb0dSmrg 32715324fb0dSmrg /* 0x2c07 SPI_SHADER_PGM_RSRC3_PS 32725324fb0dSmrg 0x2c08 SPI_SHADER_PGM_LO_PS 32735324fb0dSmrg 0x2c09 SPI_SHADER_PGM_HI_PS */ 32745324fb0dSmrg shader_addr += 256 * 9; 32755324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3); 32765324fb0dSmrg ptr[i++] = 0x7; 32775324fb0dSmrg ptr[i++] = 0xffff; 32785324fb0dSmrg 
ptr[i++] = shader_addr >> 8; 32795324fb0dSmrg ptr[i++] = shader_addr >> 40; 32805324fb0dSmrg 32815324fb0dSmrg for (j = 0; j < num_sh_reg; j++) { 32825324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 32835324fb0dSmrg ptr[i++] = sh_registers[j * 2] - 0x2c00; 32845324fb0dSmrg ptr[i++] = sh_registers[j * 2 + 1]; 32855324fb0dSmrg } 32865324fb0dSmrg 32875324fb0dSmrg for (j = 0; j < num_context_reg; j++) { 32885324fb0dSmrg if (context_registers[j * 2] != 0xA1C5) { 32895324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 32905324fb0dSmrg ptr[i++] = context_registers[j * 2] - 0xa000; 32915324fb0dSmrg ptr[i++] = context_registers[j * 2 + 1]; 32925324fb0dSmrg } 32935324fb0dSmrg 32945324fb0dSmrg if (context_registers[j * 2] == 0xA1B4) { 32955324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 32965324fb0dSmrg ptr[i++] = 0x1b3; 32975324fb0dSmrg ptr[i++] = 2; 32985324fb0dSmrg } 32995324fb0dSmrg } 33005324fb0dSmrg 33015324fb0dSmrg return i; 33025324fb0dSmrg} 33035324fb0dSmrg 33045324fb0dSmrgstatic int amdgpu_draw_draw(uint32_t *ptr) 33055324fb0dSmrg{ 33065324fb0dSmrg int i = 0; 33075324fb0dSmrg 33085324fb0dSmrg /* mmIA_MULTI_VGT_PARAM */ 33095324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 33105324fb0dSmrg ptr[i++] = 0x40000258; 33115324fb0dSmrg ptr[i++] = 0xd00ff; 33125324fb0dSmrg 33135324fb0dSmrg /* mmVGT_PRIMITIVE_TYPE */ 33145324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 33155324fb0dSmrg ptr[i++] = 0x10000242; 33165324fb0dSmrg ptr[i++] = 0x11; 33175324fb0dSmrg 33185324fb0dSmrg ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1); 33195324fb0dSmrg ptr[i++] = 3; 33205324fb0dSmrg ptr[i++] = 2; 33215324fb0dSmrg 33225324fb0dSmrg return i; 33235324fb0dSmrg} 33245324fb0dSmrg 33255324fb0dSmrgvoid amdgpu_memset_draw(amdgpu_device_handle device_handle, 33265324fb0dSmrg amdgpu_bo_handle bo_shader_ps, 33275324fb0dSmrg amdgpu_bo_handle bo_shader_vs, 33285324fb0dSmrg uint64_t mc_address_shader_ps, 33295324fb0dSmrg uint64_t mc_address_shader_vs, 
33305324fb0dSmrg uint32_t ring_id) 33315324fb0dSmrg{ 33325324fb0dSmrg amdgpu_context_handle context_handle; 33335324fb0dSmrg amdgpu_bo_handle bo_dst, bo_cmd, resources[4]; 33345324fb0dSmrg volatile unsigned char *ptr_dst; 33355324fb0dSmrg uint32_t *ptr_cmd; 33365324fb0dSmrg uint64_t mc_address_dst, mc_address_cmd; 33375324fb0dSmrg amdgpu_va_handle va_dst, va_cmd; 33385324fb0dSmrg int i, r; 33395324fb0dSmrg int bo_dst_size = 16384; 33405324fb0dSmrg int bo_cmd_size = 4096; 33415324fb0dSmrg struct amdgpu_cs_request ibs_request = {0}; 33425324fb0dSmrg struct amdgpu_cs_ib_info ib_info = {0}; 33435324fb0dSmrg struct amdgpu_cs_fence fence_status = {0}; 33445324fb0dSmrg uint32_t expired; 33455324fb0dSmrg amdgpu_bo_list_handle bo_list; 33465324fb0dSmrg 33475324fb0dSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 33485324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 33495324fb0dSmrg 33505324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 33515324fb0dSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 33525324fb0dSmrg &bo_cmd, (void **)&ptr_cmd, 33535324fb0dSmrg &mc_address_cmd, &va_cmd); 33545324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 33555324fb0dSmrg memset(ptr_cmd, 0, bo_cmd_size); 33565324fb0dSmrg 33575324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 33585324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 33595324fb0dSmrg &bo_dst, (void **)&ptr_dst, 33605324fb0dSmrg &mc_address_dst, &va_dst); 33615324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 33625324fb0dSmrg 33635324fb0dSmrg i = 0; 33645324fb0dSmrg i += amdgpu_draw_init(ptr_cmd + i); 33655324fb0dSmrg 33669bd392adSmrg i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0); 33675324fb0dSmrg 33689bd392adSmrg i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0); 33695324fb0dSmrg 33709bd392adSmrg i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs, 0); 33715324fb0dSmrg 33725324fb0dSmrg i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, 
mc_address_shader_ps); 33735324fb0dSmrg 33745324fb0dSmrg ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4); 33755324fb0dSmrg ptr_cmd[i++] = 0xc; 33765324fb0dSmrg ptr_cmd[i++] = 0x33333333; 33775324fb0dSmrg ptr_cmd[i++] = 0x33333333; 33785324fb0dSmrg ptr_cmd[i++] = 0x33333333; 33795324fb0dSmrg ptr_cmd[i++] = 0x33333333; 33805324fb0dSmrg 33815324fb0dSmrg i += amdgpu_draw_draw(ptr_cmd + i); 33825324fb0dSmrg 33835324fb0dSmrg while (i & 7) 33845324fb0dSmrg ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 33855324fb0dSmrg 33865324fb0dSmrg resources[0] = bo_dst; 33875324fb0dSmrg resources[1] = bo_shader_ps; 33885324fb0dSmrg resources[2] = bo_shader_vs; 33895324fb0dSmrg resources[3] = bo_cmd; 33909bd392adSmrg r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list); 33915324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 33925324fb0dSmrg 33935324fb0dSmrg ib_info.ib_mc_address = mc_address_cmd; 33945324fb0dSmrg ib_info.size = i; 33955324fb0dSmrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 33965324fb0dSmrg ibs_request.ring = ring_id; 33975324fb0dSmrg ibs_request.resources = bo_list; 33985324fb0dSmrg ibs_request.number_of_ibs = 1; 33995324fb0dSmrg ibs_request.ibs = &ib_info; 34005324fb0dSmrg ibs_request.fence_info.handle = NULL; 34015324fb0dSmrg 34025324fb0dSmrg /* submit CS */ 34035324fb0dSmrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 34045324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 34055324fb0dSmrg 34065324fb0dSmrg r = amdgpu_bo_list_destroy(bo_list); 34075324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 34085324fb0dSmrg 34095324fb0dSmrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 34105324fb0dSmrg fence_status.ip_instance = 0; 34115324fb0dSmrg fence_status.ring = ring_id; 34125324fb0dSmrg fence_status.context = context_handle; 34135324fb0dSmrg fence_status.fence = ibs_request.seq_no; 34145324fb0dSmrg 34155324fb0dSmrg /* wait for IB accomplished */ 34165324fb0dSmrg r = amdgpu_cs_query_fence_status(&fence_status, 34175324fb0dSmrg AMDGPU_TIMEOUT_INFINITE, 34185324fb0dSmrg 0, &expired); 
34195324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 34205324fb0dSmrg CU_ASSERT_EQUAL(expired, true); 34215324fb0dSmrg 34225324fb0dSmrg /* verify if memset test result meets with expected */ 34235324fb0dSmrg i = 0; 34245324fb0dSmrg while(i < bo_dst_size) { 34255324fb0dSmrg CU_ASSERT_EQUAL(ptr_dst[i++], 0x33); 34265324fb0dSmrg } 34275324fb0dSmrg 34285324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 34295324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 34305324fb0dSmrg 34315324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 34325324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 34335324fb0dSmrg 34345324fb0dSmrg r = amdgpu_cs_ctx_free(context_handle); 34355324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 34365324fb0dSmrg} 34375324fb0dSmrg 34385324fb0dSmrgstatic void amdgpu_memset_draw_test(amdgpu_device_handle device_handle, 34395324fb0dSmrg uint32_t ring) 34405324fb0dSmrg{ 34415324fb0dSmrg amdgpu_bo_handle bo_shader_ps, bo_shader_vs; 34425324fb0dSmrg void *ptr_shader_ps; 34435324fb0dSmrg void *ptr_shader_vs; 34445324fb0dSmrg uint64_t mc_address_shader_ps, mc_address_shader_vs; 34455324fb0dSmrg amdgpu_va_handle va_shader_ps, va_shader_vs; 34465324fb0dSmrg int r; 34475324fb0dSmrg int bo_shader_size = 4096; 34485324fb0dSmrg 34495324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 34505324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 34515324fb0dSmrg &bo_shader_ps, &ptr_shader_ps, 34525324fb0dSmrg &mc_address_shader_ps, &va_shader_ps); 34535324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 345488f8a8d2Smrg memset(ptr_shader_ps, 0, bo_shader_size); 34555324fb0dSmrg 34565324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 34575324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 34585324fb0dSmrg &bo_shader_vs, &ptr_shader_vs, 34595324fb0dSmrg &mc_address_shader_vs, &va_shader_vs); 34605324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 346188f8a8d2Smrg memset(ptr_shader_vs, 0, bo_shader_size); 34625324fb0dSmrg 34635324fb0dSmrg r = 
amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_CONST); 34645324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 34655324fb0dSmrg 34665324fb0dSmrg r = amdgpu_draw_load_vs_shader(ptr_shader_vs); 34675324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 34685324fb0dSmrg 34695324fb0dSmrg amdgpu_memset_draw(device_handle, bo_shader_ps, bo_shader_vs, 34705324fb0dSmrg mc_address_shader_ps, mc_address_shader_vs, ring); 34715324fb0dSmrg 34725324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size); 34735324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 34745324fb0dSmrg 34755324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size); 34765324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 34775324fb0dSmrg} 34785324fb0dSmrg 34795324fb0dSmrgstatic void amdgpu_memcpy_draw(amdgpu_device_handle device_handle, 34805324fb0dSmrg amdgpu_bo_handle bo_shader_ps, 34815324fb0dSmrg amdgpu_bo_handle bo_shader_vs, 34825324fb0dSmrg uint64_t mc_address_shader_ps, 34835324fb0dSmrg uint64_t mc_address_shader_vs, 34849bd392adSmrg uint32_t ring, int hang) 34855324fb0dSmrg{ 34865324fb0dSmrg amdgpu_context_handle context_handle; 34875324fb0dSmrg amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5]; 34885324fb0dSmrg volatile unsigned char *ptr_dst; 34895324fb0dSmrg unsigned char *ptr_src; 34905324fb0dSmrg uint32_t *ptr_cmd; 34915324fb0dSmrg uint64_t mc_address_dst, mc_address_src, mc_address_cmd; 34925324fb0dSmrg amdgpu_va_handle va_dst, va_src, va_cmd; 34935324fb0dSmrg int i, r; 34945324fb0dSmrg int bo_size = 16384; 34955324fb0dSmrg int bo_cmd_size = 4096; 34965324fb0dSmrg struct amdgpu_cs_request ibs_request = {0}; 34975324fb0dSmrg struct amdgpu_cs_ib_info ib_info= {0}; 34989bd392adSmrg uint32_t hang_state, hangs; 34999bd392adSmrg uint32_t expired; 35005324fb0dSmrg amdgpu_bo_list_handle bo_list; 35015324fb0dSmrg struct amdgpu_cs_fence fence_status = {0}; 35025324fb0dSmrg 35035324fb0dSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 35045324fb0dSmrg 
CU_ASSERT_EQUAL(r, 0); 35055324fb0dSmrg 35065324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 35075324fb0dSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 35085324fb0dSmrg &bo_cmd, (void **)&ptr_cmd, 35095324fb0dSmrg &mc_address_cmd, &va_cmd); 35105324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 35115324fb0dSmrg memset(ptr_cmd, 0, bo_cmd_size); 35125324fb0dSmrg 35135324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096, 35145324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 35155324fb0dSmrg &bo_src, (void **)&ptr_src, 35165324fb0dSmrg &mc_address_src, &va_src); 35175324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 35185324fb0dSmrg 35195324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096, 35205324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 35215324fb0dSmrg &bo_dst, (void **)&ptr_dst, 35225324fb0dSmrg &mc_address_dst, &va_dst); 35235324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 35245324fb0dSmrg 35255324fb0dSmrg memset(ptr_src, 0x55, bo_size); 35265324fb0dSmrg 35275324fb0dSmrg i = 0; 35285324fb0dSmrg i += amdgpu_draw_init(ptr_cmd + i); 35295324fb0dSmrg 35309bd392adSmrg i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0); 35315324fb0dSmrg 35329bd392adSmrg i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0); 35335324fb0dSmrg 35349bd392adSmrg i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs, 0); 35355324fb0dSmrg 35365324fb0dSmrg i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps); 35375324fb0dSmrg 35385324fb0dSmrg ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8); 35395324fb0dSmrg ptr_cmd[i++] = 0xc; 35405324fb0dSmrg ptr_cmd[i++] = mc_address_src >> 8; 35415324fb0dSmrg ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000; 35425324fb0dSmrg ptr_cmd[i++] = 0x7c01f; 35435324fb0dSmrg ptr_cmd[i++] = 0x90500fac; 35445324fb0dSmrg ptr_cmd[i++] = 0x3e000; 35455324fb0dSmrg i += 3; 35465324fb0dSmrg 35475324fb0dSmrg ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4); 35485324fb0dSmrg ptr_cmd[i++] = 0x14; 35495324fb0dSmrg 
ptr_cmd[i++] = 0x92; 35505324fb0dSmrg i += 3; 35515324fb0dSmrg 355288f8a8d2Smrg ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 35535324fb0dSmrg ptr_cmd[i++] = 0x191; 35545324fb0dSmrg ptr_cmd[i++] = 0; 35555324fb0dSmrg 35565324fb0dSmrg i += amdgpu_draw_draw(ptr_cmd + i); 35575324fb0dSmrg 35585324fb0dSmrg while (i & 7) 35595324fb0dSmrg ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 35605324fb0dSmrg 35615324fb0dSmrg resources[0] = bo_dst; 35625324fb0dSmrg resources[1] = bo_src; 35635324fb0dSmrg resources[2] = bo_shader_ps; 35645324fb0dSmrg resources[3] = bo_shader_vs; 35655324fb0dSmrg resources[4] = bo_cmd; 35665324fb0dSmrg r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list); 35675324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 35685324fb0dSmrg 35695324fb0dSmrg ib_info.ib_mc_address = mc_address_cmd; 35705324fb0dSmrg ib_info.size = i; 35715324fb0dSmrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 35725324fb0dSmrg ibs_request.ring = ring; 35735324fb0dSmrg ibs_request.resources = bo_list; 35745324fb0dSmrg ibs_request.number_of_ibs = 1; 35755324fb0dSmrg ibs_request.ibs = &ib_info; 35765324fb0dSmrg ibs_request.fence_info.handle = NULL; 35775324fb0dSmrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 35785324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 35795324fb0dSmrg 35805324fb0dSmrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 35815324fb0dSmrg fence_status.ip_instance = 0; 35825324fb0dSmrg fence_status.ring = ring; 35835324fb0dSmrg fence_status.context = context_handle; 35845324fb0dSmrg fence_status.fence = ibs_request.seq_no; 35855324fb0dSmrg 35865324fb0dSmrg /* wait for IB accomplished */ 35875324fb0dSmrg r = amdgpu_cs_query_fence_status(&fence_status, 35885324fb0dSmrg AMDGPU_TIMEOUT_INFINITE, 35895324fb0dSmrg 0, &expired); 35909bd392adSmrg if (!hang) { 35919bd392adSmrg CU_ASSERT_EQUAL(r, 0); 35929bd392adSmrg CU_ASSERT_EQUAL(expired, true); 35935324fb0dSmrg 35949bd392adSmrg /* verify if memcpy test result meets with expected */ 35959bd392adSmrg i = 0; 
35969bd392adSmrg while(i < bo_size) { 35979bd392adSmrg CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]); 35989bd392adSmrg i++; 35999bd392adSmrg } 36009bd392adSmrg } else { 36019bd392adSmrg r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); 36029bd392adSmrg CU_ASSERT_EQUAL(r, 0); 36039bd392adSmrg CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); 36045324fb0dSmrg } 36055324fb0dSmrg 36065324fb0dSmrg r = amdgpu_bo_list_destroy(bo_list); 36075324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 36085324fb0dSmrg 36095324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size); 36105324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 36115324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size); 36125324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 36135324fb0dSmrg 36145324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 36155324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 36165324fb0dSmrg 36175324fb0dSmrg r = amdgpu_cs_ctx_free(context_handle); 36185324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 36195324fb0dSmrg} 36205324fb0dSmrg 36219bd392adSmrgvoid amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring, 36229bd392adSmrg int hang) 36235324fb0dSmrg{ 36245324fb0dSmrg amdgpu_bo_handle bo_shader_ps, bo_shader_vs; 36255324fb0dSmrg void *ptr_shader_ps; 36265324fb0dSmrg void *ptr_shader_vs; 36275324fb0dSmrg uint64_t mc_address_shader_ps, mc_address_shader_vs; 36285324fb0dSmrg amdgpu_va_handle va_shader_ps, va_shader_vs; 36295324fb0dSmrg int bo_shader_size = 4096; 36309bd392adSmrg enum ps_type ps_type = hang ? 
PS_HANG : PS_TEX; 36315324fb0dSmrg int r; 36325324fb0dSmrg 36335324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 36345324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 36355324fb0dSmrg &bo_shader_ps, &ptr_shader_ps, 36365324fb0dSmrg &mc_address_shader_ps, &va_shader_ps); 36375324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 363888f8a8d2Smrg memset(ptr_shader_ps, 0, bo_shader_size); 36395324fb0dSmrg 36405324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 36415324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 36425324fb0dSmrg &bo_shader_vs, &ptr_shader_vs, 36435324fb0dSmrg &mc_address_shader_vs, &va_shader_vs); 36445324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 364588f8a8d2Smrg memset(ptr_shader_vs, 0, bo_shader_size); 36465324fb0dSmrg 36479bd392adSmrg r = amdgpu_draw_load_ps_shader(ptr_shader_ps, ps_type); 36485324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 36495324fb0dSmrg 36505324fb0dSmrg r = amdgpu_draw_load_vs_shader(ptr_shader_vs); 36515324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 36525324fb0dSmrg 36535324fb0dSmrg amdgpu_memcpy_draw(device_handle, bo_shader_ps, bo_shader_vs, 36549bd392adSmrg mc_address_shader_ps, mc_address_shader_vs, ring, hang); 36555324fb0dSmrg 36565324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size); 36575324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 36585324fb0dSmrg 36595324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size); 36605324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 36615324fb0dSmrg} 36625324fb0dSmrg 36635324fb0dSmrgstatic void amdgpu_draw_test(void) 36645324fb0dSmrg{ 36655324fb0dSmrg int r; 36665324fb0dSmrg struct drm_amdgpu_info_hw_ip info; 36675324fb0dSmrg uint32_t ring_id; 36685324fb0dSmrg 36695324fb0dSmrg r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info); 36705324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 367188f8a8d2Smrg if (!info.available_rings) 367288f8a8d2Smrg printf("SKIP ... 
as there's no graphics ring\n"); 36735324fb0dSmrg 36745324fb0dSmrg for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 36755324fb0dSmrg amdgpu_memset_draw_test(device_handle, ring_id); 36769bd392adSmrg amdgpu_memcpy_draw_test(device_handle, ring_id, 0); 36775324fb0dSmrg } 36785324fb0dSmrg} 367988f8a8d2Smrg 36809bd392adSmrgvoid amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint32_t ring) 36819bd392adSmrg{ 36829bd392adSmrg amdgpu_context_handle context_handle; 36839bd392adSmrg amdgpu_bo_handle bo_shader_ps, bo_shader_vs; 36849bd392adSmrg amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5]; 36859bd392adSmrg void *ptr_shader_ps; 36869bd392adSmrg void *ptr_shader_vs; 36879bd392adSmrg volatile unsigned char *ptr_dst; 36889bd392adSmrg unsigned char *ptr_src; 36899bd392adSmrg uint32_t *ptr_cmd; 36909bd392adSmrg uint64_t mc_address_dst, mc_address_src, mc_address_cmd; 36919bd392adSmrg uint64_t mc_address_shader_ps, mc_address_shader_vs; 36929bd392adSmrg amdgpu_va_handle va_shader_ps, va_shader_vs; 36939bd392adSmrg amdgpu_va_handle va_dst, va_src, va_cmd; 36949bd392adSmrg struct amdgpu_gpu_info gpu_info = {0}; 36959bd392adSmrg int i, r; 36969bd392adSmrg int bo_size = 0x4000000; 36979bd392adSmrg int bo_shader_ps_size = 0x400000; 36989bd392adSmrg int bo_shader_vs_size = 4096; 36999bd392adSmrg int bo_cmd_size = 4096; 37009bd392adSmrg struct amdgpu_cs_request ibs_request = {0}; 37019bd392adSmrg struct amdgpu_cs_ib_info ib_info= {0}; 37029bd392adSmrg uint32_t hang_state, hangs, expired; 37039bd392adSmrg amdgpu_bo_list_handle bo_list; 37049bd392adSmrg struct amdgpu_cs_fence fence_status = {0}; 37059bd392adSmrg 37069bd392adSmrg r = amdgpu_query_gpu_info(device_handle, &gpu_info); 37079bd392adSmrg CU_ASSERT_EQUAL(r, 0); 37089bd392adSmrg 37099bd392adSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 37109bd392adSmrg CU_ASSERT_EQUAL(r, 0); 37119bd392adSmrg 37129bd392adSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 
4096, 37139bd392adSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 37149bd392adSmrg &bo_cmd, (void **)&ptr_cmd, 37159bd392adSmrg &mc_address_cmd, &va_cmd); 37169bd392adSmrg CU_ASSERT_EQUAL(r, 0); 37179bd392adSmrg memset(ptr_cmd, 0, bo_cmd_size); 37189bd392adSmrg 37199bd392adSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_ps_size, 4096, 37209bd392adSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 37219bd392adSmrg &bo_shader_ps, &ptr_shader_ps, 37229bd392adSmrg &mc_address_shader_ps, &va_shader_ps); 37239bd392adSmrg CU_ASSERT_EQUAL(r, 0); 37249bd392adSmrg memset(ptr_shader_ps, 0, bo_shader_ps_size); 37259bd392adSmrg 37269bd392adSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_vs_size, 4096, 37279bd392adSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 37289bd392adSmrg &bo_shader_vs, &ptr_shader_vs, 37299bd392adSmrg &mc_address_shader_vs, &va_shader_vs); 37309bd392adSmrg CU_ASSERT_EQUAL(r, 0); 37319bd392adSmrg memset(ptr_shader_vs, 0, bo_shader_vs_size); 37329bd392adSmrg 37339bd392adSmrg r = amdgpu_draw_load_ps_shader_hang_slow(ptr_shader_ps, gpu_info.family_id); 37349bd392adSmrg CU_ASSERT_EQUAL(r, 0); 37359bd392adSmrg 37369bd392adSmrg r = amdgpu_draw_load_vs_shader(ptr_shader_vs); 37379bd392adSmrg CU_ASSERT_EQUAL(r, 0); 37389bd392adSmrg 37399bd392adSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096, 37409bd392adSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 37419bd392adSmrg &bo_src, (void **)&ptr_src, 37429bd392adSmrg &mc_address_src, &va_src); 37439bd392adSmrg CU_ASSERT_EQUAL(r, 0); 37449bd392adSmrg 37459bd392adSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096, 37469bd392adSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 37479bd392adSmrg &bo_dst, (void **)&ptr_dst, 37489bd392adSmrg &mc_address_dst, &va_dst); 37499bd392adSmrg CU_ASSERT_EQUAL(r, 0); 37509bd392adSmrg 37519bd392adSmrg memset(ptr_src, 0x55, bo_size); 37529bd392adSmrg 37539bd392adSmrg i = 0; 37549bd392adSmrg i += amdgpu_draw_init(ptr_cmd + i); 37559bd392adSmrg 37569bd392adSmrg i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, 
mc_address_dst, 1); 37579bd392adSmrg 37589bd392adSmrg i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 1); 37599bd392adSmrg 37609bd392adSmrg i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, 37619bd392adSmrg mc_address_shader_vs, 1); 37629bd392adSmrg 37639bd392adSmrg i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps); 37649bd392adSmrg 37659bd392adSmrg ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8); 37669bd392adSmrg ptr_cmd[i++] = 0xc; 37679bd392adSmrg ptr_cmd[i++] = mc_address_src >> 8; 37689bd392adSmrg ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000; 37699bd392adSmrg ptr_cmd[i++] = 0x1ffc7ff; 37709bd392adSmrg ptr_cmd[i++] = 0x90500fac; 37719bd392adSmrg ptr_cmd[i++] = 0xffe000; 37729bd392adSmrg i += 3; 37739bd392adSmrg 37749bd392adSmrg ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4); 37759bd392adSmrg ptr_cmd[i++] = 0x14; 37769bd392adSmrg ptr_cmd[i++] = 0x92; 37779bd392adSmrg i += 3; 37789bd392adSmrg 37799bd392adSmrg ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 37809bd392adSmrg ptr_cmd[i++] = 0x191; 37819bd392adSmrg ptr_cmd[i++] = 0; 37829bd392adSmrg 37839bd392adSmrg i += amdgpu_draw_draw(ptr_cmd + i); 37849bd392adSmrg 37859bd392adSmrg while (i & 7) 37869bd392adSmrg ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 37879bd392adSmrg 37889bd392adSmrg resources[0] = bo_dst; 37899bd392adSmrg resources[1] = bo_src; 37909bd392adSmrg resources[2] = bo_shader_ps; 37919bd392adSmrg resources[3] = bo_shader_vs; 37929bd392adSmrg resources[4] = bo_cmd; 37939bd392adSmrg r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list); 37949bd392adSmrg CU_ASSERT_EQUAL(r, 0); 37959bd392adSmrg 37969bd392adSmrg ib_info.ib_mc_address = mc_address_cmd; 37979bd392adSmrg ib_info.size = i; 37989bd392adSmrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 37999bd392adSmrg ibs_request.ring = ring; 38009bd392adSmrg ibs_request.resources = bo_list; 38019bd392adSmrg ibs_request.number_of_ibs = 1; 38029bd392adSmrg ibs_request.ibs = &ib_info; 
38039bd392adSmrg ibs_request.fence_info.handle = NULL; 38049bd392adSmrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 38059bd392adSmrg CU_ASSERT_EQUAL(r, 0); 38069bd392adSmrg 38079bd392adSmrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 38089bd392adSmrg fence_status.ip_instance = 0; 38099bd392adSmrg fence_status.ring = ring; 38109bd392adSmrg fence_status.context = context_handle; 38119bd392adSmrg fence_status.fence = ibs_request.seq_no; 38129bd392adSmrg 38139bd392adSmrg /* wait for IB accomplished */ 38149bd392adSmrg r = amdgpu_cs_query_fence_status(&fence_status, 38159bd392adSmrg AMDGPU_TIMEOUT_INFINITE, 38169bd392adSmrg 0, &expired); 38179bd392adSmrg 38189bd392adSmrg r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); 38199bd392adSmrg CU_ASSERT_EQUAL(r, 0); 38209bd392adSmrg CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); 38219bd392adSmrg 38229bd392adSmrg r = amdgpu_bo_list_destroy(bo_list); 38239bd392adSmrg CU_ASSERT_EQUAL(r, 0); 38249bd392adSmrg 38259bd392adSmrg r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size); 38269bd392adSmrg CU_ASSERT_EQUAL(r, 0); 38279bd392adSmrg r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size); 38289bd392adSmrg CU_ASSERT_EQUAL(r, 0); 38299bd392adSmrg 38309bd392adSmrg r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 38319bd392adSmrg CU_ASSERT_EQUAL(r, 0); 38329bd392adSmrg 38339bd392adSmrg r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_ps_size); 38349bd392adSmrg CU_ASSERT_EQUAL(r, 0); 38359bd392adSmrg r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_vs_size); 38369bd392adSmrg CU_ASSERT_EQUAL(r, 0); 38379bd392adSmrg 38389bd392adSmrg r = amdgpu_cs_ctx_free(context_handle); 38399bd392adSmrg CU_ASSERT_EQUAL(r, 0); 38409bd392adSmrg} 38419bd392adSmrg 384288f8a8d2Smrgstatic void amdgpu_gpu_reset_test(void) 384388f8a8d2Smrg{ 384488f8a8d2Smrg int r; 384588f8a8d2Smrg char 
debugfs_path[256], tmp[10]; 384688f8a8d2Smrg int fd; 384788f8a8d2Smrg struct stat sbuf; 384888f8a8d2Smrg amdgpu_context_handle context_handle; 384988f8a8d2Smrg uint32_t hang_state, hangs; 385088f8a8d2Smrg 385188f8a8d2Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 385288f8a8d2Smrg CU_ASSERT_EQUAL(r, 0); 385388f8a8d2Smrg 385488f8a8d2Smrg r = fstat(drm_amdgpu[0], &sbuf); 385588f8a8d2Smrg CU_ASSERT_EQUAL(r, 0); 385688f8a8d2Smrg 385788f8a8d2Smrg sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/amdgpu_gpu_recover", minor(sbuf.st_rdev)); 385888f8a8d2Smrg fd = open(debugfs_path, O_RDONLY); 385988f8a8d2Smrg CU_ASSERT(fd >= 0); 386088f8a8d2Smrg 386188f8a8d2Smrg r = read(fd, tmp, sizeof(tmp)/sizeof(char)); 386288f8a8d2Smrg CU_ASSERT(r > 0); 386388f8a8d2Smrg 386488f8a8d2Smrg r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); 386588f8a8d2Smrg CU_ASSERT_EQUAL(r, 0); 386688f8a8d2Smrg CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); 386788f8a8d2Smrg 386888f8a8d2Smrg close(fd); 386988f8a8d2Smrg r = amdgpu_cs_ctx_free(context_handle); 387088f8a8d2Smrg CU_ASSERT_EQUAL(r, 0); 387188f8a8d2Smrg 387288f8a8d2Smrg amdgpu_compute_dispatch_test(); 387388f8a8d2Smrg amdgpu_gfx_dispatch_test(); 387488f8a8d2Smrg} 3875