/* basic_tests.c revision 5324fb0d */
13f012e29Smrg/* 23f012e29Smrg * Copyright 2014 Advanced Micro Devices, Inc. 33f012e29Smrg * 43f012e29Smrg * Permission is hereby granted, free of charge, to any person obtaining a 53f012e29Smrg * copy of this software and associated documentation files (the "Software"), 63f012e29Smrg * to deal in the Software without restriction, including without limitation 73f012e29Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 83f012e29Smrg * and/or sell copies of the Software, and to permit persons to whom the 93f012e29Smrg * Software is furnished to do so, subject to the following conditions: 103f012e29Smrg * 113f012e29Smrg * The above copyright notice and this permission notice shall be included in 123f012e29Smrg * all copies or substantial portions of the Software. 133f012e29Smrg * 143f012e29Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 153f012e29Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 163f012e29Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 173f012e29Smrg * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 183f012e29Smrg * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 193f012e29Smrg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 203f012e29Smrg * OTHER DEALINGS IN THE SOFTWARE. 
213f012e29Smrg * 223f012e29Smrg*/ 233f012e29Smrg 243f012e29Smrg#include <stdio.h> 253f012e29Smrg#include <stdlib.h> 263f012e29Smrg#include <unistd.h> 273f012e29Smrg#ifdef HAVE_ALLOCA_H 283f012e29Smrg# include <alloca.h> 293f012e29Smrg#endif 3000a23bdaSmrg#include <sys/wait.h> 313f012e29Smrg 323f012e29Smrg#include "CUnit/Basic.h" 333f012e29Smrg 343f012e29Smrg#include "amdgpu_test.h" 353f012e29Smrg#include "amdgpu_drm.h" 367cdc0497Smrg#include "util_math.h" 373f012e29Smrg 383f012e29Smrgstatic amdgpu_device_handle device_handle; 393f012e29Smrgstatic uint32_t major_version; 403f012e29Smrgstatic uint32_t minor_version; 41d8807b2fSmrgstatic uint32_t family_id; 423f012e29Smrg 433f012e29Smrgstatic void amdgpu_query_info_test(void); 443f012e29Smrgstatic void amdgpu_command_submission_gfx(void); 453f012e29Smrgstatic void amdgpu_command_submission_compute(void); 46d8807b2fSmrgstatic void amdgpu_command_submission_multi_fence(void); 473f012e29Smrgstatic void amdgpu_command_submission_sdma(void); 483f012e29Smrgstatic void amdgpu_userptr_test(void); 493f012e29Smrgstatic void amdgpu_semaphore_test(void); 5000a23bdaSmrgstatic void amdgpu_sync_dependency_test(void); 5100a23bdaSmrgstatic void amdgpu_bo_eviction_test(void); 525324fb0dSmrgstatic void amdgpu_dispatch_test(void); 535324fb0dSmrgstatic void amdgpu_draw_test(void); 543f012e29Smrg 553f012e29Smrgstatic void amdgpu_command_submission_write_linear_helper(unsigned ip_type); 563f012e29Smrgstatic void amdgpu_command_submission_const_fill_helper(unsigned ip_type); 573f012e29Smrgstatic void amdgpu_command_submission_copy_linear_helper(unsigned ip_type); 5800a23bdaSmrgstatic void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle, 5900a23bdaSmrg unsigned ip_type, 6000a23bdaSmrg int instance, int pm4_dw, uint32_t *pm4_src, 6100a23bdaSmrg int res_cnt, amdgpu_bo_handle *resources, 6200a23bdaSmrg struct amdgpu_cs_ib_info *ib_info, 6300a23bdaSmrg struct amdgpu_cs_request *ibs_request); 6400a23bdaSmrg 
653f012e29SmrgCU_TestInfo basic_tests[] = { 663f012e29Smrg { "Query Info Test", amdgpu_query_info_test }, 673f012e29Smrg { "Userptr Test", amdgpu_userptr_test }, 6800a23bdaSmrg { "bo eviction Test", amdgpu_bo_eviction_test }, 693f012e29Smrg { "Command submission Test (GFX)", amdgpu_command_submission_gfx }, 703f012e29Smrg { "Command submission Test (Compute)", amdgpu_command_submission_compute }, 71d8807b2fSmrg { "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence }, 723f012e29Smrg { "Command submission Test (SDMA)", amdgpu_command_submission_sdma }, 733f012e29Smrg { "SW semaphore Test", amdgpu_semaphore_test }, 7400a23bdaSmrg { "Sync dependency Test", amdgpu_sync_dependency_test }, 755324fb0dSmrg { "Dispatch Test", amdgpu_dispatch_test }, 765324fb0dSmrg { "Draw Test", amdgpu_draw_test }, 773f012e29Smrg CU_TEST_INFO_NULL, 783f012e29Smrg}; 793f012e29Smrg#define BUFFER_SIZE (8 * 1024) 803f012e29Smrg#define SDMA_PKT_HEADER_op_offset 0 813f012e29Smrg#define SDMA_PKT_HEADER_op_mask 0x000000FF 823f012e29Smrg#define SDMA_PKT_HEADER_op_shift 0 833f012e29Smrg#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift) 843f012e29Smrg#define SDMA_OPCODE_CONSTANT_FILL 11 853f012e29Smrg# define SDMA_CONSTANT_FILL_EXTRA_SIZE(x) ((x) << 14) 863f012e29Smrg /* 0 = byte fill 873f012e29Smrg * 2 = DW fill 883f012e29Smrg */ 893f012e29Smrg#define SDMA_PACKET(op, sub_op, e) ((((e) & 0xFFFF) << 16) | \ 903f012e29Smrg (((sub_op) & 0xFF) << 8) | \ 913f012e29Smrg (((op) & 0xFF) << 0)) 923f012e29Smrg#define SDMA_OPCODE_WRITE 2 933f012e29Smrg# define SDMA_WRITE_SUB_OPCODE_LINEAR 0 943f012e29Smrg# define SDMA_WRTIE_SUB_OPCODE_TILED 1 953f012e29Smrg 963f012e29Smrg#define SDMA_OPCODE_COPY 1 973f012e29Smrg# define SDMA_COPY_SUB_OPCODE_LINEAR 0 983f012e29Smrg 993f012e29Smrg#define GFX_COMPUTE_NOP 0xffff1000 1003f012e29Smrg#define SDMA_NOP 0x0 1013f012e29Smrg 1023f012e29Smrg/* PM4 */ 1033f012e29Smrg#define PACKET_TYPE0 0 
#define	PACKET_TYPE1	1
#define	PACKET_TYPE2	2
#define	PACKET_TYPE3	3

/* Field extractors for a PM4 packet header word. */
#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
#define PACKET0(reg, n)	((PACKET_TYPE0 << 30) |				\
			 ((reg) & 0xFFFF) |			\
			 ((n) & 0x3FFF) << 16)
#define CP_PACKET2			0x80000000
#define		PACKET2_PAD_SHIFT		0
#define		PACKET2_PAD_MASK		(0x3fffffff << 0)

#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))

/*
 * Type-3 packet header: type in bits [31:30], count in [29:16], opcode in
 * [15:8].  The type is shifted as unsigned because 3 << 30 does not fit in a
 * signed int.
 */
#define PACKET3(op, n)	(((unsigned)PACKET_TYPE3 << 30) |		\
			 (((op) & 0xFF) << 8) |				\
			 ((n) & 0x3FFF) << 16)
/* PACKET3 header with bit 1 set; fully parenthesized so it composes safely. */
#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | (1 << 1))

/* Packet 3 types */
#define	PACKET3_NOP					0x10

#define	PACKET3_WRITE_DATA				0x37
#define		WRITE_DATA_DST_SEL(x)                   ((x) << 8)
		/* 0 - register
		 * 1 - memory (sync - via GRBM)
		 * 2 - gl2
		 * 3 - gds
		 * 4 - reserved
		 * 5 - memory (async - direct)
		 */
#define		WR_ONE_ADDR                             (1 << 16)
#define		WR_CONFIRM                              (1 << 20)
#define		WRITE_DATA_CACHE_POLICY(x)              ((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 */
		/* unsigned shift: selector 2 would otherwise reach the sign bit */
#define		WRITE_DATA_ENGINE_SEL(x)                ((unsigned)(x) << 30)
		/* 0 - me
		 * 1 - pfp
		 * 2 - ce
		 */

#define	PACKET3_DMA_DATA				0x50
/* 1. header
 * 2. CONTROL
 * 3. SRC_ADDR_LO or DATA [31:0]
 * 4. SRC_ADDR_HI [31:0]
 * 5. DST_ADDR_LO [31:0]
 * 6. DST_ADDR_HI [7:0]
 * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
 */
/* CONTROL */
#              define PACKET3_DMA_DATA_ENGINE(x)     ((x) << 0)
		/* 0 - ME
		 * 1 - PFP
		 */
#              define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#              define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15)
#              define PACKET3_DMA_DATA_DST_SEL(x)  ((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#              define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27)
#              define PACKET3_DMA_DATA_SRC_SEL(x)  ((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_CP_SYNC     (1u << 31)
/* COMMAND */
#              define PACKET3_DMA_DATA_DIS_WC      (1 << 21)
#              define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#              define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#              define PACKET3_DMA_DATA_CMD_SAS     (1 << 26)
		/* 0 - memory
		 * 1 - register
		 */
#              define PACKET3_DMA_DATA_CMD_DAS     (1 << 27)
		/* 0 - memory
		 * 1 - register
		 */
#              define PACKET3_DMA_DATA_CMD_SAIC    (1 << 28)
#              define PACKET3_DMA_DATA_CMD_DAIC    (1 << 29)
#              define PACKET3_DMA_DATA_CMD_RAW_WAIT  (1 << 30)

/*
 * SI-style SDMA header: opcode in bits [31:28], single-bit fields b/t/s at
 * bits 26/23/22, count in [19:0].  The opcode is shifted as unsigned because
 * opcodes >= 8 would otherwise overflow a signed int.
 */
#define SDMA_PACKET_SI(op, b, t, s, cnt)	((((unsigned)(op) & 0xF) << 28) |	\
						(((b) & 0x1) << 26) |		\
						(((t) & 0x1) << 23) |		\
						(((s) & 0x1) << 22) |		\
						(((cnt) & 0xFFFFF) << 0))
#define	SDMA_OPCODE_COPY_SI	3
#define SDMA_OPCODE_CONSTANT_FILL_SI	13
#define SDMA_NOP_SI  0xf
#define GFX_COMPUTE_NOP_SI 0x80000000
#define	PACKET3_DMA_DATA_SI	0x41
#              define PACKET3_DMA_DATA_SI_ENGINE(x)     ((x) << 27)
		/* 0 - ME
		 * 1 - PFP
		 */
#              define PACKET3_DMA_DATA_SI_DST_SEL(x)  ((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_SI_SRC_SEL(x)  ((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_SI_CP_SYNC     (1u << 31)


#define PKT3_CONTEXT_CONTROL                   0x28
#define     CONTEXT_CONTROL_LOAD_ENABLE(x)     (((unsigned)(x) & 0x1) << 31)
#define     CONTEXT_CONTROL_LOAD_CE_RAM(x)     (((unsigned)(x) & 0x1) << 28)
#define     CONTEXT_CONTROL_SHADOW_ENABLE(x)   (((unsigned)(x) & 0x1) << 31)

#define PKT3_CLEAR_STATE                       0x12

#define PKT3_SET_SH_REG                        0x76
#define		PACKET3_SET_SH_REG_START			0x00002c00

#define	PACKET3_DISPATCH_DIRECT				0x15
2535324fb0dSmrg#define PACKET3_EVENT_WRITE 0x46 2545324fb0dSmrg#define PACKET3_ACQUIRE_MEM 0x58 2555324fb0dSmrg#define PACKET3_SET_CONTEXT_REG 0x69 2565324fb0dSmrg#define PACKET3_SET_UCONFIG_REG 0x79 2575324fb0dSmrg#define PACKET3_DRAW_INDEX_AUTO 0x2D 25800a23bdaSmrg/* gfx 8 */ 25900a23bdaSmrg#define mmCOMPUTE_PGM_LO 0x2e0c 26000a23bdaSmrg#define mmCOMPUTE_PGM_RSRC1 0x2e12 26100a23bdaSmrg#define mmCOMPUTE_TMPRING_SIZE 0x2e18 26200a23bdaSmrg#define mmCOMPUTE_USER_DATA_0 0x2e40 26300a23bdaSmrg#define mmCOMPUTE_USER_DATA_1 0x2e41 26400a23bdaSmrg#define mmCOMPUTE_RESOURCE_LIMITS 0x2e15 26500a23bdaSmrg#define mmCOMPUTE_NUM_THREAD_X 0x2e07 26600a23bdaSmrg 26700a23bdaSmrg 26800a23bdaSmrg 26900a23bdaSmrg#define SWAP_32(num) (((num & 0xff000000) >> 24) | \ 27000a23bdaSmrg ((num & 0x0000ff00) << 8) | \ 27100a23bdaSmrg ((num & 0x00ff0000) >> 8) | \ 27200a23bdaSmrg ((num & 0x000000ff) << 24)) 27300a23bdaSmrg 27400a23bdaSmrg 27500a23bdaSmrg/* Shader code 27600a23bdaSmrg * void main() 27700a23bdaSmrg{ 27800a23bdaSmrg 27900a23bdaSmrg float x = some_input; 28000a23bdaSmrg for (unsigned i = 0; i < 1000000; i++) 28100a23bdaSmrg x = sin(x); 28200a23bdaSmrg 28300a23bdaSmrg u[0] = 42u; 28400a23bdaSmrg} 28500a23bdaSmrg*/ 28600a23bdaSmrg 28700a23bdaSmrgstatic uint32_t shader_bin[] = { 28800a23bdaSmrg SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf), 28900a23bdaSmrg SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf), 29000a23bdaSmrg SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e), 29100a23bdaSmrg SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf) 29200a23bdaSmrg}; 29300a23bdaSmrg 29400a23bdaSmrg#define CODE_OFFSET 512 29500a23bdaSmrg#define DATA_OFFSET 1024 29600a23bdaSmrg 2975324fb0dSmrgenum cs_type { 2985324fb0dSmrg CS_BUFFERCLEAR, 2995324fb0dSmrg CS_BUFFERCOPY 3005324fb0dSmrg}; 3015324fb0dSmrg 3025324fb0dSmrgstatic const uint32_t bufferclear_cs_shader_gfx9[] = { 
3035324fb0dSmrg 0xD1FD0000, 0x04010C08, 0x7E020204, 0x7E040205, 3045324fb0dSmrg 0x7E060206, 0x7E080207, 0xE01C2000, 0x80000100, 3055324fb0dSmrg 0xBF810000 3065324fb0dSmrg}; 3075324fb0dSmrg 3085324fb0dSmrgstatic const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = { 3095324fb0dSmrg {0x2e12, 0x000C0041}, //{ mmCOMPUTE_PGM_RSRC1, 0x000C0041 }, 3105324fb0dSmrg {0x2e13, 0x00000090}, //{ mmCOMPUTE_PGM_RSRC2, 0x00000090 }, 3115324fb0dSmrg {0x2e07, 0x00000040}, //{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 }, 3125324fb0dSmrg {0x2e08, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 }, 3135324fb0dSmrg {0x2e09, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 } 3145324fb0dSmrg}; 3155324fb0dSmrg 3165324fb0dSmrgstatic const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5; 3175324fb0dSmrg 3185324fb0dSmrgstatic const uint32_t buffercopy_cs_shader_gfx9[] = { 3195324fb0dSmrg 0xD1FD0000, 0x04010C08, 0xE00C2000, 0x80000100, 3205324fb0dSmrg 0xBF8C0F70, 0xE01C2000, 0x80010100, 0xBF810000 3215324fb0dSmrg}; 3225324fb0dSmrg 3235324fb0dSmrgstatic const uint32_t preamblecache_gfx9[] = { 3245324fb0dSmrg 0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0, 3255324fb0dSmrg 0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000, 3265324fb0dSmrg 0xc0026900, 0xb4, 0x0, 0x3f800000, 0xc0016900, 0x103, 0x0, 3275324fb0dSmrg 0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0, 3285324fb0dSmrg 0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0, 3295324fb0dSmrg 0xc0016900, 0x2d5, 0x10000, 0xc0016900, 0x2dc, 0x0, 3305324fb0dSmrg 0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0, 3315324fb0dSmrg 0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 3325324fb0dSmrg 0xc0026900, 0x311, 0x3, 0x0, 0xc0026900, 0x316, 0x1e, 0x20, 3335324fb0dSmrg 0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0, 3345324fb0dSmrg 0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0, 
3355324fb0dSmrg 0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0, 3365324fb0dSmrg 0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 3375324fb0dSmrg 0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0, 3385324fb0dSmrg 0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff, 3395324fb0dSmrg 0xc0016900, 0x314, 0x0, 0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1, 3405324fb0dSmrg 0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0, 3415324fb0dSmrg 0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0, 3425324fb0dSmrg 0xc0017900, 0x24b, 0x0 3435324fb0dSmrg}; 3445324fb0dSmrg 3455324fb0dSmrgenum ps_type { 3465324fb0dSmrg PS_CONST, 3475324fb0dSmrg PS_TEX 3485324fb0dSmrg}; 3495324fb0dSmrg 3505324fb0dSmrgstatic const uint32_t ps_const_shader_gfx9[] = { 3515324fb0dSmrg 0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203, 3525324fb0dSmrg 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 3535324fb0dSmrg 0xC4001C0F, 0x00000100, 0xBF810000 3545324fb0dSmrg}; 3555324fb0dSmrg 3565324fb0dSmrgstatic const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6; 3575324fb0dSmrg 3585324fb0dSmrgstatic const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = { 3595324fb0dSmrg {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 }, 3605324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 }, 3615324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 }, 3625324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 }, 3635324fb0dSmrg { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 }, 3645324fb0dSmrg { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 }, 3655324fb0dSmrg { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 }, 3665324fb0dSmrg { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 }, 3675324fb0dSmrg { 0xD2980000, 0x00020300, 
0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 }, 3685324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 } 3695324fb0dSmrg } 3705324fb0dSmrg}; 3715324fb0dSmrg 3725324fb0dSmrgstatic const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = { 3735324fb0dSmrg 0x00000004 3745324fb0dSmrg}; 3755324fb0dSmrg 3765324fb0dSmrgstatic const uint32_t ps_num_sh_registers_gfx9 = 2; 3775324fb0dSmrg 3785324fb0dSmrgstatic const uint32_t ps_const_sh_registers_gfx9[][2] = { 3795324fb0dSmrg {0x2C0A, 0x000C0040},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 }, 3805324fb0dSmrg {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 } 3815324fb0dSmrg}; 3825324fb0dSmrg 3835324fb0dSmrgstatic const uint32_t ps_num_context_registers_gfx9 = 7; 3845324fb0dSmrg 3855324fb0dSmrgstatic const uint32_t ps_const_context_reg_gfx9[][2] = { 3865324fb0dSmrg {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR, 0x00000002 }, 3875324fb0dSmrg {0xA1B6, 0x00000000}, //{ mmSPI_PS_IN_CONTROL, 0x00000000 }, 3885324fb0dSmrg {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK, 0x0000000F }, 3895324fb0dSmrg {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL, 0x00000010 }, 3905324fb0dSmrg {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT, 0x00000000 }, 3915324fb0dSmrg {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */}, 3925324fb0dSmrg {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT, 0x00000004 } 3935324fb0dSmrg}; 3945324fb0dSmrg 3955324fb0dSmrgstatic const uint32_t ps_tex_shader_gfx9[] = { 3965324fb0dSmrg 0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000, 3975324fb0dSmrg 0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00, 3985324fb0dSmrg 0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000, 3995324fb0dSmrg 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 4005324fb0dSmrg 0x00000100, 0xBF810000 4015324fb0dSmrg}; 4025324fb0dSmrg 4035324fb0dSmrgstatic const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = { 4045324fb0dSmrg 0x0000000B 4055324fb0dSmrg}; 4065324fb0dSmrg 4075324fb0dSmrgstatic 
const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6; 4085324fb0dSmrg 4095324fb0dSmrgstatic const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = { 4105324fb0dSmrg {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 }, 4115324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 }, 4125324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 }, 4135324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 }, 4145324fb0dSmrg { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 4155324fb0dSmrg { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 4165324fb0dSmrg { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 4175324fb0dSmrg { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 4185324fb0dSmrg { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 4195324fb0dSmrg { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 } 4205324fb0dSmrg } 4215324fb0dSmrg}; 4225324fb0dSmrg 4235324fb0dSmrgstatic const uint32_t ps_tex_sh_registers_gfx9[][2] = { 4245324fb0dSmrg {0x2C0A, 0x000C0081},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 }, 4255324fb0dSmrg {0x2C0B, 0x00000018}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 } 4265324fb0dSmrg}; 4275324fb0dSmrg 4285324fb0dSmrgstatic const uint32_t ps_tex_context_reg_gfx9[][2] = { 4295324fb0dSmrg {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR, 0x00000002 }, 4305324fb0dSmrg {0xA1B6, 0x00000001}, //{ mmSPI_PS_IN_CONTROL, 0x00000001 }, 4315324fb0dSmrg {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK, 0x0000000F }, 4325324fb0dSmrg {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL, 0x00000010 }, 4335324fb0dSmrg {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT, 0x00000000 }, 4345324fb0dSmrg {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */}, 4355324fb0dSmrg {0xA1C5, 0x00000004}, //{ 
mmSPI_SHADER_COL_FORMAT, 0x00000004 } 4365324fb0dSmrg}; 4375324fb0dSmrg 4385324fb0dSmrgstatic const uint32_t vs_RectPosTexFast_shader_gfx9[] = { 4395324fb0dSmrg 0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100, 4405324fb0dSmrg 0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206, 4415324fb0dSmrg 0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080, 4425324fb0dSmrg 0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003, 4435324fb0dSmrg 0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101, 4445324fb0dSmrg 0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903, 4455324fb0dSmrg 0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100, 4465324fb0dSmrg 0xC400020F, 0x05060403, 0xBF810000 4475324fb0dSmrg}; 4485324fb0dSmrg 4495324fb0dSmrgstatic const uint32_t cached_cmd_gfx9[] = { 4505324fb0dSmrg 0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0, 4515324fb0dSmrg 0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020, 4525324fb0dSmrg 0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf, 4535324fb0dSmrg 0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x12, 4545324fb0dSmrg 0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0, 4555324fb0dSmrg 0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011, 4565324fb0dSmrg 0xc0026900, 0x292, 0x20, 0x60201b8, 4575324fb0dSmrg 0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0 4585324fb0dSmrg}; 45900a23bdaSmrg 4607cdc0497Smrgint amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size, 4617cdc0497Smrg unsigned alignment, unsigned heap, uint64_t alloc_flags, 4627cdc0497Smrg uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu, 4637cdc0497Smrg uint64_t *mc_address, 4647cdc0497Smrg amdgpu_va_handle *va_handle) 4657cdc0497Smrg{ 4667cdc0497Smrg struct amdgpu_bo_alloc_request request = {}; 4677cdc0497Smrg amdgpu_bo_handle buf_handle; 4687cdc0497Smrg amdgpu_va_handle handle; 4697cdc0497Smrg uint64_t vmc_addr; 4707cdc0497Smrg int r; 4717cdc0497Smrg 4727cdc0497Smrg request.alloc_size = size; 4737cdc0497Smrg request.phys_alignment = alignment; 4747cdc0497Smrg request.preferred_heap = heap; 4757cdc0497Smrg 
request.flags = alloc_flags; 4767cdc0497Smrg 4777cdc0497Smrg r = amdgpu_bo_alloc(dev, &request, &buf_handle); 4787cdc0497Smrg if (r) 4797cdc0497Smrg return r; 4807cdc0497Smrg 4817cdc0497Smrg r = amdgpu_va_range_alloc(dev, 4827cdc0497Smrg amdgpu_gpu_va_range_general, 4837cdc0497Smrg size, alignment, 0, &vmc_addr, 4847cdc0497Smrg &handle, 0); 4857cdc0497Smrg if (r) 4867cdc0497Smrg goto error_va_alloc; 4877cdc0497Smrg 4887cdc0497Smrg r = amdgpu_bo_va_op_raw(dev, buf_handle, 0, ALIGN(size, getpagesize()), vmc_addr, 4897cdc0497Smrg AMDGPU_VM_PAGE_READABLE | 4907cdc0497Smrg AMDGPU_VM_PAGE_WRITEABLE | 4917cdc0497Smrg AMDGPU_VM_PAGE_EXECUTABLE | 4927cdc0497Smrg mapping_flags, 4937cdc0497Smrg AMDGPU_VA_OP_MAP); 4947cdc0497Smrg if (r) 4957cdc0497Smrg goto error_va_map; 4967cdc0497Smrg 4977cdc0497Smrg r = amdgpu_bo_cpu_map(buf_handle, cpu); 4987cdc0497Smrg if (r) 4997cdc0497Smrg goto error_cpu_map; 5007cdc0497Smrg 5017cdc0497Smrg *bo = buf_handle; 5027cdc0497Smrg *mc_address = vmc_addr; 5037cdc0497Smrg *va_handle = handle; 5047cdc0497Smrg 5057cdc0497Smrg return 0; 5067cdc0497Smrg 5077cdc0497Smrg error_cpu_map: 5087cdc0497Smrg amdgpu_bo_cpu_unmap(buf_handle); 5097cdc0497Smrg 5107cdc0497Smrg error_va_map: 5117cdc0497Smrg amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP); 5127cdc0497Smrg 5137cdc0497Smrg error_va_alloc: 5147cdc0497Smrg amdgpu_bo_free(buf_handle); 5157cdc0497Smrg return r; 5167cdc0497Smrg} 5177cdc0497Smrg 5187cdc0497Smrg 5197cdc0497Smrg 5203f012e29Smrgint suite_basic_tests_init(void) 5213f012e29Smrg{ 522d8807b2fSmrg struct amdgpu_gpu_info gpu_info = {0}; 5233f012e29Smrg int r; 5243f012e29Smrg 5253f012e29Smrg r = amdgpu_device_initialize(drm_amdgpu[0], &major_version, 5263f012e29Smrg &minor_version, &device_handle); 5273f012e29Smrg 528d8807b2fSmrg if (r) { 529037b3c26Smrg if ((r == -EACCES) && (errno == EACCES)) 530037b3c26Smrg printf("\n\nError:%s. 
" 531037b3c26Smrg "Hint:Try to run this test program as root.", 532037b3c26Smrg strerror(errno)); 5333f012e29Smrg return CUE_SINIT_FAILED; 534037b3c26Smrg } 535d8807b2fSmrg 536d8807b2fSmrg r = amdgpu_query_gpu_info(device_handle, &gpu_info); 537d8807b2fSmrg if (r) 538d8807b2fSmrg return CUE_SINIT_FAILED; 539d8807b2fSmrg 540d8807b2fSmrg family_id = gpu_info.family_id; 541d8807b2fSmrg 542d8807b2fSmrg return CUE_SUCCESS; 5433f012e29Smrg} 5443f012e29Smrg 5453f012e29Smrgint suite_basic_tests_clean(void) 5463f012e29Smrg{ 5473f012e29Smrg int r = amdgpu_device_deinitialize(device_handle); 5483f012e29Smrg 5493f012e29Smrg if (r == 0) 5503f012e29Smrg return CUE_SUCCESS; 5513f012e29Smrg else 5523f012e29Smrg return CUE_SCLEAN_FAILED; 5533f012e29Smrg} 5543f012e29Smrg 5553f012e29Smrgstatic void amdgpu_query_info_test(void) 5563f012e29Smrg{ 5573f012e29Smrg struct amdgpu_gpu_info gpu_info = {0}; 5583f012e29Smrg uint32_t version, feature; 5593f012e29Smrg int r; 5603f012e29Smrg 5613f012e29Smrg r = amdgpu_query_gpu_info(device_handle, &gpu_info); 5623f012e29Smrg CU_ASSERT_EQUAL(r, 0); 5633f012e29Smrg 5643f012e29Smrg r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0, 5653f012e29Smrg 0, &version, &feature); 5663f012e29Smrg CU_ASSERT_EQUAL(r, 0); 5673f012e29Smrg} 5683f012e29Smrg 5693f012e29Smrgstatic void amdgpu_command_submission_gfx_separate_ibs(void) 5703f012e29Smrg{ 5713f012e29Smrg amdgpu_context_handle context_handle; 5723f012e29Smrg amdgpu_bo_handle ib_result_handle, ib_result_ce_handle; 5733f012e29Smrg void *ib_result_cpu, *ib_result_ce_cpu; 5743f012e29Smrg uint64_t ib_result_mc_address, ib_result_ce_mc_address; 5753f012e29Smrg struct amdgpu_cs_request ibs_request = {0}; 5763f012e29Smrg struct amdgpu_cs_ib_info ib_info[2]; 5773f012e29Smrg struct amdgpu_cs_fence fence_status = {0}; 5783f012e29Smrg uint32_t *ptr; 5793f012e29Smrg uint32_t expired; 5803f012e29Smrg amdgpu_bo_list_handle bo_list; 5813f012e29Smrg amdgpu_va_handle va_handle, va_handle_ce; 
582d8807b2fSmrg int r, i = 0; 5833f012e29Smrg 5843f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 5853f012e29Smrg CU_ASSERT_EQUAL(r, 0); 5863f012e29Smrg 5873f012e29Smrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 5883f012e29Smrg AMDGPU_GEM_DOMAIN_GTT, 0, 5893f012e29Smrg &ib_result_handle, &ib_result_cpu, 5903f012e29Smrg &ib_result_mc_address, &va_handle); 5913f012e29Smrg CU_ASSERT_EQUAL(r, 0); 5923f012e29Smrg 5933f012e29Smrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 5943f012e29Smrg AMDGPU_GEM_DOMAIN_GTT, 0, 5953f012e29Smrg &ib_result_ce_handle, &ib_result_ce_cpu, 5963f012e29Smrg &ib_result_ce_mc_address, &va_handle_ce); 5973f012e29Smrg CU_ASSERT_EQUAL(r, 0); 5983f012e29Smrg 5993f012e29Smrg r = amdgpu_get_bo_list(device_handle, ib_result_handle, 6003f012e29Smrg ib_result_ce_handle, &bo_list); 6013f012e29Smrg CU_ASSERT_EQUAL(r, 0); 6023f012e29Smrg 6033f012e29Smrg memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info)); 6043f012e29Smrg 6053f012e29Smrg /* IT_SET_CE_DE_COUNTERS */ 6063f012e29Smrg ptr = ib_result_ce_cpu; 607d8807b2fSmrg if (family_id != AMDGPU_FAMILY_SI) { 608d8807b2fSmrg ptr[i++] = 0xc0008900; 609d8807b2fSmrg ptr[i++] = 0; 610d8807b2fSmrg } 611d8807b2fSmrg ptr[i++] = 0xc0008400; 612d8807b2fSmrg ptr[i++] = 1; 6133f012e29Smrg ib_info[0].ib_mc_address = ib_result_ce_mc_address; 614d8807b2fSmrg ib_info[0].size = i; 6153f012e29Smrg ib_info[0].flags = AMDGPU_IB_FLAG_CE; 6163f012e29Smrg 6173f012e29Smrg /* IT_WAIT_ON_CE_COUNTER */ 6183f012e29Smrg ptr = ib_result_cpu; 6193f012e29Smrg ptr[0] = 0xc0008600; 6203f012e29Smrg ptr[1] = 0x00000001; 6213f012e29Smrg ib_info[1].ib_mc_address = ib_result_mc_address; 6223f012e29Smrg ib_info[1].size = 2; 6233f012e29Smrg 6243f012e29Smrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 6253f012e29Smrg ibs_request.number_of_ibs = 2; 6263f012e29Smrg ibs_request.ibs = ib_info; 6273f012e29Smrg ibs_request.resources = bo_list; 6283f012e29Smrg ibs_request.fence_info.handle = NULL; 6293f012e29Smrg 
6303f012e29Smrg r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1); 6313f012e29Smrg 6323f012e29Smrg CU_ASSERT_EQUAL(r, 0); 6333f012e29Smrg 6343f012e29Smrg fence_status.context = context_handle; 6353f012e29Smrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 6363f012e29Smrg fence_status.ip_instance = 0; 6373f012e29Smrg fence_status.fence = ibs_request.seq_no; 6383f012e29Smrg 6393f012e29Smrg r = amdgpu_cs_query_fence_status(&fence_status, 6403f012e29Smrg AMDGPU_TIMEOUT_INFINITE, 6413f012e29Smrg 0, &expired); 6423f012e29Smrg CU_ASSERT_EQUAL(r, 0); 6433f012e29Smrg 6443f012e29Smrg r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 6453f012e29Smrg ib_result_mc_address, 4096); 6463f012e29Smrg CU_ASSERT_EQUAL(r, 0); 6473f012e29Smrg 6483f012e29Smrg r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce, 6493f012e29Smrg ib_result_ce_mc_address, 4096); 6503f012e29Smrg CU_ASSERT_EQUAL(r, 0); 6513f012e29Smrg 6523f012e29Smrg r = amdgpu_bo_list_destroy(bo_list); 6533f012e29Smrg CU_ASSERT_EQUAL(r, 0); 6543f012e29Smrg 6553f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 6563f012e29Smrg CU_ASSERT_EQUAL(r, 0); 6573f012e29Smrg 6583f012e29Smrg} 6593f012e29Smrg

/*
 * Submit two IBs that share one 4096-byte GTT buffer on the GFX IP:
 * a CE-flagged IB at the start of the buffer and a second IB at byte
 * offset 16 (dword 4), then query the fence of the submission with an
 * infinite timeout and release everything that was created.
 */
static void amdgpu_command_submission_gfx_shared_ib(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
			       &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, sizeof(ib_info));

	/* IT_SET_CE_DE_COUNTERS (skipped on the SI family) */
	ptr = ib_result_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_mc_address;
	ib_info[0].size = i;	/* 2 dwords on SI, 4 otherwise */
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/*
	 * Second IB: starts at dword 4 of the same buffer, i.e. right
	 * behind the largest possible first IB (4 dwords == 16 bytes).
	 */
	ptr = (uint32_t *)ib_result_cpu + 4;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

/* Run the write-linear helper on the GFX IP. */
static void amdgpu_command_submission_gfx_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
}

/* Run the const-fill helper on the GFX IP. */
static void amdgpu_command_submission_gfx_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
}

/* Run the copy-linear helper on the GFX IP. */
static void amdgpu_command_submission_gfx_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
}

/*
 * SDMA linear copy between two small GTT BOs while the resource list
 * also names one VRAM BO and one GTT BO, each allocated with the
 * max_allocation size reported by the heap query for its domain.
 * Two such BOs are allocated per domain up front.
 * NOTE(review): presumably this is what forces buffer eviction; that
 * intent comes from the test name only and is not visible in the code.
 *
 * The copy is repeated for every combination of the two GTT creation
 * flags (0 and AMDGPU_GEM_CREATE_CPU_GTT_USWC) on source and
 * destination, and the destination is checked byte by byte for the
 * 0xaa pattern written to the source.
 */
static void amdgpu_bo_eviction_test(void)
{
	const int sdma_write_length = 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo1_mc, bo2_mc;
	volatile unsigned char *bo1_cpu, *bo2_cpu;
	int i, r, loop1, loop2;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
	struct amdgpu_heap_info vram_info, gtt_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	/* prepare resource */
	resources = calloc(4, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	/*
	 * The asserts above only record a failure; stop here rather than
	 * writing through a NULL pointer further down.
	 */
	if (!pm4 || !ib_info || !ibs_request || !resources)
		goto out_free;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
				   0, &vram_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
				   0, &gtt_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	/* run all 2 x 2 combinations of GTT mapping flags for bo1/bo2 */
	for (loop1 = 0; loop1 < 2; loop1++) {
		for (loop2 = 0; loop2 < 2; loop2++) {
			/* allocate bo1 (copy source) for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop1], &bo1,
						    (void**)&bo1_cpu, &bo1_mc,
						    &bo1_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* set bo1 */
			memset((void*)bo1_cpu, 0xaa, sdma_write_length);

			/* allocate bo2 (copy destination) for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop2], &bo2,
						    (void**)&bo2_cpu, &bo2_mc,
						    &bo2_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo2 */
			memset((void*)bo2_cpu, 0, sdma_write_length);

			resources[0] = bo1;
			resources[1] = bo2;
			resources[2] = vram_max[loop2];
			resources[3] = gtt_max[loop2];

			/* fulfill PM4: test DMA copy linear */
			i = 0;
			if (family_id == AMDGPU_FAMILY_SI) {
				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
							  sdma_write_length);
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
			} else {
				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
						       SDMA_COPY_SUB_OPCODE_LINEAR, 0);
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else
					pm4[i++] = sdma_write_length;
				pm4[i++] = 0;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   AMDGPU_HW_IP_DMA, 0,
						   i, pm4,
						   4, resources,
						   ib_info, ibs_request);

			/* verify if SDMA test result meets with expected */
			for (i = 0; i < sdma_write_length; i++)
				CU_ASSERT_EQUAL(bo2_cpu[i], 0xaa);

			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
		}
	}

	amdgpu_bo_free(vram_max[0]);
	amdgpu_bo_free(vram_max[1]);
	amdgpu_bo_free(gtt_max[0]);
	amdgpu_bo_free(gtt_max[1]);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);

out_free:
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);
}


/* Run every GFX command-submission sub-test in sequence. */
static void amdgpu_command_submission_gfx(void)
{
	/* write data using the CP */
	amdgpu_command_submission_gfx_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_gfx_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_gfx_cp_copy_data();
	/* separate IB buffers for multi-IB submission */
	amdgpu_command_submission_gfx_separate_ibs();
	/* shared IB buffer for multi-IB submission */
	amdgpu_command_submission_gfx_shared_ib();
}

/*
 * Exercise one semaphore in two scenarios, each consisting of a NOP
 * submission that signals the semaphore followed by a NOP submission
 * that waits on it:
 *   1. same context, different engines (DMA signals, GFX waits);
 *   2. same engine (GFX), different contexts.
 * After each scenario the fence of the waiting submission is queried
 * with a timeout value of 500000000 and is expected to have expired.
 */
static void amdgpu_semaphore_test(void)
{
	amdgpu_context_handle context_handle[2];
	amdgpu_semaphore_handle sem;
	amdgpu_bo_handle ib_result_handle[2];
	void *ib_result_cpu[2];
	uint64_t ib_result_mc_address[2];
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2] = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	uint32_t sdma_nop, gfx_nop;
	amdgpu_bo_list_handle bo_list[2];
	amdgpu_va_handle va_handle[2];
	int r, i;

	/* the NOP encodings differ between SI and every other family */
	if (family_id == AMDGPU_FAMILY_SI) {
		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
		gfx_nop = GFX_COMPUTE_NOP_SI;
	} else {
		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
		gfx_nop = GFX_COMPUTE_NOP;
	}

	r = amdgpu_cs_create_semaphore(&sem);
	CU_ASSERT_EQUAL(r, 0);

	/* one context, one 4096-byte IB buffer and one BO list per slot */
	for (i = 0; i < 2; i++) {
		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle[i], &ib_result_cpu[i],
					    &ib_result_mc_address[i], &va_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
				       NULL, &bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	/* 1. same context different engine */
	ptr = ib_result_cpu[0];
	ptr[0] = sdma_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[0];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* 2. same engine different context */
	ptr = ib_result_cpu[0];
	ptr[0] = gfx_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[1], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[1];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	for (i = 0; i < 2; i++) {
		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
					     ib_result_mc_address[i], 4096);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_cs_ctx_free(context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_destroy_semaphore(sem);
	CU_ASSERT_EQUAL(r, 0);
}

/*
 * Submit a 16-dword IB holding a single PACKET3_NOP header to each
 * COMPUTE ring, walking available_rings upward from bit 0 until the
 * first clear bit, and query the resulting fence with an infinite
 * timeout before moving on to the next ring.
 */
static void amdgpu_command_submission_compute_nop(void)
{
	const int nop_ib_dw = 16;	/* IB size in dwords */
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request;
	struct amdgpu_cs_ib_info ib_info;
	struct amdgpu_cs_fence fence_status;
	uint32_t *ptr;
	uint32_t expired;
	int r, instance;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	/* zeroed so that a failed query yields "no rings" instead of garbage */
	struct drm_amdgpu_info_hw_ip info = {0};

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/*
	 * Unsigned shift, bounded to 32 bits: shifting a signed 1 into
	 * or past the sign bit is undefined behaviour.
	 */
	for (instance = 0;
	     instance < 32 && ((1u << instance) & info.available_rings);
	     instance++) {
		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle, &ib_result_cpu,
					    &ib_result_mc_address, &va_handle);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
				       &bo_list);
		CU_ASSERT_EQUAL(r, 0);

		/* clear the whole IB (16 dwords, not 16 bytes) */
		ptr = ib_result_cpu;
		memset(ptr, 0, nop_ib_dw * sizeof(*ptr));
		ptr[0] = PACKET3(PACKET3_NOP, 14);

		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
		ib_info.ib_mc_address = ib_result_mc_address;
		ib_info.size = nop_ib_dw;

		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
		ibs_request.ring = instance;
		ibs_request.number_of_ibs = 1;
		ibs_request.ibs = &ib_info;
		ibs_request.resources = bo_list;
		ibs_request.fence_info.handle = NULL;

		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
		r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
		CU_ASSERT_EQUAL(r, 0);

		fence_status.context = context_handle;
		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
		fence_status.ip_instance = 0;
		fence_status.ring = instance;
		fence_status.fence = ibs_request.seq_no;

		r = amdgpu_cs_query_fence_status(&fence_status,
						 AMDGPU_TIMEOUT_INFINITE,
						 0, &expired);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
					     ib_result_mc_address, 4096);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

11343f012e29Smrgstatic void amdgpu_command_submission_compute_cp_write_data(void) 11353f012e29Smrg{ 11363f012e29Smrg amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE); 11373f012e29Smrg} 11383f012e29Smrg 11393f012e29Smrgstatic void amdgpu_command_submission_compute_cp_const_fill(void) 11403f012e29Smrg{ 11413f012e29Smrg amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE); 11423f012e29Smrg} 11433f012e29Smrg 11443f012e29Smrgstatic void amdgpu_command_submission_compute_cp_copy_data(void) 11453f012e29Smrg{ 11463f012e29Smrg amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE); 11473f012e29Smrg} 11483f012e29Smrg 11493f012e29Smrgstatic void amdgpu_command_submission_compute(void) 11503f012e29Smrg{ 11513f012e29Smrg /* write data using the CP */ 11523f012e29Smrg amdgpu_command_submission_compute_cp_write_data(); 11533f012e29Smrg /* const fill using the CP */ 11543f012e29Smrg amdgpu_command_submission_compute_cp_const_fill(); 11553f012e29Smrg /* copy data using the CP */ 11563f012e29Smrg amdgpu_command_submission_compute_cp_copy_data(); 11573f012e29Smrg /* nop test */ 11583f012e29Smrg amdgpu_command_submission_compute_nop(); 11593f012e29Smrg} 11603f012e29Smrg 11613f012e29Smrg/* 11623f012e29Smrg * caller need create/release: 11633f012e29Smrg * pm4_src, resources, ib_info, and ibs_request 11643f012e29Smrg * submit command stream described in ibs_request and wait for this IB accomplished 11653f012e29Smrg */ 11663f012e29Smrgstatic void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle, 11673f012e29Smrg unsigned ip_type, 11683f012e29Smrg int instance, int pm4_dw, uint32_t *pm4_src, 11693f012e29Smrg int res_cnt, amdgpu_bo_handle *resources, 11703f012e29Smrg struct amdgpu_cs_ib_info *ib_info, 11713f012e29Smrg struct amdgpu_cs_request *ibs_request) 11723f012e29Smrg{ 11733f012e29Smrg int r; 11743f012e29Smrg uint32_t expired; 11753f012e29Smrg uint32_t *ring_ptr; 11763f012e29Smrg amdgpu_bo_handle ib_result_handle; 
11773f012e29Smrg void *ib_result_cpu; 11783f012e29Smrg uint64_t ib_result_mc_address; 11793f012e29Smrg struct amdgpu_cs_fence fence_status = {0}; 11803f012e29Smrg amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1)); 11813f012e29Smrg amdgpu_va_handle va_handle; 11823f012e29Smrg 11833f012e29Smrg /* prepare CS */ 11843f012e29Smrg CU_ASSERT_NOT_EQUAL(pm4_src, NULL); 11853f012e29Smrg CU_ASSERT_NOT_EQUAL(resources, NULL); 11863f012e29Smrg CU_ASSERT_NOT_EQUAL(ib_info, NULL); 11873f012e29Smrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 11883f012e29Smrg CU_ASSERT_TRUE(pm4_dw <= 1024); 11893f012e29Smrg 11903f012e29Smrg /* allocate IB */ 11913f012e29Smrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 11923f012e29Smrg AMDGPU_GEM_DOMAIN_GTT, 0, 11933f012e29Smrg &ib_result_handle, &ib_result_cpu, 11943f012e29Smrg &ib_result_mc_address, &va_handle); 11953f012e29Smrg CU_ASSERT_EQUAL(r, 0); 11963f012e29Smrg 11973f012e29Smrg /* copy PM4 packet to ring from caller */ 11983f012e29Smrg ring_ptr = ib_result_cpu; 11993f012e29Smrg memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src)); 12003f012e29Smrg 12013f012e29Smrg ib_info->ib_mc_address = ib_result_mc_address; 12023f012e29Smrg ib_info->size = pm4_dw; 12033f012e29Smrg 12043f012e29Smrg ibs_request->ip_type = ip_type; 12053f012e29Smrg ibs_request->ring = instance; 12063f012e29Smrg ibs_request->number_of_ibs = 1; 12073f012e29Smrg ibs_request->ibs = ib_info; 12083f012e29Smrg ibs_request->fence_info.handle = NULL; 12093f012e29Smrg 12103f012e29Smrg memcpy(all_res, resources, sizeof(resources[0]) * res_cnt); 12113f012e29Smrg all_res[res_cnt] = ib_result_handle; 12123f012e29Smrg 12133f012e29Smrg r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res, 12143f012e29Smrg NULL, &ibs_request->resources); 12153f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12163f012e29Smrg 12173f012e29Smrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 12183f012e29Smrg 12193f012e29Smrg /* submit CS */ 12203f012e29Smrg r = amdgpu_cs_submit(context_handle, 0, 
ibs_request, 1); 12213f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12223f012e29Smrg 12233f012e29Smrg r = amdgpu_bo_list_destroy(ibs_request->resources); 12243f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12253f012e29Smrg 12263f012e29Smrg fence_status.ip_type = ip_type; 12273f012e29Smrg fence_status.ip_instance = 0; 12283f012e29Smrg fence_status.ring = ibs_request->ring; 12293f012e29Smrg fence_status.context = context_handle; 12303f012e29Smrg fence_status.fence = ibs_request->seq_no; 12313f012e29Smrg 12323f012e29Smrg /* wait for IB accomplished */ 12333f012e29Smrg r = amdgpu_cs_query_fence_status(&fence_status, 12343f012e29Smrg AMDGPU_TIMEOUT_INFINITE, 12353f012e29Smrg 0, &expired); 12363f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12373f012e29Smrg CU_ASSERT_EQUAL(expired, true); 12383f012e29Smrg 12393f012e29Smrg r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 12403f012e29Smrg ib_result_mc_address, 4096); 12413f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12423f012e29Smrg} 12433f012e29Smrg 12443f012e29Smrgstatic void amdgpu_command_submission_write_linear_helper(unsigned ip_type) 12453f012e29Smrg{ 12463f012e29Smrg const int sdma_write_length = 128; 12473f012e29Smrg const int pm4_dw = 256; 12483f012e29Smrg amdgpu_context_handle context_handle; 12493f012e29Smrg amdgpu_bo_handle bo; 12503f012e29Smrg amdgpu_bo_handle *resources; 12513f012e29Smrg uint32_t *pm4; 12523f012e29Smrg struct amdgpu_cs_ib_info *ib_info; 12533f012e29Smrg struct amdgpu_cs_request *ibs_request; 12543f012e29Smrg uint64_t bo_mc; 12553f012e29Smrg volatile uint32_t *bo_cpu; 125600a23bdaSmrg int i, j, r, loop, ring_id; 12573f012e29Smrg uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 12583f012e29Smrg amdgpu_va_handle va_handle; 125900a23bdaSmrg struct drm_amdgpu_info_hw_ip hw_ip_info; 12603f012e29Smrg 12613f012e29Smrg pm4 = calloc(pm4_dw, sizeof(*pm4)); 12623f012e29Smrg CU_ASSERT_NOT_EQUAL(pm4, NULL); 12633f012e29Smrg 12643f012e29Smrg ib_info = calloc(1, sizeof(*ib_info)); 12653f012e29Smrg CU_ASSERT_NOT_EQUAL(ib_info, 
NULL); 12663f012e29Smrg 12673f012e29Smrg ibs_request = calloc(1, sizeof(*ibs_request)); 12683f012e29Smrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 12693f012e29Smrg 127000a23bdaSmrg r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info); 127100a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 127200a23bdaSmrg 12733f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 12743f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12753f012e29Smrg 12763f012e29Smrg /* prepare resource */ 12773f012e29Smrg resources = calloc(1, sizeof(amdgpu_bo_handle)); 12783f012e29Smrg CU_ASSERT_NOT_EQUAL(resources, NULL); 12793f012e29Smrg 128000a23bdaSmrg for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) { 128100a23bdaSmrg loop = 0; 128200a23bdaSmrg while(loop < 2) { 128300a23bdaSmrg /* allocate UC bo for sDMA use */ 128400a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 128500a23bdaSmrg sdma_write_length * sizeof(uint32_t), 128600a23bdaSmrg 4096, AMDGPU_GEM_DOMAIN_GTT, 128700a23bdaSmrg gtt_flags[loop], &bo, (void**)&bo_cpu, 128800a23bdaSmrg &bo_mc, &va_handle); 128900a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 12903f012e29Smrg 129100a23bdaSmrg /* clear bo */ 129200a23bdaSmrg memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t)); 12933f012e29Smrg 129400a23bdaSmrg resources[0] = bo; 12953f012e29Smrg 129600a23bdaSmrg /* fulfill PM4: test DMA write-linear */ 129700a23bdaSmrg i = j = 0; 129800a23bdaSmrg if (ip_type == AMDGPU_HW_IP_DMA) { 129900a23bdaSmrg if (family_id == AMDGPU_FAMILY_SI) 130000a23bdaSmrg pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0, 130100a23bdaSmrg sdma_write_length); 130200a23bdaSmrg else 130300a23bdaSmrg pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE, 130400a23bdaSmrg SDMA_WRITE_SUB_OPCODE_LINEAR, 0); 130500a23bdaSmrg pm4[i++] = 0xffffffff & bo_mc; 130600a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 130700a23bdaSmrg if (family_id >= AMDGPU_FAMILY_AI) 130800a23bdaSmrg pm4[i++] = sdma_write_length - 1; 130900a23bdaSmrg else if (family_id != 
AMDGPU_FAMILY_SI) 131000a23bdaSmrg pm4[i++] = sdma_write_length; 131100a23bdaSmrg while(j++ < sdma_write_length) 131200a23bdaSmrg pm4[i++] = 0xdeadbeaf; 131300a23bdaSmrg } else if ((ip_type == AMDGPU_HW_IP_GFX) || 131400a23bdaSmrg (ip_type == AMDGPU_HW_IP_COMPUTE)) { 131500a23bdaSmrg pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length); 131600a23bdaSmrg pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 131700a23bdaSmrg pm4[i++] = 0xfffffffc & bo_mc; 131800a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 131900a23bdaSmrg while(j++ < sdma_write_length) 132000a23bdaSmrg pm4[i++] = 0xdeadbeaf; 132100a23bdaSmrg } 13223f012e29Smrg 132300a23bdaSmrg amdgpu_test_exec_cs_helper(context_handle, 132400a23bdaSmrg ip_type, ring_id, 132500a23bdaSmrg i, pm4, 132600a23bdaSmrg 1, resources, 132700a23bdaSmrg ib_info, ibs_request); 13283f012e29Smrg 132900a23bdaSmrg /* verify if SDMA test result meets with expected */ 133000a23bdaSmrg i = 0; 133100a23bdaSmrg while(i < sdma_write_length) { 133200a23bdaSmrg CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf); 133300a23bdaSmrg } 13343f012e29Smrg 133500a23bdaSmrg r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc, 133600a23bdaSmrg sdma_write_length * sizeof(uint32_t)); 133700a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 133800a23bdaSmrg loop++; 13393f012e29Smrg } 13403f012e29Smrg } 13413f012e29Smrg /* clean resources */ 13423f012e29Smrg free(resources); 13433f012e29Smrg free(ibs_request); 13443f012e29Smrg free(ib_info); 13453f012e29Smrg free(pm4); 13463f012e29Smrg 13473f012e29Smrg /* end of test */ 13483f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 13493f012e29Smrg CU_ASSERT_EQUAL(r, 0); 13503f012e29Smrg} 13513f012e29Smrg 13523f012e29Smrgstatic void amdgpu_command_submission_sdma_write_linear(void) 13533f012e29Smrg{ 13543f012e29Smrg amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA); 13553f012e29Smrg} 13563f012e29Smrg 13573f012e29Smrgstatic void amdgpu_command_submission_const_fill_helper(unsigned ip_type) 13583f012e29Smrg{ 
13593f012e29Smrg const int sdma_write_length = 1024 * 1024; 13603f012e29Smrg const int pm4_dw = 256; 13613f012e29Smrg amdgpu_context_handle context_handle; 13623f012e29Smrg amdgpu_bo_handle bo; 13633f012e29Smrg amdgpu_bo_handle *resources; 13643f012e29Smrg uint32_t *pm4; 13653f012e29Smrg struct amdgpu_cs_ib_info *ib_info; 13663f012e29Smrg struct amdgpu_cs_request *ibs_request; 13673f012e29Smrg uint64_t bo_mc; 13683f012e29Smrg volatile uint32_t *bo_cpu; 136900a23bdaSmrg int i, j, r, loop, ring_id; 13703f012e29Smrg uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 13713f012e29Smrg amdgpu_va_handle va_handle; 137200a23bdaSmrg struct drm_amdgpu_info_hw_ip hw_ip_info; 13733f012e29Smrg 13743f012e29Smrg pm4 = calloc(pm4_dw, sizeof(*pm4)); 13753f012e29Smrg CU_ASSERT_NOT_EQUAL(pm4, NULL); 13763f012e29Smrg 13773f012e29Smrg ib_info = calloc(1, sizeof(*ib_info)); 13783f012e29Smrg CU_ASSERT_NOT_EQUAL(ib_info, NULL); 13793f012e29Smrg 13803f012e29Smrg ibs_request = calloc(1, sizeof(*ibs_request)); 13813f012e29Smrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 13823f012e29Smrg 138300a23bdaSmrg r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info); 138400a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 138500a23bdaSmrg 13863f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 13873f012e29Smrg CU_ASSERT_EQUAL(r, 0); 13883f012e29Smrg 13893f012e29Smrg /* prepare resource */ 13903f012e29Smrg resources = calloc(1, sizeof(amdgpu_bo_handle)); 13913f012e29Smrg CU_ASSERT_NOT_EQUAL(resources, NULL); 13923f012e29Smrg 139300a23bdaSmrg for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) { 139400a23bdaSmrg loop = 0; 139500a23bdaSmrg while(loop < 2) { 139600a23bdaSmrg /* allocate UC bo for sDMA use */ 139700a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 139800a23bdaSmrg sdma_write_length, 4096, 139900a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 140000a23bdaSmrg gtt_flags[loop], &bo, (void**)&bo_cpu, 140100a23bdaSmrg &bo_mc, &va_handle); 140200a23bdaSmrg 
CU_ASSERT_EQUAL(r, 0); 14033f012e29Smrg 140400a23bdaSmrg /* clear bo */ 140500a23bdaSmrg memset((void*)bo_cpu, 0, sdma_write_length); 14063f012e29Smrg 140700a23bdaSmrg resources[0] = bo; 14083f012e29Smrg 140900a23bdaSmrg /* fulfill PM4: test DMA const fill */ 141000a23bdaSmrg i = j = 0; 141100a23bdaSmrg if (ip_type == AMDGPU_HW_IP_DMA) { 141200a23bdaSmrg if (family_id == AMDGPU_FAMILY_SI) { 141300a23bdaSmrg pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI, 141400a23bdaSmrg 0, 0, 0, 141500a23bdaSmrg sdma_write_length / 4); 141600a23bdaSmrg pm4[i++] = 0xfffffffc & bo_mc; 141700a23bdaSmrg pm4[i++] = 0xdeadbeaf; 141800a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16; 141900a23bdaSmrg } else { 142000a23bdaSmrg pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0, 142100a23bdaSmrg SDMA_CONSTANT_FILL_EXTRA_SIZE(2)); 142200a23bdaSmrg pm4[i++] = 0xffffffff & bo_mc; 142300a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 142400a23bdaSmrg pm4[i++] = 0xdeadbeaf; 142500a23bdaSmrg if (family_id >= AMDGPU_FAMILY_AI) 142600a23bdaSmrg pm4[i++] = sdma_write_length - 1; 142700a23bdaSmrg else 142800a23bdaSmrg pm4[i++] = sdma_write_length; 142900a23bdaSmrg } 143000a23bdaSmrg } else if ((ip_type == AMDGPU_HW_IP_GFX) || 143100a23bdaSmrg (ip_type == AMDGPU_HW_IP_COMPUTE)) { 143200a23bdaSmrg if (family_id == AMDGPU_FAMILY_SI) { 143300a23bdaSmrg pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4); 143400a23bdaSmrg pm4[i++] = 0xdeadbeaf; 143500a23bdaSmrg pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) | 143600a23bdaSmrg PACKET3_DMA_DATA_SI_DST_SEL(0) | 143700a23bdaSmrg PACKET3_DMA_DATA_SI_SRC_SEL(2) | 143800a23bdaSmrg PACKET3_DMA_DATA_SI_CP_SYNC; 143900a23bdaSmrg pm4[i++] = 0xffffffff & bo_mc; 144000a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1441d8807b2fSmrg pm4[i++] = sdma_write_length; 144200a23bdaSmrg } else { 144300a23bdaSmrg pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5); 144400a23bdaSmrg pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) | 144500a23bdaSmrg PACKET3_DMA_DATA_DST_SEL(0) | 
144600a23bdaSmrg PACKET3_DMA_DATA_SRC_SEL(2) | 144700a23bdaSmrg PACKET3_DMA_DATA_CP_SYNC; 144800a23bdaSmrg pm4[i++] = 0xdeadbeaf; 144900a23bdaSmrg pm4[i++] = 0; 145000a23bdaSmrg pm4[i++] = 0xfffffffc & bo_mc; 145100a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 145200a23bdaSmrg pm4[i++] = sdma_write_length; 145300a23bdaSmrg } 1454d8807b2fSmrg } 14553f012e29Smrg 145600a23bdaSmrg amdgpu_test_exec_cs_helper(context_handle, 145700a23bdaSmrg ip_type, ring_id, 145800a23bdaSmrg i, pm4, 145900a23bdaSmrg 1, resources, 146000a23bdaSmrg ib_info, ibs_request); 14613f012e29Smrg 146200a23bdaSmrg /* verify if SDMA test result meets with expected */ 146300a23bdaSmrg i = 0; 146400a23bdaSmrg while(i < (sdma_write_length / 4)) { 146500a23bdaSmrg CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf); 146600a23bdaSmrg } 14673f012e29Smrg 146800a23bdaSmrg r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc, 146900a23bdaSmrg sdma_write_length); 147000a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 147100a23bdaSmrg loop++; 147200a23bdaSmrg } 14733f012e29Smrg } 14743f012e29Smrg /* clean resources */ 14753f012e29Smrg free(resources); 14763f012e29Smrg free(ibs_request); 14773f012e29Smrg free(ib_info); 14783f012e29Smrg free(pm4); 14793f012e29Smrg 14803f012e29Smrg /* end of test */ 14813f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 14823f012e29Smrg CU_ASSERT_EQUAL(r, 0); 14833f012e29Smrg} 14843f012e29Smrg 14853f012e29Smrgstatic void amdgpu_command_submission_sdma_const_fill(void) 14863f012e29Smrg{ 14873f012e29Smrg amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA); 14883f012e29Smrg} 14893f012e29Smrg 14903f012e29Smrgstatic void amdgpu_command_submission_copy_linear_helper(unsigned ip_type) 14913f012e29Smrg{ 14923f012e29Smrg const int sdma_write_length = 1024; 14933f012e29Smrg const int pm4_dw = 256; 14943f012e29Smrg amdgpu_context_handle context_handle; 14953f012e29Smrg amdgpu_bo_handle bo1, bo2; 14963f012e29Smrg amdgpu_bo_handle *resources; 14973f012e29Smrg uint32_t *pm4; 14983f012e29Smrg struct 
amdgpu_cs_ib_info *ib_info; 14993f012e29Smrg struct amdgpu_cs_request *ibs_request; 15003f012e29Smrg uint64_t bo1_mc, bo2_mc; 15013f012e29Smrg volatile unsigned char *bo1_cpu, *bo2_cpu; 150200a23bdaSmrg int i, j, r, loop1, loop2, ring_id; 15033f012e29Smrg uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 15043f012e29Smrg amdgpu_va_handle bo1_va_handle, bo2_va_handle; 150500a23bdaSmrg struct drm_amdgpu_info_hw_ip hw_ip_info; 15063f012e29Smrg 15073f012e29Smrg pm4 = calloc(pm4_dw, sizeof(*pm4)); 15083f012e29Smrg CU_ASSERT_NOT_EQUAL(pm4, NULL); 15093f012e29Smrg 15103f012e29Smrg ib_info = calloc(1, sizeof(*ib_info)); 15113f012e29Smrg CU_ASSERT_NOT_EQUAL(ib_info, NULL); 15123f012e29Smrg 15133f012e29Smrg ibs_request = calloc(1, sizeof(*ibs_request)); 15143f012e29Smrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 15153f012e29Smrg 151600a23bdaSmrg r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info); 151700a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 151800a23bdaSmrg 15193f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 15203f012e29Smrg CU_ASSERT_EQUAL(r, 0); 15213f012e29Smrg 15223f012e29Smrg /* prepare resource */ 15233f012e29Smrg resources = calloc(2, sizeof(amdgpu_bo_handle)); 15243f012e29Smrg CU_ASSERT_NOT_EQUAL(resources, NULL); 15253f012e29Smrg 152600a23bdaSmrg for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) { 152700a23bdaSmrg loop1 = loop2 = 0; 152800a23bdaSmrg /* run 9 circle to test all mapping combination */ 152900a23bdaSmrg while(loop1 < 2) { 153000a23bdaSmrg while(loop2 < 2) { 153100a23bdaSmrg /* allocate UC bo1for sDMA use */ 153200a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 153300a23bdaSmrg sdma_write_length, 4096, 153400a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 153500a23bdaSmrg gtt_flags[loop1], &bo1, 153600a23bdaSmrg (void**)&bo1_cpu, &bo1_mc, 153700a23bdaSmrg &bo1_va_handle); 153800a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 153900a23bdaSmrg 154000a23bdaSmrg /* set bo1 */ 154100a23bdaSmrg 
memset((void*)bo1_cpu, 0xaa, sdma_write_length); 154200a23bdaSmrg 154300a23bdaSmrg /* allocate UC bo2 for sDMA use */ 154400a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 154500a23bdaSmrg sdma_write_length, 4096, 154600a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 154700a23bdaSmrg gtt_flags[loop2], &bo2, 154800a23bdaSmrg (void**)&bo2_cpu, &bo2_mc, 154900a23bdaSmrg &bo2_va_handle); 155000a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 155100a23bdaSmrg 155200a23bdaSmrg /* clear bo2 */ 155300a23bdaSmrg memset((void*)bo2_cpu, 0, sdma_write_length); 155400a23bdaSmrg 155500a23bdaSmrg resources[0] = bo1; 155600a23bdaSmrg resources[1] = bo2; 155700a23bdaSmrg 155800a23bdaSmrg /* fulfill PM4: test DMA copy linear */ 155900a23bdaSmrg i = j = 0; 156000a23bdaSmrg if (ip_type == AMDGPU_HW_IP_DMA) { 156100a23bdaSmrg if (family_id == AMDGPU_FAMILY_SI) { 156200a23bdaSmrg pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 156300a23bdaSmrg 0, 0, 0, 156400a23bdaSmrg sdma_write_length); 156500a23bdaSmrg pm4[i++] = 0xffffffff & bo2_mc; 156600a23bdaSmrg pm4[i++] = 0xffffffff & bo1_mc; 156700a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 156800a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 156900a23bdaSmrg } else { 157000a23bdaSmrg pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, 157100a23bdaSmrg SDMA_COPY_SUB_OPCODE_LINEAR, 157200a23bdaSmrg 0); 157300a23bdaSmrg if (family_id >= AMDGPU_FAMILY_AI) 157400a23bdaSmrg pm4[i++] = sdma_write_length - 1; 157500a23bdaSmrg else 157600a23bdaSmrg pm4[i++] = sdma_write_length; 157700a23bdaSmrg pm4[i++] = 0; 157800a23bdaSmrg pm4[i++] = 0xffffffff & bo1_mc; 157900a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 158000a23bdaSmrg pm4[i++] = 0xffffffff & bo2_mc; 158100a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 158200a23bdaSmrg } 158300a23bdaSmrg } else if ((ip_type == AMDGPU_HW_IP_GFX) || 158400a23bdaSmrg (ip_type == AMDGPU_HW_IP_COMPUTE)) { 158500a23bdaSmrg if (family_id == AMDGPU_FAMILY_SI) { 158600a23bdaSmrg pm4[i++] = 
PACKET3(PACKET3_DMA_DATA_SI, 4); 158700a23bdaSmrg pm4[i++] = 0xfffffffc & bo1_mc; 158800a23bdaSmrg pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) | 158900a23bdaSmrg PACKET3_DMA_DATA_SI_DST_SEL(0) | 159000a23bdaSmrg PACKET3_DMA_DATA_SI_SRC_SEL(0) | 159100a23bdaSmrg PACKET3_DMA_DATA_SI_CP_SYNC | 159200a23bdaSmrg (0xffff00000000 & bo1_mc) >> 32; 159300a23bdaSmrg pm4[i++] = 0xfffffffc & bo2_mc; 159400a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 1595d8807b2fSmrg pm4[i++] = sdma_write_length; 159600a23bdaSmrg } else { 159700a23bdaSmrg pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5); 159800a23bdaSmrg pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) | 159900a23bdaSmrg PACKET3_DMA_DATA_DST_SEL(0) | 160000a23bdaSmrg PACKET3_DMA_DATA_SRC_SEL(0) | 160100a23bdaSmrg PACKET3_DMA_DATA_CP_SYNC; 160200a23bdaSmrg pm4[i++] = 0xfffffffc & bo1_mc; 160300a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 160400a23bdaSmrg pm4[i++] = 0xfffffffc & bo2_mc; 160500a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 160600a23bdaSmrg pm4[i++] = sdma_write_length; 160700a23bdaSmrg } 1608d8807b2fSmrg } 16093f012e29Smrg 161000a23bdaSmrg amdgpu_test_exec_cs_helper(context_handle, 161100a23bdaSmrg ip_type, ring_id, 161200a23bdaSmrg i, pm4, 161300a23bdaSmrg 2, resources, 161400a23bdaSmrg ib_info, ibs_request); 16153f012e29Smrg 161600a23bdaSmrg /* verify if SDMA test result meets with expected */ 161700a23bdaSmrg i = 0; 161800a23bdaSmrg while(i < sdma_write_length) { 161900a23bdaSmrg CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa); 162000a23bdaSmrg } 162100a23bdaSmrg r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc, 162200a23bdaSmrg sdma_write_length); 162300a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 162400a23bdaSmrg r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc, 162500a23bdaSmrg sdma_write_length); 162600a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 162700a23bdaSmrg loop2++; 16283f012e29Smrg } 162900a23bdaSmrg loop1++; 16303f012e29Smrg } 16313f012e29Smrg } 16323f012e29Smrg /* clean resources */ 16333f012e29Smrg 
free(resources); 16343f012e29Smrg free(ibs_request); 16353f012e29Smrg free(ib_info); 16363f012e29Smrg free(pm4); 16373f012e29Smrg 16383f012e29Smrg /* end of test */ 16393f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 16403f012e29Smrg CU_ASSERT_EQUAL(r, 0); 16413f012e29Smrg} 16423f012e29Smrg 16433f012e29Smrgstatic void amdgpu_command_submission_sdma_copy_linear(void) 16443f012e29Smrg{ 16453f012e29Smrg amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA); 16463f012e29Smrg} 16473f012e29Smrg 16483f012e29Smrgstatic void amdgpu_command_submission_sdma(void) 16493f012e29Smrg{ 16503f012e29Smrg amdgpu_command_submission_sdma_write_linear(); 16513f012e29Smrg amdgpu_command_submission_sdma_const_fill(); 16523f012e29Smrg amdgpu_command_submission_sdma_copy_linear(); 16533f012e29Smrg} 16543f012e29Smrg 1655d8807b2fSmrgstatic void amdgpu_command_submission_multi_fence_wait_all(bool wait_all) 1656d8807b2fSmrg{ 1657d8807b2fSmrg amdgpu_context_handle context_handle; 1658d8807b2fSmrg amdgpu_bo_handle ib_result_handle, ib_result_ce_handle; 1659d8807b2fSmrg void *ib_result_cpu, *ib_result_ce_cpu; 1660d8807b2fSmrg uint64_t ib_result_mc_address, ib_result_ce_mc_address; 1661d8807b2fSmrg struct amdgpu_cs_request ibs_request[2] = {0}; 1662d8807b2fSmrg struct amdgpu_cs_ib_info ib_info[2]; 1663d8807b2fSmrg struct amdgpu_cs_fence fence_status[2] = {0}; 1664d8807b2fSmrg uint32_t *ptr; 1665d8807b2fSmrg uint32_t expired; 1666d8807b2fSmrg amdgpu_bo_list_handle bo_list; 1667d8807b2fSmrg amdgpu_va_handle va_handle, va_handle_ce; 1668d8807b2fSmrg int r; 1669d8807b2fSmrg int i = 0, ib_cs_num = 2; 1670d8807b2fSmrg 1671d8807b2fSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 1672d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1673d8807b2fSmrg 1674d8807b2fSmrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 1675d8807b2fSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 1676d8807b2fSmrg &ib_result_handle, &ib_result_cpu, 1677d8807b2fSmrg &ib_result_mc_address, &va_handle); 1678d8807b2fSmrg 
CU_ASSERT_EQUAL(r, 0); 1679d8807b2fSmrg 1680d8807b2fSmrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 1681d8807b2fSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 1682d8807b2fSmrg &ib_result_ce_handle, &ib_result_ce_cpu, 1683d8807b2fSmrg &ib_result_ce_mc_address, &va_handle_ce); 1684d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1685d8807b2fSmrg 1686d8807b2fSmrg r = amdgpu_get_bo_list(device_handle, ib_result_handle, 1687d8807b2fSmrg ib_result_ce_handle, &bo_list); 1688d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1689d8807b2fSmrg 1690d8807b2fSmrg memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info)); 1691d8807b2fSmrg 1692d8807b2fSmrg /* IT_SET_CE_DE_COUNTERS */ 1693d8807b2fSmrg ptr = ib_result_ce_cpu; 1694d8807b2fSmrg if (family_id != AMDGPU_FAMILY_SI) { 1695d8807b2fSmrg ptr[i++] = 0xc0008900; 1696d8807b2fSmrg ptr[i++] = 0; 1697d8807b2fSmrg } 1698d8807b2fSmrg ptr[i++] = 0xc0008400; 1699d8807b2fSmrg ptr[i++] = 1; 1700d8807b2fSmrg ib_info[0].ib_mc_address = ib_result_ce_mc_address; 1701d8807b2fSmrg ib_info[0].size = i; 1702d8807b2fSmrg ib_info[0].flags = AMDGPU_IB_FLAG_CE; 1703d8807b2fSmrg 1704d8807b2fSmrg /* IT_WAIT_ON_CE_COUNTER */ 1705d8807b2fSmrg ptr = ib_result_cpu; 1706d8807b2fSmrg ptr[0] = 0xc0008600; 1707d8807b2fSmrg ptr[1] = 0x00000001; 1708d8807b2fSmrg ib_info[1].ib_mc_address = ib_result_mc_address; 1709d8807b2fSmrg ib_info[1].size = 2; 1710d8807b2fSmrg 1711d8807b2fSmrg for (i = 0; i < ib_cs_num; i++) { 1712d8807b2fSmrg ibs_request[i].ip_type = AMDGPU_HW_IP_GFX; 1713d8807b2fSmrg ibs_request[i].number_of_ibs = 2; 1714d8807b2fSmrg ibs_request[i].ibs = ib_info; 1715d8807b2fSmrg ibs_request[i].resources = bo_list; 1716d8807b2fSmrg ibs_request[i].fence_info.handle = NULL; 1717d8807b2fSmrg } 1718d8807b2fSmrg 1719d8807b2fSmrg r = amdgpu_cs_submit(context_handle, 0,ibs_request, ib_cs_num); 1720d8807b2fSmrg 1721d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1722d8807b2fSmrg 1723d8807b2fSmrg for (i = 0; i < ib_cs_num; i++) { 1724d8807b2fSmrg fence_status[i].context = context_handle; 1725d8807b2fSmrg 
fence_status[i].ip_type = AMDGPU_HW_IP_GFX; 1726d8807b2fSmrg fence_status[i].fence = ibs_request[i].seq_no; 1727d8807b2fSmrg } 1728d8807b2fSmrg 1729d8807b2fSmrg r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all, 1730d8807b2fSmrg AMDGPU_TIMEOUT_INFINITE, 1731d8807b2fSmrg &expired, NULL); 1732d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1733d8807b2fSmrg 1734d8807b2fSmrg r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 1735d8807b2fSmrg ib_result_mc_address, 4096); 1736d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1737d8807b2fSmrg 1738d8807b2fSmrg r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce, 1739d8807b2fSmrg ib_result_ce_mc_address, 4096); 1740d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1741d8807b2fSmrg 1742d8807b2fSmrg r = amdgpu_bo_list_destroy(bo_list); 1743d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1744d8807b2fSmrg 1745d8807b2fSmrg r = amdgpu_cs_ctx_free(context_handle); 1746d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1747d8807b2fSmrg} 1748d8807b2fSmrg 1749d8807b2fSmrgstatic void amdgpu_command_submission_multi_fence(void) 1750d8807b2fSmrg{ 1751d8807b2fSmrg amdgpu_command_submission_multi_fence_wait_all(true); 1752d8807b2fSmrg amdgpu_command_submission_multi_fence_wait_all(false); 1753d8807b2fSmrg} 1754d8807b2fSmrg 17553f012e29Smrgstatic void amdgpu_userptr_test(void) 17563f012e29Smrg{ 17573f012e29Smrg int i, r, j; 17583f012e29Smrg uint32_t *pm4 = NULL; 17593f012e29Smrg uint64_t bo_mc; 17603f012e29Smrg void *ptr = NULL; 17613f012e29Smrg int pm4_dw = 256; 17623f012e29Smrg int sdma_write_length = 4; 17633f012e29Smrg amdgpu_bo_handle handle; 17643f012e29Smrg amdgpu_context_handle context_handle; 17653f012e29Smrg struct amdgpu_cs_ib_info *ib_info; 17663f012e29Smrg struct amdgpu_cs_request *ibs_request; 17673f012e29Smrg amdgpu_bo_handle buf_handle; 17683f012e29Smrg amdgpu_va_handle va_handle; 17693f012e29Smrg 17703f012e29Smrg pm4 = calloc(pm4_dw, sizeof(*pm4)); 17713f012e29Smrg CU_ASSERT_NOT_EQUAL(pm4, NULL); 17723f012e29Smrg 17733f012e29Smrg ib_info = calloc(1, 
sizeof(*ib_info)); 17743f012e29Smrg CU_ASSERT_NOT_EQUAL(ib_info, NULL); 17753f012e29Smrg 17763f012e29Smrg ibs_request = calloc(1, sizeof(*ibs_request)); 17773f012e29Smrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 17783f012e29Smrg 17793f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 17803f012e29Smrg CU_ASSERT_EQUAL(r, 0); 17813f012e29Smrg 17823f012e29Smrg posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE); 17833f012e29Smrg CU_ASSERT_NOT_EQUAL(ptr, NULL); 17843f012e29Smrg memset(ptr, 0, BUFFER_SIZE); 17853f012e29Smrg 17863f012e29Smrg r = amdgpu_create_bo_from_user_mem(device_handle, 17873f012e29Smrg ptr, BUFFER_SIZE, &buf_handle); 17883f012e29Smrg CU_ASSERT_EQUAL(r, 0); 17893f012e29Smrg 17903f012e29Smrg r = amdgpu_va_range_alloc(device_handle, 17913f012e29Smrg amdgpu_gpu_va_range_general, 17923f012e29Smrg BUFFER_SIZE, 1, 0, &bo_mc, 17933f012e29Smrg &va_handle, 0); 17943f012e29Smrg CU_ASSERT_EQUAL(r, 0); 17953f012e29Smrg 17963f012e29Smrg r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP); 17973f012e29Smrg CU_ASSERT_EQUAL(r, 0); 17983f012e29Smrg 17993f012e29Smrg handle = buf_handle; 18003f012e29Smrg 18013f012e29Smrg j = i = 0; 1802d8807b2fSmrg 1803d8807b2fSmrg if (family_id == AMDGPU_FAMILY_SI) 1804d8807b2fSmrg pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0, 1805d8807b2fSmrg sdma_write_length); 1806d8807b2fSmrg else 1807d8807b2fSmrg pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE, 1808d8807b2fSmrg SDMA_WRITE_SUB_OPCODE_LINEAR, 0); 18093f012e29Smrg pm4[i++] = 0xffffffff & bo_mc; 18103f012e29Smrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1811d8807b2fSmrg if (family_id >= AMDGPU_FAMILY_AI) 1812d8807b2fSmrg pm4[i++] = sdma_write_length - 1; 1813d8807b2fSmrg else if (family_id != AMDGPU_FAMILY_SI) 1814d8807b2fSmrg pm4[i++] = sdma_write_length; 18153f012e29Smrg 18163f012e29Smrg while (j++ < sdma_write_length) 18173f012e29Smrg pm4[i++] = 0xdeadbeaf; 18183f012e29Smrg 181900a23bdaSmrg if (!fork()) { 182000a23bdaSmrg pm4[0] 
= 0x0; 182100a23bdaSmrg exit(0); 182200a23bdaSmrg } 182300a23bdaSmrg 18243f012e29Smrg amdgpu_test_exec_cs_helper(context_handle, 18253f012e29Smrg AMDGPU_HW_IP_DMA, 0, 18263f012e29Smrg i, pm4, 18273f012e29Smrg 1, &handle, 18283f012e29Smrg ib_info, ibs_request); 18293f012e29Smrg i = 0; 18303f012e29Smrg while (i < sdma_write_length) { 18313f012e29Smrg CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf); 18323f012e29Smrg } 18333f012e29Smrg free(ibs_request); 18343f012e29Smrg free(ib_info); 18353f012e29Smrg free(pm4); 18363f012e29Smrg 18373f012e29Smrg r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP); 18383f012e29Smrg CU_ASSERT_EQUAL(r, 0); 18393f012e29Smrg r = amdgpu_va_range_free(va_handle); 18403f012e29Smrg CU_ASSERT_EQUAL(r, 0); 18413f012e29Smrg r = amdgpu_bo_free(buf_handle); 18423f012e29Smrg CU_ASSERT_EQUAL(r, 0); 18433f012e29Smrg free(ptr); 18443f012e29Smrg 18453f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 18463f012e29Smrg CU_ASSERT_EQUAL(r, 0); 184700a23bdaSmrg 184800a23bdaSmrg wait(NULL); 184900a23bdaSmrg} 185000a23bdaSmrg 185100a23bdaSmrgstatic void amdgpu_sync_dependency_test(void) 185200a23bdaSmrg{ 185300a23bdaSmrg amdgpu_context_handle context_handle[2]; 185400a23bdaSmrg amdgpu_bo_handle ib_result_handle; 185500a23bdaSmrg void *ib_result_cpu; 185600a23bdaSmrg uint64_t ib_result_mc_address; 185700a23bdaSmrg struct amdgpu_cs_request ibs_request; 185800a23bdaSmrg struct amdgpu_cs_ib_info ib_info; 185900a23bdaSmrg struct amdgpu_cs_fence fence_status; 186000a23bdaSmrg uint32_t expired; 186100a23bdaSmrg int i, j, r; 186200a23bdaSmrg amdgpu_bo_list_handle bo_list; 186300a23bdaSmrg amdgpu_va_handle va_handle; 186400a23bdaSmrg static uint32_t *ptr; 186500a23bdaSmrg uint64_t seq_no; 186600a23bdaSmrg 186700a23bdaSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]); 186800a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 186900a23bdaSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]); 187000a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 
187100a23bdaSmrg 187200a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096, 187300a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 187400a23bdaSmrg &ib_result_handle, &ib_result_cpu, 187500a23bdaSmrg &ib_result_mc_address, &va_handle); 187600a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 187700a23bdaSmrg 187800a23bdaSmrg r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, 187900a23bdaSmrg &bo_list); 188000a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 188100a23bdaSmrg 188200a23bdaSmrg ptr = ib_result_cpu; 188300a23bdaSmrg i = 0; 188400a23bdaSmrg 188500a23bdaSmrg memcpy(ptr + CODE_OFFSET , shader_bin, sizeof(shader_bin)); 188600a23bdaSmrg 188700a23bdaSmrg /* Dispatch minimal init config and verify it's executed */ 188800a23bdaSmrg ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); 188900a23bdaSmrg ptr[i++] = 0x80000000; 189000a23bdaSmrg ptr[i++] = 0x80000000; 189100a23bdaSmrg 189200a23bdaSmrg ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0); 189300a23bdaSmrg ptr[i++] = 0x80000000; 189400a23bdaSmrg 189500a23bdaSmrg 189600a23bdaSmrg /* Program compute regs */ 189700a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 189800a23bdaSmrg ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 189900a23bdaSmrg ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8; 190000a23bdaSmrg ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40; 190100a23bdaSmrg 190200a23bdaSmrg 190300a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 190400a23bdaSmrg ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START; 190500a23bdaSmrg /* 190600a23bdaSmrg * 002c0040 COMPUTE_PGM_RSRC1 <- VGPRS = 0 190700a23bdaSmrg SGPRS = 1 190800a23bdaSmrg PRIORITY = 0 190900a23bdaSmrg FLOAT_MODE = 192 (0xc0) 191000a23bdaSmrg PRIV = 0 191100a23bdaSmrg DX10_CLAMP = 1 191200a23bdaSmrg DEBUG_MODE = 0 191300a23bdaSmrg IEEE_MODE = 0 191400a23bdaSmrg BULKY = 0 191500a23bdaSmrg CDBG_USER = 0 191600a23bdaSmrg * 191700a23bdaSmrg */ 191800a23bdaSmrg ptr[i++] = 0x002c0040; 191900a23bdaSmrg 192000a23bdaSmrg 192100a23bdaSmrg /* 192200a23bdaSmrg * 
00000010 COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0 192300a23bdaSmrg USER_SGPR = 8 192400a23bdaSmrg TRAP_PRESENT = 0 192500a23bdaSmrg TGID_X_EN = 0 192600a23bdaSmrg TGID_Y_EN = 0 192700a23bdaSmrg TGID_Z_EN = 0 192800a23bdaSmrg TG_SIZE_EN = 0 192900a23bdaSmrg TIDIG_COMP_CNT = 0 193000a23bdaSmrg EXCP_EN_MSB = 0 193100a23bdaSmrg LDS_SIZE = 0 193200a23bdaSmrg EXCP_EN = 0 193300a23bdaSmrg * 193400a23bdaSmrg */ 193500a23bdaSmrg ptr[i++] = 0x00000010; 193600a23bdaSmrg 193700a23bdaSmrg 193800a23bdaSmrg/* 193900a23bdaSmrg * 00000100 COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100) 194000a23bdaSmrg WAVESIZE = 0 194100a23bdaSmrg * 194200a23bdaSmrg */ 194300a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 194400a23bdaSmrg ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START; 194500a23bdaSmrg ptr[i++] = 0x00000100; 194600a23bdaSmrg 194700a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 194800a23bdaSmrg ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START; 194900a23bdaSmrg ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4); 195000a23bdaSmrg ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32; 195100a23bdaSmrg 195200a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 195300a23bdaSmrg ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START; 195400a23bdaSmrg ptr[i++] = 0; 195500a23bdaSmrg 195600a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3); 195700a23bdaSmrg ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START; 195800a23bdaSmrg ptr[i++] = 1; 195900a23bdaSmrg ptr[i++] = 1; 196000a23bdaSmrg ptr[i++] = 1; 196100a23bdaSmrg 196200a23bdaSmrg 196300a23bdaSmrg /* Dispatch */ 196400a23bdaSmrg ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 196500a23bdaSmrg ptr[i++] = 1; 196600a23bdaSmrg ptr[i++] = 1; 196700a23bdaSmrg ptr[i++] = 1; 196800a23bdaSmrg ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */ 196900a23bdaSmrg 197000a23bdaSmrg 197100a23bdaSmrg while (i & 7) 197200a23bdaSmrg ptr[i++] = 0xffff1000; /* type3 nop packet */ 
197300a23bdaSmrg 197400a23bdaSmrg memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); 197500a23bdaSmrg ib_info.ib_mc_address = ib_result_mc_address; 197600a23bdaSmrg ib_info.size = i; 197700a23bdaSmrg 197800a23bdaSmrg memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); 197900a23bdaSmrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 198000a23bdaSmrg ibs_request.ring = 0; 198100a23bdaSmrg ibs_request.number_of_ibs = 1; 198200a23bdaSmrg ibs_request.ibs = &ib_info; 198300a23bdaSmrg ibs_request.resources = bo_list; 198400a23bdaSmrg ibs_request.fence_info.handle = NULL; 198500a23bdaSmrg 198600a23bdaSmrg r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request, 1); 198700a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 198800a23bdaSmrg seq_no = ibs_request.seq_no; 198900a23bdaSmrg 199000a23bdaSmrg 199100a23bdaSmrg 199200a23bdaSmrg /* Prepare second command with dependency on the first */ 199300a23bdaSmrg j = i; 199400a23bdaSmrg ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3); 199500a23bdaSmrg ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 199600a23bdaSmrg ptr[i++] = 0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4); 199700a23bdaSmrg ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32; 199800a23bdaSmrg ptr[i++] = 99; 199900a23bdaSmrg 200000a23bdaSmrg while (i & 7) 200100a23bdaSmrg ptr[i++] = 0xffff1000; /* type3 nop packet */ 200200a23bdaSmrg 200300a23bdaSmrg memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); 200400a23bdaSmrg ib_info.ib_mc_address = ib_result_mc_address + j * 4; 200500a23bdaSmrg ib_info.size = i - j; 200600a23bdaSmrg 200700a23bdaSmrg memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); 200800a23bdaSmrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 200900a23bdaSmrg ibs_request.ring = 0; 201000a23bdaSmrg ibs_request.number_of_ibs = 1; 201100a23bdaSmrg ibs_request.ibs = &ib_info; 201200a23bdaSmrg ibs_request.resources = bo_list; 201300a23bdaSmrg ibs_request.fence_info.handle = NULL; 201400a23bdaSmrg 201500a23bdaSmrg 
ibs_request.number_of_dependencies = 1; 201600a23bdaSmrg 201700a23bdaSmrg ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies)); 201800a23bdaSmrg ibs_request.dependencies[0].context = context_handle[1]; 201900a23bdaSmrg ibs_request.dependencies[0].ip_instance = 0; 202000a23bdaSmrg ibs_request.dependencies[0].ring = 0; 202100a23bdaSmrg ibs_request.dependencies[0].fence = seq_no; 202200a23bdaSmrg 202300a23bdaSmrg 202400a23bdaSmrg r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request, 1); 202500a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 202600a23bdaSmrg 202700a23bdaSmrg 202800a23bdaSmrg memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence)); 202900a23bdaSmrg fence_status.context = context_handle[0]; 203000a23bdaSmrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 203100a23bdaSmrg fence_status.ip_instance = 0; 203200a23bdaSmrg fence_status.ring = 0; 203300a23bdaSmrg fence_status.fence = ibs_request.seq_no; 203400a23bdaSmrg 203500a23bdaSmrg r = amdgpu_cs_query_fence_status(&fence_status, 203600a23bdaSmrg AMDGPU_TIMEOUT_INFINITE,0, &expired); 203700a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 203800a23bdaSmrg 203900a23bdaSmrg /* Expect the second command to wait for shader to complete */ 204000a23bdaSmrg CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99); 204100a23bdaSmrg 204200a23bdaSmrg r = amdgpu_bo_list_destroy(bo_list); 204300a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 204400a23bdaSmrg 204500a23bdaSmrg r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 204600a23bdaSmrg ib_result_mc_address, 4096); 204700a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 204800a23bdaSmrg 204900a23bdaSmrg r = amdgpu_cs_ctx_free(context_handle[0]); 205000a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 205100a23bdaSmrg r = amdgpu_cs_ctx_free(context_handle[1]); 205200a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 205300a23bdaSmrg 205400a23bdaSmrg free(ibs_request.dependencies); 20553f012e29Smrg} 20565324fb0dSmrg 20575324fb0dSmrgstatic int amdgpu_dispatch_load_cs_shader(uint8_t *ptr, 20585324fb0dSmrg int cs_type) 20595324fb0dSmrg{ 20605324fb0dSmrg 
uint32_t shader_size; 20615324fb0dSmrg const uint32_t *shader; 20625324fb0dSmrg 20635324fb0dSmrg switch (cs_type) { 20645324fb0dSmrg case CS_BUFFERCLEAR: 20655324fb0dSmrg shader = bufferclear_cs_shader_gfx9; 20665324fb0dSmrg shader_size = sizeof(bufferclear_cs_shader_gfx9); 20675324fb0dSmrg break; 20685324fb0dSmrg case CS_BUFFERCOPY: 20695324fb0dSmrg shader = buffercopy_cs_shader_gfx9; 20705324fb0dSmrg shader_size = sizeof(buffercopy_cs_shader_gfx9); 20715324fb0dSmrg break; 20725324fb0dSmrg default: 20735324fb0dSmrg return -1; 20745324fb0dSmrg break; 20755324fb0dSmrg } 20765324fb0dSmrg 20775324fb0dSmrg memcpy(ptr, shader, shader_size); 20785324fb0dSmrg return 0; 20795324fb0dSmrg} 20805324fb0dSmrg 20815324fb0dSmrgstatic int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type) 20825324fb0dSmrg{ 20835324fb0dSmrg int i = 0; 20845324fb0dSmrg 20855324fb0dSmrg /* Write context control and load shadowing register if necessary */ 20865324fb0dSmrg if (ip_type == AMDGPU_HW_IP_GFX) { 20875324fb0dSmrg ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); 20885324fb0dSmrg ptr[i++] = 0x80000000; 20895324fb0dSmrg ptr[i++] = 0x80000000; 20905324fb0dSmrg } 20915324fb0dSmrg 20925324fb0dSmrg /* Issue commands to set default compute state. 
*/ 20935324fb0dSmrg /* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */ 20945324fb0dSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 3); 20955324fb0dSmrg ptr[i++] = 0x204; 20965324fb0dSmrg i += 3; 20975324fb0dSmrg /* clear mmCOMPUTE_RESOURCE_LIMITS */ 20985324fb0dSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 20995324fb0dSmrg ptr[i++] = 0x215; 21005324fb0dSmrg ptr[i++] = 0; 21015324fb0dSmrg /* clear mmCOMPUTE_TMPRING_SIZE */ 21025324fb0dSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 21035324fb0dSmrg ptr[i++] = 0x218; 21045324fb0dSmrg ptr[i++] = 0; 21055324fb0dSmrg 21065324fb0dSmrg return i; 21075324fb0dSmrg} 21085324fb0dSmrg 21095324fb0dSmrgstatic int amdgpu_dispatch_write_cumask(uint32_t *ptr) 21105324fb0dSmrg{ 21115324fb0dSmrg int i = 0; 21125324fb0dSmrg 21135324fb0dSmrg /* Issue commands to set cu mask used in current dispatch */ 21145324fb0dSmrg /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */ 21155324fb0dSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); 21165324fb0dSmrg ptr[i++] = 0x216; 21175324fb0dSmrg ptr[i++] = 0xffffffff; 21185324fb0dSmrg ptr[i++] = 0xffffffff; 21195324fb0dSmrg /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */ 21205324fb0dSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); 21215324fb0dSmrg ptr[i++] = 0x219; 21225324fb0dSmrg ptr[i++] = 0xffffffff; 21235324fb0dSmrg ptr[i++] = 0xffffffff; 21245324fb0dSmrg 21255324fb0dSmrg return i; 21265324fb0dSmrg} 21275324fb0dSmrg 21285324fb0dSmrgstatic int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr) 21295324fb0dSmrg{ 21305324fb0dSmrg int i, j; 21315324fb0dSmrg 21325324fb0dSmrg i = 0; 21335324fb0dSmrg 21345324fb0dSmrg /* Writes shader state to HW */ 21355324fb0dSmrg /* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */ 21365324fb0dSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); 21375324fb0dSmrg ptr[i++] = 0x20c; 21385324fb0dSmrg ptr[i++] = (shader_addr >> 8); 21395324fb0dSmrg ptr[i++] = (shader_addr >> 40); 
21405324fb0dSmrg /* write sh regs*/ 21415324fb0dSmrg for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) { 21425324fb0dSmrg ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 21435324fb0dSmrg /* - Gfx9ShRegBase */ 21445324fb0dSmrg ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00; 21455324fb0dSmrg ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1]; 21465324fb0dSmrg } 21475324fb0dSmrg 21485324fb0dSmrg return i; 21495324fb0dSmrg} 21505324fb0dSmrg 21515324fb0dSmrgstatic void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle, 21525324fb0dSmrg uint32_t ip_type, 21535324fb0dSmrg uint32_t ring) 21545324fb0dSmrg{ 21555324fb0dSmrg amdgpu_context_handle context_handle; 21565324fb0dSmrg amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3]; 21575324fb0dSmrg volatile unsigned char *ptr_dst; 21585324fb0dSmrg void *ptr_shader; 21595324fb0dSmrg uint32_t *ptr_cmd; 21605324fb0dSmrg uint64_t mc_address_dst, mc_address_shader, mc_address_cmd; 21615324fb0dSmrg amdgpu_va_handle va_dst, va_shader, va_cmd; 21625324fb0dSmrg int i, r; 21635324fb0dSmrg int bo_dst_size = 16384; 21645324fb0dSmrg int bo_shader_size = 4096; 21655324fb0dSmrg int bo_cmd_size = 4096; 21665324fb0dSmrg struct amdgpu_cs_request ibs_request = {0}; 21675324fb0dSmrg struct amdgpu_cs_ib_info ib_info= {0}; 21685324fb0dSmrg amdgpu_bo_list_handle bo_list; 21695324fb0dSmrg struct amdgpu_cs_fence fence_status = {0}; 21705324fb0dSmrg uint32_t expired; 21715324fb0dSmrg 21725324fb0dSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 21735324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 21745324fb0dSmrg 21755324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 21765324fb0dSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 21775324fb0dSmrg &bo_cmd, (void **)&ptr_cmd, 21785324fb0dSmrg &mc_address_cmd, &va_cmd); 21795324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 21805324fb0dSmrg memset(ptr_cmd, 0, bo_cmd_size); 21815324fb0dSmrg 21825324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 
4096, 21835324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 21845324fb0dSmrg &bo_shader, &ptr_shader, 21855324fb0dSmrg &mc_address_shader, &va_shader); 21865324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 21875324fb0dSmrg 21885324fb0dSmrg r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR); 21895324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 21905324fb0dSmrg 21915324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 21925324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 21935324fb0dSmrg &bo_dst, (void **)&ptr_dst, 21945324fb0dSmrg &mc_address_dst, &va_dst); 21955324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 21965324fb0dSmrg 21975324fb0dSmrg i = 0; 21985324fb0dSmrg i += amdgpu_dispatch_init(ptr_cmd + i, ip_type); 21995324fb0dSmrg 22005324fb0dSmrg /* Issue commands to set cu mask used in current dispatch */ 22015324fb0dSmrg i += amdgpu_dispatch_write_cumask(ptr_cmd + i); 22025324fb0dSmrg 22035324fb0dSmrg /* Writes shader state to HW */ 22045324fb0dSmrg i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader); 22055324fb0dSmrg 22065324fb0dSmrg /* Write constant data */ 22075324fb0dSmrg /* Writes the UAV constant data to the SGPRs. 
*/ 22085324fb0dSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 22095324fb0dSmrg ptr_cmd[i++] = 0x240; 22105324fb0dSmrg ptr_cmd[i++] = mc_address_dst; 22115324fb0dSmrg ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000; 22125324fb0dSmrg ptr_cmd[i++] = 0x400; 22135324fb0dSmrg ptr_cmd[i++] = 0x74fac; 22145324fb0dSmrg 22155324fb0dSmrg /* Sets a range of pixel shader constants */ 22165324fb0dSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 22175324fb0dSmrg ptr_cmd[i++] = 0x244; 22185324fb0dSmrg ptr_cmd[i++] = 0x22222222; 22195324fb0dSmrg ptr_cmd[i++] = 0x22222222; 22205324fb0dSmrg ptr_cmd[i++] = 0x22222222; 22215324fb0dSmrg ptr_cmd[i++] = 0x22222222; 22225324fb0dSmrg 22235324fb0dSmrg /* dispatch direct command */ 22245324fb0dSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3); 22255324fb0dSmrg ptr_cmd[i++] = 0x10; 22265324fb0dSmrg ptr_cmd[i++] = 1; 22275324fb0dSmrg ptr_cmd[i++] = 1; 22285324fb0dSmrg ptr_cmd[i++] = 1; 22295324fb0dSmrg 22305324fb0dSmrg while (i & 7) 22315324fb0dSmrg ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 22325324fb0dSmrg 22335324fb0dSmrg resources[0] = bo_dst; 22345324fb0dSmrg resources[1] = bo_shader; 22355324fb0dSmrg resources[2] = bo_cmd; 22365324fb0dSmrg r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list); 22375324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 22385324fb0dSmrg 22395324fb0dSmrg ib_info.ib_mc_address = mc_address_cmd; 22405324fb0dSmrg ib_info.size = i; 22415324fb0dSmrg ibs_request.ip_type = ip_type; 22425324fb0dSmrg ibs_request.ring = ring; 22435324fb0dSmrg ibs_request.resources = bo_list; 22445324fb0dSmrg ibs_request.number_of_ibs = 1; 22455324fb0dSmrg ibs_request.ibs = &ib_info; 22465324fb0dSmrg ibs_request.fence_info.handle = NULL; 22475324fb0dSmrg 22485324fb0dSmrg /* submit CS */ 22495324fb0dSmrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 22505324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 22515324fb0dSmrg 22525324fb0dSmrg r = amdgpu_bo_list_destroy(bo_list); 22535324fb0dSmrg 
CU_ASSERT_EQUAL(r, 0); 22545324fb0dSmrg 22555324fb0dSmrg fence_status.ip_type = ip_type; 22565324fb0dSmrg fence_status.ip_instance = 0; 22575324fb0dSmrg fence_status.ring = ring; 22585324fb0dSmrg fence_status.context = context_handle; 22595324fb0dSmrg fence_status.fence = ibs_request.seq_no; 22605324fb0dSmrg 22615324fb0dSmrg /* wait for IB accomplished */ 22625324fb0dSmrg r = amdgpu_cs_query_fence_status(&fence_status, 22635324fb0dSmrg AMDGPU_TIMEOUT_INFINITE, 22645324fb0dSmrg 0, &expired); 22655324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 22665324fb0dSmrg CU_ASSERT_EQUAL(expired, true); 22675324fb0dSmrg 22685324fb0dSmrg /* verify if memset test result meets with expected */ 22695324fb0dSmrg i = 0; 22705324fb0dSmrg while(i < bo_dst_size) { 22715324fb0dSmrg CU_ASSERT_EQUAL(ptr_dst[i++], 0x22); 22725324fb0dSmrg } 22735324fb0dSmrg 22745324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 22755324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 22765324fb0dSmrg 22775324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size); 22785324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 22795324fb0dSmrg 22805324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 22815324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 22825324fb0dSmrg 22835324fb0dSmrg r = amdgpu_cs_ctx_free(context_handle); 22845324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 22855324fb0dSmrg} 22865324fb0dSmrg 22875324fb0dSmrgstatic void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle, 22885324fb0dSmrg uint32_t ip_type, 22895324fb0dSmrg uint32_t ring) 22905324fb0dSmrg{ 22915324fb0dSmrg amdgpu_context_handle context_handle; 22925324fb0dSmrg amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4]; 22935324fb0dSmrg volatile unsigned char *ptr_dst; 22945324fb0dSmrg void *ptr_shader; 22955324fb0dSmrg unsigned char *ptr_src; 22965324fb0dSmrg uint32_t *ptr_cmd; 22975324fb0dSmrg uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd; 
22985324fb0dSmrg amdgpu_va_handle va_src, va_dst, va_shader, va_cmd; 22995324fb0dSmrg int i, r; 23005324fb0dSmrg int bo_dst_size = 16384; 23015324fb0dSmrg int bo_shader_size = 4096; 23025324fb0dSmrg int bo_cmd_size = 4096; 23035324fb0dSmrg struct amdgpu_cs_request ibs_request = {0}; 23045324fb0dSmrg struct amdgpu_cs_ib_info ib_info= {0}; 23055324fb0dSmrg uint32_t expired; 23065324fb0dSmrg amdgpu_bo_list_handle bo_list; 23075324fb0dSmrg struct amdgpu_cs_fence fence_status = {0}; 23085324fb0dSmrg 23095324fb0dSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 23105324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 23115324fb0dSmrg 23125324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 23135324fb0dSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 23145324fb0dSmrg &bo_cmd, (void **)&ptr_cmd, 23155324fb0dSmrg &mc_address_cmd, &va_cmd); 23165324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 23175324fb0dSmrg memset(ptr_cmd, 0, bo_cmd_size); 23185324fb0dSmrg 23195324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 23205324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 23215324fb0dSmrg &bo_shader, &ptr_shader, 23225324fb0dSmrg &mc_address_shader, &va_shader); 23235324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 23245324fb0dSmrg 23255324fb0dSmrg r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCOPY ); 23265324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 23275324fb0dSmrg 23285324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 23295324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 23305324fb0dSmrg &bo_src, (void **)&ptr_src, 23315324fb0dSmrg &mc_address_src, &va_src); 23325324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 23335324fb0dSmrg 23345324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 23355324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 23365324fb0dSmrg &bo_dst, (void **)&ptr_dst, 23375324fb0dSmrg &mc_address_dst, &va_dst); 23385324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 23395324fb0dSmrg 23405324fb0dSmrg memset(ptr_src, 0x55, bo_dst_size); 23415324fb0dSmrg 23425324fb0dSmrg i = 0; 
23435324fb0dSmrg i += amdgpu_dispatch_init(ptr_cmd + i, ip_type); 23445324fb0dSmrg 23455324fb0dSmrg /* Issue commands to set cu mask used in current dispatch */ 23465324fb0dSmrg i += amdgpu_dispatch_write_cumask(ptr_cmd + i); 23475324fb0dSmrg 23485324fb0dSmrg /* Writes shader state to HW */ 23495324fb0dSmrg i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader); 23505324fb0dSmrg 23515324fb0dSmrg /* Write constant data */ 23525324fb0dSmrg /* Writes the texture resource constants data to the SGPRs */ 23535324fb0dSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 23545324fb0dSmrg ptr_cmd[i++] = 0x240; 23555324fb0dSmrg ptr_cmd[i++] = mc_address_src; 23565324fb0dSmrg ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000; 23575324fb0dSmrg ptr_cmd[i++] = 0x400; 23585324fb0dSmrg ptr_cmd[i++] = 0x74fac; 23595324fb0dSmrg 23605324fb0dSmrg /* Writes the UAV constant data to the SGPRs. */ 23615324fb0dSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 23625324fb0dSmrg ptr_cmd[i++] = 0x244; 23635324fb0dSmrg ptr_cmd[i++] = mc_address_dst; 23645324fb0dSmrg ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000; 23655324fb0dSmrg ptr_cmd[i++] = 0x400; 23665324fb0dSmrg ptr_cmd[i++] = 0x74fac; 23675324fb0dSmrg 23685324fb0dSmrg /* dispatch direct command */ 23695324fb0dSmrg ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3); 23705324fb0dSmrg ptr_cmd[i++] = 0x10; 23715324fb0dSmrg ptr_cmd[i++] = 1; 23725324fb0dSmrg ptr_cmd[i++] = 1; 23735324fb0dSmrg ptr_cmd[i++] = 1; 23745324fb0dSmrg 23755324fb0dSmrg while (i & 7) 23765324fb0dSmrg ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 23775324fb0dSmrg 23785324fb0dSmrg resources[0] = bo_shader; 23795324fb0dSmrg resources[1] = bo_src; 23805324fb0dSmrg resources[2] = bo_dst; 23815324fb0dSmrg resources[3] = bo_cmd; 23825324fb0dSmrg r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list); 23835324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 23845324fb0dSmrg 23855324fb0dSmrg ib_info.ib_mc_address = mc_address_cmd; 23865324fb0dSmrg 
ib_info.size = i; 23875324fb0dSmrg ibs_request.ip_type = ip_type; 23885324fb0dSmrg ibs_request.ring = ring; 23895324fb0dSmrg ibs_request.resources = bo_list; 23905324fb0dSmrg ibs_request.number_of_ibs = 1; 23915324fb0dSmrg ibs_request.ibs = &ib_info; 23925324fb0dSmrg ibs_request.fence_info.handle = NULL; 23935324fb0dSmrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 23945324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 23955324fb0dSmrg 23965324fb0dSmrg fence_status.ip_type = ip_type; 23975324fb0dSmrg fence_status.ip_instance = 0; 23985324fb0dSmrg fence_status.ring = ring; 23995324fb0dSmrg fence_status.context = context_handle; 24005324fb0dSmrg fence_status.fence = ibs_request.seq_no; 24015324fb0dSmrg 24025324fb0dSmrg /* wait for IB accomplished */ 24035324fb0dSmrg r = amdgpu_cs_query_fence_status(&fence_status, 24045324fb0dSmrg AMDGPU_TIMEOUT_INFINITE, 24055324fb0dSmrg 0, &expired); 24065324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 24075324fb0dSmrg CU_ASSERT_EQUAL(expired, true); 24085324fb0dSmrg 24095324fb0dSmrg /* verify if memcpy test result meets with expected */ 24105324fb0dSmrg i = 0; 24115324fb0dSmrg while(i < bo_dst_size) { 24125324fb0dSmrg CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]); 24135324fb0dSmrg i++; 24145324fb0dSmrg } 24155324fb0dSmrg 24165324fb0dSmrg r = amdgpu_bo_list_destroy(bo_list); 24175324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 24185324fb0dSmrg 24195324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size); 24205324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 24215324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 24225324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 24235324fb0dSmrg 24245324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 24255324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 24265324fb0dSmrg 24275324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size); 24285324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 24295324fb0dSmrg 24305324fb0dSmrg r = 
amdgpu_cs_ctx_free(context_handle); 24315324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 24325324fb0dSmrg} 24335324fb0dSmrgstatic void amdgpu_dispatch_test(void) 24345324fb0dSmrg{ 24355324fb0dSmrg int r; 24365324fb0dSmrg struct drm_amdgpu_info_hw_ip info; 24375324fb0dSmrg uint32_t ring_id; 24385324fb0dSmrg 24395324fb0dSmrg r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info); 24405324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 24415324fb0dSmrg 24425324fb0dSmrg for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 24435324fb0dSmrg amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id); 24445324fb0dSmrg amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id); 24455324fb0dSmrg } 24465324fb0dSmrg 24475324fb0dSmrg r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info); 24485324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 24495324fb0dSmrg 24505324fb0dSmrg for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 24515324fb0dSmrg amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id); 24525324fb0dSmrg amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id); 24535324fb0dSmrg } 24545324fb0dSmrg} 24555324fb0dSmrg 24565324fb0dSmrgstatic int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type) 24575324fb0dSmrg{ 24585324fb0dSmrg int i; 24595324fb0dSmrg uint32_t shader_offset= 256; 24605324fb0dSmrg uint32_t mem_offset, patch_code_offset; 24615324fb0dSmrg uint32_t shader_size, patchinfo_code_size; 24625324fb0dSmrg const uint32_t *shader; 24635324fb0dSmrg const uint32_t *patchinfo_code; 24645324fb0dSmrg const uint32_t *patchcode_offset; 24655324fb0dSmrg 24665324fb0dSmrg switch (ps_type) { 24675324fb0dSmrg case PS_CONST: 24685324fb0dSmrg shader = ps_const_shader_gfx9; 24695324fb0dSmrg shader_size = sizeof(ps_const_shader_gfx9); 24705324fb0dSmrg patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9; 24715324fb0dSmrg patchinfo_code_size = 
ps_const_shader_patchinfo_code_size_gfx9; 24725324fb0dSmrg patchcode_offset = ps_const_shader_patchinfo_offset_gfx9; 24735324fb0dSmrg break; 24745324fb0dSmrg case PS_TEX: 24755324fb0dSmrg shader = ps_tex_shader_gfx9; 24765324fb0dSmrg shader_size = sizeof(ps_tex_shader_gfx9); 24775324fb0dSmrg patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx9; 24785324fb0dSmrg patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9; 24795324fb0dSmrg patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9; 24805324fb0dSmrg break; 24815324fb0dSmrg default: 24825324fb0dSmrg return -1; 24835324fb0dSmrg break; 24845324fb0dSmrg } 24855324fb0dSmrg 24865324fb0dSmrg /* write main shader program */ 24875324fb0dSmrg for (i = 0 ; i < 10; i++) { 24885324fb0dSmrg mem_offset = i * shader_offset; 24895324fb0dSmrg memcpy(ptr + mem_offset, shader, shader_size); 24905324fb0dSmrg } 24915324fb0dSmrg 24925324fb0dSmrg /* overwrite patch codes */ 24935324fb0dSmrg for (i = 0 ; i < 10; i++) { 24945324fb0dSmrg mem_offset = i * shader_offset + patchcode_offset[0] * sizeof(uint32_t); 24955324fb0dSmrg patch_code_offset = i * patchinfo_code_size; 24965324fb0dSmrg memcpy(ptr + mem_offset, 24975324fb0dSmrg patchinfo_code + patch_code_offset, 24985324fb0dSmrg patchinfo_code_size * sizeof(uint32_t)); 24995324fb0dSmrg } 25005324fb0dSmrg 25015324fb0dSmrg return 0; 25025324fb0dSmrg} 25035324fb0dSmrg 25045324fb0dSmrg/* load RectPosTexFast_VS */ 25055324fb0dSmrgstatic int amdgpu_draw_load_vs_shader(uint8_t *ptr) 25065324fb0dSmrg{ 25075324fb0dSmrg const uint32_t *shader; 25085324fb0dSmrg uint32_t shader_size; 25095324fb0dSmrg 25105324fb0dSmrg shader = vs_RectPosTexFast_shader_gfx9; 25115324fb0dSmrg shader_size = sizeof(vs_RectPosTexFast_shader_gfx9); 25125324fb0dSmrg 25135324fb0dSmrg memcpy(ptr, shader, shader_size); 25145324fb0dSmrg 25155324fb0dSmrg return 0; 25165324fb0dSmrg} 25175324fb0dSmrg 25185324fb0dSmrgstatic int amdgpu_draw_init(uint32_t *ptr) 25195324fb0dSmrg{ 25205324fb0dSmrg int i = 0; 
25215324fb0dSmrg const uint32_t *preamblecache_ptr; 25225324fb0dSmrg uint32_t preamblecache_size; 25235324fb0dSmrg 25245324fb0dSmrg /* Write context control and load shadowing register if necessary */ 25255324fb0dSmrg ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); 25265324fb0dSmrg ptr[i++] = 0x80000000; 25275324fb0dSmrg ptr[i++] = 0x80000000; 25285324fb0dSmrg 25295324fb0dSmrg preamblecache_ptr = preamblecache_gfx9; 25305324fb0dSmrg preamblecache_size = sizeof(preamblecache_gfx9); 25315324fb0dSmrg 25325324fb0dSmrg memcpy(ptr + i, preamblecache_ptr, preamblecache_size); 25335324fb0dSmrg return i + preamblecache_size/sizeof(uint32_t); 25345324fb0dSmrg} 25355324fb0dSmrg 25365324fb0dSmrgstatic int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr, 25375324fb0dSmrg uint64_t dst_addr) 25385324fb0dSmrg{ 25395324fb0dSmrg int i = 0; 25405324fb0dSmrg 25415324fb0dSmrg /* setup color buffer */ 25425324fb0dSmrg /* offset reg 25435324fb0dSmrg 0xA318 CB_COLOR0_BASE 25445324fb0dSmrg 0xA319 CB_COLOR0_BASE_EXT 25455324fb0dSmrg 0xA31A CB_COLOR0_ATTRIB2 25465324fb0dSmrg 0xA31B CB_COLOR0_VIEW 25475324fb0dSmrg 0xA31C CB_COLOR0_INFO 25485324fb0dSmrg 0xA31D CB_COLOR0_ATTRIB 25495324fb0dSmrg 0xA31E CB_COLOR0_DCC_CONTROL 25505324fb0dSmrg 0xA31F CB_COLOR0_CMASK 25515324fb0dSmrg 0xA320 CB_COLOR0_CMASK_BASE_EXT 25525324fb0dSmrg 0xA321 CB_COLOR0_FMASK 25535324fb0dSmrg 0xA322 CB_COLOR0_FMASK_BASE_EXT 25545324fb0dSmrg 0xA323 CB_COLOR0_CLEAR_WORD0 25555324fb0dSmrg 0xA324 CB_COLOR0_CLEAR_WORD1 25565324fb0dSmrg 0xA325 CB_COLOR0_DCC_BASE 25575324fb0dSmrg 0xA326 CB_COLOR0_DCC_BASE_EXT */ 25585324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15); 25595324fb0dSmrg ptr[i++] = 0x318; 25605324fb0dSmrg ptr[i++] = dst_addr >> 8; 25615324fb0dSmrg ptr[i++] = dst_addr >> 40; 25625324fb0dSmrg ptr[i++] = 0x7c01f; 25635324fb0dSmrg ptr[i++] = 0; 25645324fb0dSmrg ptr[i++] = 0x50438; 25655324fb0dSmrg ptr[i++] = 0x10140000; 25665324fb0dSmrg i += 9; 25675324fb0dSmrg 25685324fb0dSmrg /* mmCB_MRT0_EPITCH */ 
25695324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 25705324fb0dSmrg ptr[i++] = 0x1e8; 25715324fb0dSmrg ptr[i++] = 0x1f; 25725324fb0dSmrg 25735324fb0dSmrg /* 0xA32B CB_COLOR1_BASE */ 25745324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 25755324fb0dSmrg ptr[i++] = 0x32b; 25765324fb0dSmrg ptr[i++] = 0; 25775324fb0dSmrg 25785324fb0dSmrg /* 0xA33A CB_COLOR1_BASE */ 25795324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 25805324fb0dSmrg ptr[i++] = 0x33a; 25815324fb0dSmrg ptr[i++] = 0; 25825324fb0dSmrg 25835324fb0dSmrg /* SPI_SHADER_COL_FORMAT */ 25845324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 25855324fb0dSmrg ptr[i++] = 0x1c5; 25865324fb0dSmrg ptr[i++] = 9; 25875324fb0dSmrg 25885324fb0dSmrg /* Setup depth buffer */ 25895324fb0dSmrg /* mmDB_Z_INFO */ 25905324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); 25915324fb0dSmrg ptr[i++] = 0xe; 25925324fb0dSmrg i += 2; 25935324fb0dSmrg 25945324fb0dSmrg return i; 25955324fb0dSmrg} 25965324fb0dSmrg 25975324fb0dSmrgstatic int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr) 25985324fb0dSmrg{ 25995324fb0dSmrg int i = 0; 26005324fb0dSmrg const uint32_t *cached_cmd_ptr; 26015324fb0dSmrg uint32_t cached_cmd_size; 26025324fb0dSmrg 26035324fb0dSmrg /* mmPA_SC_TILE_STEERING_OVERRIDE */ 26045324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 26055324fb0dSmrg ptr[i++] = 0xd7; 26065324fb0dSmrg ptr[i++] = 0; 26075324fb0dSmrg 26085324fb0dSmrg ptr[i++] = 0xffff1000; 26095324fb0dSmrg ptr[i++] = 0xc0021000; 26105324fb0dSmrg 26115324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 26125324fb0dSmrg ptr[i++] = 0xd7; 26135324fb0dSmrg ptr[i++] = 1; 26145324fb0dSmrg 26155324fb0dSmrg /* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */ 26165324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16); 26175324fb0dSmrg ptr[i++] = 0x2fe; 26185324fb0dSmrg i += 16; 26195324fb0dSmrg 26205324fb0dSmrg /* mmPA_SC_CENTROID_PRIORITY_0 */ 26215324fb0dSmrg ptr[i++] = 
PACKET3(PACKET3_SET_CONTEXT_REG, 2); 26225324fb0dSmrg ptr[i++] = 0x2f5; 26235324fb0dSmrg i += 2; 26245324fb0dSmrg 26255324fb0dSmrg cached_cmd_ptr = cached_cmd_gfx9; 26265324fb0dSmrg cached_cmd_size = sizeof(cached_cmd_gfx9); 26275324fb0dSmrg 26285324fb0dSmrg memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size); 26295324fb0dSmrg i += cached_cmd_size/sizeof(uint32_t); 26305324fb0dSmrg 26315324fb0dSmrg return i; 26325324fb0dSmrg} 26335324fb0dSmrg 26345324fb0dSmrgstatic int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr, 26355324fb0dSmrg int ps_type, 26365324fb0dSmrg uint64_t shader_addr) 26375324fb0dSmrg{ 26385324fb0dSmrg int i = 0; 26395324fb0dSmrg 26405324fb0dSmrg /* mmPA_CL_VS_OUT_CNTL */ 26415324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 26425324fb0dSmrg ptr[i++] = 0x207; 26435324fb0dSmrg ptr[i++] = 0; 26445324fb0dSmrg 26455324fb0dSmrg /* mmSPI_SHADER_PGM_RSRC3_VS */ 26465324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 26475324fb0dSmrg ptr[i++] = 0x46; 26485324fb0dSmrg ptr[i++] = 0xffff; 26495324fb0dSmrg 26505324fb0dSmrg /* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */ 26515324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 26525324fb0dSmrg ptr[i++] = 0x48; 26535324fb0dSmrg ptr[i++] = shader_addr >> 8; 26545324fb0dSmrg ptr[i++] = shader_addr >> 40; 26555324fb0dSmrg 26565324fb0dSmrg /* mmSPI_SHADER_PGM_RSRC1_VS */ 26575324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 26585324fb0dSmrg ptr[i++] = 0x4a; 26595324fb0dSmrg ptr[i++] = 0xc0081; 26605324fb0dSmrg /* mmSPI_SHADER_PGM_RSRC2_VS */ 26615324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 26625324fb0dSmrg ptr[i++] = 0x4b; 26635324fb0dSmrg ptr[i++] = 0x18; 26645324fb0dSmrg 26655324fb0dSmrg /* mmSPI_VS_OUT_CONFIG */ 26665324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 26675324fb0dSmrg ptr[i++] = 0x1b1; 26685324fb0dSmrg ptr[i++] = 2; 26695324fb0dSmrg 26705324fb0dSmrg /* mmSPI_SHADER_POS_FORMAT */ 26715324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 
26725324fb0dSmrg ptr[i++] = 0x1c3; 26735324fb0dSmrg ptr[i++] = 4; 26745324fb0dSmrg 26755324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4); 26765324fb0dSmrg ptr[i++] = 0x4c; 26775324fb0dSmrg i += 2; 26785324fb0dSmrg ptr[i++] = 0x42000000; 26795324fb0dSmrg ptr[i++] = 0x42000000; 26805324fb0dSmrg 26815324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4); 26825324fb0dSmrg ptr[i++] = 0x50; 26835324fb0dSmrg i += 2; 26845324fb0dSmrg if (ps_type == PS_CONST) { 26855324fb0dSmrg i += 2; 26865324fb0dSmrg } else if (ps_type == PS_TEX) { 26875324fb0dSmrg ptr[i++] = 0x3f800000; 26885324fb0dSmrg ptr[i++] = 0x3f800000; 26895324fb0dSmrg } 26905324fb0dSmrg 26915324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4); 26925324fb0dSmrg ptr[i++] = 0x54; 26935324fb0dSmrg i += 4; 26945324fb0dSmrg 26955324fb0dSmrg return i; 26965324fb0dSmrg} 26975324fb0dSmrg 26985324fb0dSmrgstatic int amdgpu_draw_ps_write2hw(uint32_t *ptr, 26995324fb0dSmrg int ps_type, 27005324fb0dSmrg uint64_t shader_addr) 27015324fb0dSmrg{ 27025324fb0dSmrg int i, j; 27035324fb0dSmrg const uint32_t *sh_registers; 27045324fb0dSmrg const uint32_t *context_registers; 27055324fb0dSmrg uint32_t num_sh_reg, num_context_reg; 27065324fb0dSmrg 27075324fb0dSmrg if (ps_type == PS_CONST) { 27085324fb0dSmrg sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9; 27095324fb0dSmrg context_registers = (const uint32_t *)ps_const_context_reg_gfx9; 27105324fb0dSmrg num_sh_reg = ps_num_sh_registers_gfx9; 27115324fb0dSmrg num_context_reg = ps_num_context_registers_gfx9; 27125324fb0dSmrg } else if (ps_type == PS_TEX) { 27135324fb0dSmrg sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9; 27145324fb0dSmrg context_registers = (const uint32_t *)ps_tex_context_reg_gfx9; 27155324fb0dSmrg num_sh_reg = ps_num_sh_registers_gfx9; 27165324fb0dSmrg num_context_reg = ps_num_context_registers_gfx9; 27175324fb0dSmrg } 27185324fb0dSmrg 27195324fb0dSmrg i = 0; 27205324fb0dSmrg 27215324fb0dSmrg /* 0x2c07 SPI_SHADER_PGM_RSRC3_PS 27225324fb0dSmrg 0x2c08 
SPI_SHADER_PGM_LO_PS 27235324fb0dSmrg 0x2c09 SPI_SHADER_PGM_HI_PS */ 27245324fb0dSmrg shader_addr += 256 * 9; 27255324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3); 27265324fb0dSmrg ptr[i++] = 0x7; 27275324fb0dSmrg ptr[i++] = 0xffff; 27285324fb0dSmrg ptr[i++] = shader_addr >> 8; 27295324fb0dSmrg ptr[i++] = shader_addr >> 40; 27305324fb0dSmrg 27315324fb0dSmrg for (j = 0; j < num_sh_reg; j++) { 27325324fb0dSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 27335324fb0dSmrg ptr[i++] = sh_registers[j * 2] - 0x2c00; 27345324fb0dSmrg ptr[i++] = sh_registers[j * 2 + 1]; 27355324fb0dSmrg } 27365324fb0dSmrg 27375324fb0dSmrg for (j = 0; j < num_context_reg; j++) { 27385324fb0dSmrg if (context_registers[j * 2] != 0xA1C5) { 27395324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 27405324fb0dSmrg ptr[i++] = context_registers[j * 2] - 0xa000; 27415324fb0dSmrg ptr[i++] = context_registers[j * 2 + 1]; 27425324fb0dSmrg } 27435324fb0dSmrg 27445324fb0dSmrg if (context_registers[j * 2] == 0xA1B4) { 27455324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 27465324fb0dSmrg ptr[i++] = 0x1b3; 27475324fb0dSmrg ptr[i++] = 2; 27485324fb0dSmrg } 27495324fb0dSmrg } 27505324fb0dSmrg 27515324fb0dSmrg return i; 27525324fb0dSmrg} 27535324fb0dSmrg 27545324fb0dSmrgstatic int amdgpu_draw_draw(uint32_t *ptr) 27555324fb0dSmrg{ 27565324fb0dSmrg int i = 0; 27575324fb0dSmrg 27585324fb0dSmrg /* mmIA_MULTI_VGT_PARAM */ 27595324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 27605324fb0dSmrg ptr[i++] = 0x40000258; 27615324fb0dSmrg ptr[i++] = 0xd00ff; 27625324fb0dSmrg 27635324fb0dSmrg /* mmVGT_PRIMITIVE_TYPE */ 27645324fb0dSmrg ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 27655324fb0dSmrg ptr[i++] = 0x10000242; 27665324fb0dSmrg ptr[i++] = 0x11; 27675324fb0dSmrg 27685324fb0dSmrg ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1); 27695324fb0dSmrg ptr[i++] = 3; 27705324fb0dSmrg ptr[i++] = 2; 27715324fb0dSmrg 27725324fb0dSmrg return i; 27735324fb0dSmrg} 27745324fb0dSmrg 27755324fb0dSmrgvoid 
amdgpu_memset_draw(amdgpu_device_handle device_handle, 27765324fb0dSmrg amdgpu_bo_handle bo_shader_ps, 27775324fb0dSmrg amdgpu_bo_handle bo_shader_vs, 27785324fb0dSmrg uint64_t mc_address_shader_ps, 27795324fb0dSmrg uint64_t mc_address_shader_vs, 27805324fb0dSmrg uint32_t ring_id) 27815324fb0dSmrg{ 27825324fb0dSmrg amdgpu_context_handle context_handle; 27835324fb0dSmrg amdgpu_bo_handle bo_dst, bo_cmd, resources[4]; 27845324fb0dSmrg volatile unsigned char *ptr_dst; 27855324fb0dSmrg uint32_t *ptr_cmd; 27865324fb0dSmrg uint64_t mc_address_dst, mc_address_cmd; 27875324fb0dSmrg amdgpu_va_handle va_dst, va_cmd; 27885324fb0dSmrg int i, r; 27895324fb0dSmrg int bo_dst_size = 16384; 27905324fb0dSmrg int bo_cmd_size = 4096; 27915324fb0dSmrg struct amdgpu_cs_request ibs_request = {0}; 27925324fb0dSmrg struct amdgpu_cs_ib_info ib_info = {0}; 27935324fb0dSmrg struct amdgpu_cs_fence fence_status = {0}; 27945324fb0dSmrg uint32_t expired; 27955324fb0dSmrg amdgpu_bo_list_handle bo_list; 27965324fb0dSmrg 27975324fb0dSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 27985324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 27995324fb0dSmrg 28005324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 28015324fb0dSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 28025324fb0dSmrg &bo_cmd, (void **)&ptr_cmd, 28035324fb0dSmrg &mc_address_cmd, &va_cmd); 28045324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 28055324fb0dSmrg memset(ptr_cmd, 0, bo_cmd_size); 28065324fb0dSmrg 28075324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 28085324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 28095324fb0dSmrg &bo_dst, (void **)&ptr_dst, 28105324fb0dSmrg &mc_address_dst, &va_dst); 28115324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 28125324fb0dSmrg 28135324fb0dSmrg i = 0; 28145324fb0dSmrg i += amdgpu_draw_init(ptr_cmd + i); 28155324fb0dSmrg 28165324fb0dSmrg i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst); 28175324fb0dSmrg 28185324fb0dSmrg i += 
amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i); 28195324fb0dSmrg 28205324fb0dSmrg i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs); 28215324fb0dSmrg 28225324fb0dSmrg i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps); 28235324fb0dSmrg 28245324fb0dSmrg ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4); 28255324fb0dSmrg ptr_cmd[i++] = 0xc; 28265324fb0dSmrg ptr_cmd[i++] = 0x33333333; 28275324fb0dSmrg ptr_cmd[i++] = 0x33333333; 28285324fb0dSmrg ptr_cmd[i++] = 0x33333333; 28295324fb0dSmrg ptr_cmd[i++] = 0x33333333; 28305324fb0dSmrg 28315324fb0dSmrg i += amdgpu_draw_draw(ptr_cmd + i); 28325324fb0dSmrg 28335324fb0dSmrg while (i & 7) 28345324fb0dSmrg ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 28355324fb0dSmrg 28365324fb0dSmrg resources[0] = bo_dst; 28375324fb0dSmrg resources[1] = bo_shader_ps; 28385324fb0dSmrg resources[2] = bo_shader_vs; 28395324fb0dSmrg resources[3] = bo_cmd; 28405324fb0dSmrg r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list); 28415324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 28425324fb0dSmrg 28435324fb0dSmrg ib_info.ib_mc_address = mc_address_cmd; 28445324fb0dSmrg ib_info.size = i; 28455324fb0dSmrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 28465324fb0dSmrg ibs_request.ring = ring_id; 28475324fb0dSmrg ibs_request.resources = bo_list; 28485324fb0dSmrg ibs_request.number_of_ibs = 1; 28495324fb0dSmrg ibs_request.ibs = &ib_info; 28505324fb0dSmrg ibs_request.fence_info.handle = NULL; 28515324fb0dSmrg 28525324fb0dSmrg /* submit CS */ 28535324fb0dSmrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 28545324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 28555324fb0dSmrg 28565324fb0dSmrg r = amdgpu_bo_list_destroy(bo_list); 28575324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 28585324fb0dSmrg 28595324fb0dSmrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 28605324fb0dSmrg fence_status.ip_instance = 0; 28615324fb0dSmrg fence_status.ring = ring_id; 28625324fb0dSmrg fence_status.context = context_handle; 
28635324fb0dSmrg fence_status.fence = ibs_request.seq_no; 28645324fb0dSmrg 28655324fb0dSmrg /* wait for IB accomplished */ 28665324fb0dSmrg r = amdgpu_cs_query_fence_status(&fence_status, 28675324fb0dSmrg AMDGPU_TIMEOUT_INFINITE, 28685324fb0dSmrg 0, &expired); 28695324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 28705324fb0dSmrg CU_ASSERT_EQUAL(expired, true); 28715324fb0dSmrg 28725324fb0dSmrg /* verify if memset test result meets with expected */ 28735324fb0dSmrg i = 0; 28745324fb0dSmrg while(i < bo_dst_size) { 28755324fb0dSmrg CU_ASSERT_EQUAL(ptr_dst[i++], 0x33); 28765324fb0dSmrg } 28775324fb0dSmrg 28785324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 28795324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 28805324fb0dSmrg 28815324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 28825324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 28835324fb0dSmrg 28845324fb0dSmrg r = amdgpu_cs_ctx_free(context_handle); 28855324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 28865324fb0dSmrg} 28875324fb0dSmrg 28885324fb0dSmrgstatic void amdgpu_memset_draw_test(amdgpu_device_handle device_handle, 28895324fb0dSmrg uint32_t ring) 28905324fb0dSmrg{ 28915324fb0dSmrg amdgpu_bo_handle bo_shader_ps, bo_shader_vs; 28925324fb0dSmrg void *ptr_shader_ps; 28935324fb0dSmrg void *ptr_shader_vs; 28945324fb0dSmrg uint64_t mc_address_shader_ps, mc_address_shader_vs; 28955324fb0dSmrg amdgpu_va_handle va_shader_ps, va_shader_vs; 28965324fb0dSmrg int r; 28975324fb0dSmrg int bo_shader_size = 4096; 28985324fb0dSmrg 28995324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 29005324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 29015324fb0dSmrg &bo_shader_ps, &ptr_shader_ps, 29025324fb0dSmrg &mc_address_shader_ps, &va_shader_ps); 29035324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 29045324fb0dSmrg 29055324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 29065324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 29075324fb0dSmrg &bo_shader_vs, &ptr_shader_vs, 29085324fb0dSmrg 
&mc_address_shader_vs, &va_shader_vs); 29095324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 29105324fb0dSmrg 29115324fb0dSmrg r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_CONST); 29125324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 29135324fb0dSmrg 29145324fb0dSmrg r = amdgpu_draw_load_vs_shader(ptr_shader_vs); 29155324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 29165324fb0dSmrg 29175324fb0dSmrg amdgpu_memset_draw(device_handle, bo_shader_ps, bo_shader_vs, 29185324fb0dSmrg mc_address_shader_ps, mc_address_shader_vs, ring); 29195324fb0dSmrg 29205324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size); 29215324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 29225324fb0dSmrg 29235324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size); 29245324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 29255324fb0dSmrg} 29265324fb0dSmrg 29275324fb0dSmrgstatic void amdgpu_memcpy_draw(amdgpu_device_handle device_handle, 29285324fb0dSmrg amdgpu_bo_handle bo_shader_ps, 29295324fb0dSmrg amdgpu_bo_handle bo_shader_vs, 29305324fb0dSmrg uint64_t mc_address_shader_ps, 29315324fb0dSmrg uint64_t mc_address_shader_vs, 29325324fb0dSmrg uint32_t ring) 29335324fb0dSmrg{ 29345324fb0dSmrg amdgpu_context_handle context_handle; 29355324fb0dSmrg amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5]; 29365324fb0dSmrg volatile unsigned char *ptr_dst; 29375324fb0dSmrg unsigned char *ptr_src; 29385324fb0dSmrg uint32_t *ptr_cmd; 29395324fb0dSmrg uint64_t mc_address_dst, mc_address_src, mc_address_cmd; 29405324fb0dSmrg amdgpu_va_handle va_dst, va_src, va_cmd; 29415324fb0dSmrg int i, r; 29425324fb0dSmrg int bo_size = 16384; 29435324fb0dSmrg int bo_cmd_size = 4096; 29445324fb0dSmrg struct amdgpu_cs_request ibs_request = {0}; 29455324fb0dSmrg struct amdgpu_cs_ib_info ib_info= {0}; 29465324fb0dSmrg uint32_t hang_state, hangs, expired; 29475324fb0dSmrg amdgpu_bo_list_handle bo_list; 29485324fb0dSmrg struct amdgpu_cs_fence fence_status = {0}; 29495324fb0dSmrg 29505324fb0dSmrg 
r = amdgpu_cs_ctx_create(device_handle, &context_handle); 29515324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 29525324fb0dSmrg 29535324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 29545324fb0dSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 29555324fb0dSmrg &bo_cmd, (void **)&ptr_cmd, 29565324fb0dSmrg &mc_address_cmd, &va_cmd); 29575324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 29585324fb0dSmrg memset(ptr_cmd, 0, bo_cmd_size); 29595324fb0dSmrg 29605324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096, 29615324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 29625324fb0dSmrg &bo_src, (void **)&ptr_src, 29635324fb0dSmrg &mc_address_src, &va_src); 29645324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 29655324fb0dSmrg 29665324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096, 29675324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 29685324fb0dSmrg &bo_dst, (void **)&ptr_dst, 29695324fb0dSmrg &mc_address_dst, &va_dst); 29705324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 29715324fb0dSmrg 29725324fb0dSmrg memset(ptr_src, 0x55, bo_size); 29735324fb0dSmrg 29745324fb0dSmrg i = 0; 29755324fb0dSmrg i += amdgpu_draw_init(ptr_cmd + i); 29765324fb0dSmrg 29775324fb0dSmrg i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst); 29785324fb0dSmrg 29795324fb0dSmrg i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i); 29805324fb0dSmrg 29815324fb0dSmrg i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs); 29825324fb0dSmrg 29835324fb0dSmrg i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps); 29845324fb0dSmrg 29855324fb0dSmrg ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8); 29865324fb0dSmrg ptr_cmd[i++] = 0xc; 29875324fb0dSmrg ptr_cmd[i++] = mc_address_src >> 8; 29885324fb0dSmrg ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000; 29895324fb0dSmrg ptr_cmd[i++] = 0x7c01f; 29905324fb0dSmrg ptr_cmd[i++] = 0x90500fac; 29915324fb0dSmrg ptr_cmd[i++] = 0x3e000; 29925324fb0dSmrg i += 3; 29935324fb0dSmrg 29945324fb0dSmrg ptr_cmd[i++] = 
PACKET3(PKT3_SET_SH_REG, 4); 29955324fb0dSmrg ptr_cmd[i++] = 0x14; 29965324fb0dSmrg ptr_cmd[i++] = 0x92; 29975324fb0dSmrg i += 3; 29985324fb0dSmrg 29995324fb0dSmrg ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 1); 30005324fb0dSmrg ptr_cmd[i++] = 0x191; 30015324fb0dSmrg ptr_cmd[i++] = 0; 30025324fb0dSmrg 30035324fb0dSmrg i += amdgpu_draw_draw(ptr_cmd + i); 30045324fb0dSmrg 30055324fb0dSmrg while (i & 7) 30065324fb0dSmrg ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 30075324fb0dSmrg 30085324fb0dSmrg resources[0] = bo_dst; 30095324fb0dSmrg resources[1] = bo_src; 30105324fb0dSmrg resources[2] = bo_shader_ps; 30115324fb0dSmrg resources[3] = bo_shader_vs; 30125324fb0dSmrg resources[4] = bo_cmd; 30135324fb0dSmrg r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list); 30145324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 30155324fb0dSmrg 30165324fb0dSmrg ib_info.ib_mc_address = mc_address_cmd; 30175324fb0dSmrg ib_info.size = i; 30185324fb0dSmrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 30195324fb0dSmrg ibs_request.ring = ring; 30205324fb0dSmrg ibs_request.resources = bo_list; 30215324fb0dSmrg ibs_request.number_of_ibs = 1; 30225324fb0dSmrg ibs_request.ibs = &ib_info; 30235324fb0dSmrg ibs_request.fence_info.handle = NULL; 30245324fb0dSmrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 30255324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 30265324fb0dSmrg 30275324fb0dSmrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 30285324fb0dSmrg fence_status.ip_instance = 0; 30295324fb0dSmrg fence_status.ring = ring; 30305324fb0dSmrg fence_status.context = context_handle; 30315324fb0dSmrg fence_status.fence = ibs_request.seq_no; 30325324fb0dSmrg 30335324fb0dSmrg /* wait for IB accomplished */ 30345324fb0dSmrg r = amdgpu_cs_query_fence_status(&fence_status, 30355324fb0dSmrg AMDGPU_TIMEOUT_INFINITE, 30365324fb0dSmrg 0, &expired); 30375324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 30385324fb0dSmrg CU_ASSERT_EQUAL(expired, true); 30395324fb0dSmrg 30405324fb0dSmrg /* verify if memcpy test result meets with 
expected */ 30415324fb0dSmrg i = 0; 30425324fb0dSmrg while(i < bo_size) { 30435324fb0dSmrg CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]); 30445324fb0dSmrg i++; 30455324fb0dSmrg } 30465324fb0dSmrg 30475324fb0dSmrg r = amdgpu_bo_list_destroy(bo_list); 30485324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 30495324fb0dSmrg 30505324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size); 30515324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 30525324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size); 30535324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 30545324fb0dSmrg 30555324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 30565324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 30575324fb0dSmrg 30585324fb0dSmrg r = amdgpu_cs_ctx_free(context_handle); 30595324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 30605324fb0dSmrg} 30615324fb0dSmrg 30625324fb0dSmrgstatic void amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring) 30635324fb0dSmrg{ 30645324fb0dSmrg amdgpu_bo_handle bo_shader_ps, bo_shader_vs; 30655324fb0dSmrg void *ptr_shader_ps; 30665324fb0dSmrg void *ptr_shader_vs; 30675324fb0dSmrg uint64_t mc_address_shader_ps, mc_address_shader_vs; 30685324fb0dSmrg amdgpu_va_handle va_shader_ps, va_shader_vs; 30695324fb0dSmrg int bo_shader_size = 4096; 30705324fb0dSmrg int r; 30715324fb0dSmrg 30725324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 30735324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 30745324fb0dSmrg &bo_shader_ps, &ptr_shader_ps, 30755324fb0dSmrg &mc_address_shader_ps, &va_shader_ps); 30765324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 30775324fb0dSmrg 30785324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 30795324fb0dSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, 30805324fb0dSmrg &bo_shader_vs, &ptr_shader_vs, 30815324fb0dSmrg &mc_address_shader_vs, &va_shader_vs); 30825324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 30835324fb0dSmrg 30845324fb0dSmrg r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_TEX); 30855324fb0dSmrg 
CU_ASSERT_EQUAL(r, 0); 30865324fb0dSmrg 30875324fb0dSmrg r = amdgpu_draw_load_vs_shader(ptr_shader_vs); 30885324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 30895324fb0dSmrg 30905324fb0dSmrg amdgpu_memcpy_draw(device_handle, bo_shader_ps, bo_shader_vs, 30915324fb0dSmrg mc_address_shader_ps, mc_address_shader_vs, ring); 30925324fb0dSmrg 30935324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size); 30945324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 30955324fb0dSmrg 30965324fb0dSmrg r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size); 30975324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 30985324fb0dSmrg} 30995324fb0dSmrg 31005324fb0dSmrgstatic void amdgpu_draw_test(void) 31015324fb0dSmrg{ 31025324fb0dSmrg int r; 31035324fb0dSmrg struct drm_amdgpu_info_hw_ip info; 31045324fb0dSmrg uint32_t ring_id; 31055324fb0dSmrg 31065324fb0dSmrg r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info); 31075324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 31085324fb0dSmrg 31095324fb0dSmrg for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 31105324fb0dSmrg amdgpu_memset_draw_test(device_handle, ring_id); 31115324fb0dSmrg amdgpu_memcpy_draw_test(device_handle, ring_id); 31125324fb0dSmrg } 31135324fb0dSmrg} 3114