basic_tests.c revision 00a23bda
13f012e29Smrg/* 23f012e29Smrg * Copyright 2014 Advanced Micro Devices, Inc. 33f012e29Smrg * 43f012e29Smrg * Permission is hereby granted, free of charge, to any person obtaining a 53f012e29Smrg * copy of this software and associated documentation files (the "Software"), 63f012e29Smrg * to deal in the Software without restriction, including without limitation 73f012e29Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 83f012e29Smrg * and/or sell copies of the Software, and to permit persons to whom the 93f012e29Smrg * Software is furnished to do so, subject to the following conditions: 103f012e29Smrg * 113f012e29Smrg * The above copyright notice and this permission notice shall be included in 123f012e29Smrg * all copies or substantial portions of the Software. 133f012e29Smrg * 143f012e29Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 153f012e29Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 163f012e29Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 173f012e29Smrg * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 183f012e29Smrg * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 193f012e29Smrg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 203f012e29Smrg * OTHER DEALINGS IN THE SOFTWARE. 213f012e29Smrg * 223f012e29Smrg*/ 233f012e29Smrg 243f012e29Smrg#ifdef HAVE_CONFIG_H 253f012e29Smrg#include "config.h" 263f012e29Smrg#endif 273f012e29Smrg 283f012e29Smrg#include <stdio.h> 293f012e29Smrg#include <stdlib.h> 303f012e29Smrg#include <unistd.h> 313f012e29Smrg#ifdef HAVE_ALLOCA_H 323f012e29Smrg# include <alloca.h> 333f012e29Smrg#endif 3400a23bdaSmrg#include <sys/wait.h> 353f012e29Smrg 363f012e29Smrg#include "CUnit/Basic.h" 373f012e29Smrg 383f012e29Smrg#include "amdgpu_test.h" 393f012e29Smrg#include "amdgpu_drm.h" 403f012e29Smrg 413f012e29Smrgstatic amdgpu_device_handle device_handle; 423f012e29Smrgstatic uint32_t major_version; 433f012e29Smrgstatic uint32_t minor_version; 44d8807b2fSmrgstatic uint32_t family_id; 453f012e29Smrg 463f012e29Smrgstatic void amdgpu_query_info_test(void); 473f012e29Smrgstatic void amdgpu_command_submission_gfx(void); 483f012e29Smrgstatic void amdgpu_command_submission_compute(void); 49d8807b2fSmrgstatic void amdgpu_command_submission_multi_fence(void); 503f012e29Smrgstatic void amdgpu_command_submission_sdma(void); 513f012e29Smrgstatic void amdgpu_userptr_test(void); 523f012e29Smrgstatic void amdgpu_semaphore_test(void); 5300a23bdaSmrgstatic void amdgpu_sync_dependency_test(void); 5400a23bdaSmrgstatic void amdgpu_bo_eviction_test(void); 553f012e29Smrg 563f012e29Smrgstatic void amdgpu_command_submission_write_linear_helper(unsigned ip_type); 573f012e29Smrgstatic void amdgpu_command_submission_const_fill_helper(unsigned ip_type); 583f012e29Smrgstatic void amdgpu_command_submission_copy_linear_helper(unsigned ip_type); 5900a23bdaSmrgstatic void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle, 6000a23bdaSmrg unsigned ip_type, 6100a23bdaSmrg int instance, int pm4_dw, uint32_t *pm4_src, 6200a23bdaSmrg int res_cnt, amdgpu_bo_handle *resources, 6300a23bdaSmrg struct amdgpu_cs_ib_info *ib_info, 6400a23bdaSmrg struct amdgpu_cs_request *ibs_request); 6500a23bdaSmrg 663f012e29SmrgCU_TestInfo basic_tests[] = { 673f012e29Smrg { "Query Info Test", amdgpu_query_info_test }, 683f012e29Smrg { "Userptr Test", amdgpu_userptr_test }, 6900a23bdaSmrg { "bo eviction Test", amdgpu_bo_eviction_test }, 703f012e29Smrg { "Command submission Test (GFX)", amdgpu_command_submission_gfx }, 713f012e29Smrg { "Command submission Test (Compute)", amdgpu_command_submission_compute }, 72d8807b2fSmrg { "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence }, 733f012e29Smrg { "Command submission Test (SDMA)", amdgpu_command_submission_sdma }, 743f012e29Smrg { "SW semaphore Test", amdgpu_semaphore_test }, 7500a23bdaSmrg { "Sync dependency Test", amdgpu_sync_dependency_test }, 763f012e29Smrg CU_TEST_INFO_NULL, 773f012e29Smrg}; 783f012e29Smrg#define BUFFER_SIZE (8 * 1024) 793f012e29Smrg#define SDMA_PKT_HEADER_op_offset 0 803f012e29Smrg#define SDMA_PKT_HEADER_op_mask 0x000000FF 813f012e29Smrg#define SDMA_PKT_HEADER_op_shift 0 823f012e29Smrg#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift) 833f012e29Smrg#define SDMA_OPCODE_CONSTANT_FILL 11 843f012e29Smrg# define SDMA_CONSTANT_FILL_EXTRA_SIZE(x) ((x) << 14) 853f012e29Smrg /* 0 = byte fill 863f012e29Smrg * 2 = DW fill 873f012e29Smrg */ 883f012e29Smrg#define SDMA_PACKET(op, sub_op, e) ((((e) & 0xFFFF) << 16) | \ 893f012e29Smrg (((sub_op) & 0xFF) << 8) | \ 903f012e29Smrg (((op) & 0xFF) << 0)) 913f012e29Smrg#define SDMA_OPCODE_WRITE 2 923f012e29Smrg# define SDMA_WRITE_SUB_OPCODE_LINEAR 0 933f012e29Smrg# define SDMA_WRTIE_SUB_OPCODE_TILED 1 943f012e29Smrg 953f012e29Smrg#define SDMA_OPCODE_COPY 1 963f012e29Smrg# define SDMA_COPY_SUB_OPCODE_LINEAR 0 973f012e29Smrg 983f012e29Smrg#define GFX_COMPUTE_NOP 0xffff1000 993f012e29Smrg#define SDMA_NOP 0x0 1003f012e29Smrg 1013f012e29Smrg/* PM4 */ 1023f012e29Smrg#define PACKET_TYPE0 0 1033f012e29Smrg#define PACKET_TYPE1 1 1043f012e29Smrg#define PACKET_TYPE2 2 1053f012e29Smrg#define PACKET_TYPE3 3 1063f012e29Smrg 1073f012e29Smrg#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3) 1083f012e29Smrg#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF) 1093f012e29Smrg#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF) 1103f012e29Smrg#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF) 1113f012e29Smrg#define PACKET0(reg, n) ((PACKET_TYPE0 << 30) | \ 1123f012e29Smrg ((reg) & 0xFFFF) | \ 1133f012e29Smrg ((n) & 0x3FFF) << 16) 1143f012e29Smrg#define CP_PACKET2 0x80000000 1153f012e29Smrg#define PACKET2_PAD_SHIFT 0 1163f012e29Smrg#define PACKET2_PAD_MASK (0x3fffffff << 0) 1173f012e29Smrg 1183f012e29Smrg#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v))) 1193f012e29Smrg 1203f012e29Smrg#define PACKET3(op, n) ((PACKET_TYPE3 << 30) | \ 1213f012e29Smrg (((op) & 0xFF) << 8) | \ 1223f012e29Smrg ((n) & 0x3FFF) << 16) 1233f012e29Smrg 1243f012e29Smrg/* Packet 3 types */ 1253f012e29Smrg#define PACKET3_NOP 0x10 1263f012e29Smrg 1273f012e29Smrg#define PACKET3_WRITE_DATA 0x37 1283f012e29Smrg#define WRITE_DATA_DST_SEL(x) ((x) << 8) 1293f012e29Smrg /* 0 - register 1303f012e29Smrg * 1 - memory (sync - via GRBM) 1313f012e29Smrg * 2 - gl2 1323f012e29Smrg * 3 - gds 1333f012e29Smrg * 4 - reserved 1343f012e29Smrg * 5 - memory (async - direct) 1353f012e29Smrg */ 1363f012e29Smrg#define WR_ONE_ADDR (1 << 16) 1373f012e29Smrg#define WR_CONFIRM (1 << 20) 1383f012e29Smrg#define WRITE_DATA_CACHE_POLICY(x) ((x) << 25) 1393f012e29Smrg /* 0 - LRU 1403f012e29Smrg * 1 - Stream 1413f012e29Smrg */ 1423f012e29Smrg#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30) 1433f012e29Smrg /* 0 - me 1443f012e29Smrg * 1 - pfp 1453f012e29Smrg * 2 - ce 1463f012e29Smrg */ 1473f012e29Smrg 1483f012e29Smrg#define PACKET3_DMA_DATA 0x50 1493f012e29Smrg/* 1. header 1503f012e29Smrg * 2. CONTROL 1513f012e29Smrg * 3. SRC_ADDR_LO or DATA [31:0] 1523f012e29Smrg * 4. SRC_ADDR_HI [31:0] 1533f012e29Smrg * 5. DST_ADDR_LO [31:0] 1543f012e29Smrg * 6. DST_ADDR_HI [7:0] 1553f012e29Smrg * 7. COMMAND [30:21] | BYTE_COUNT [20:0] 1563f012e29Smrg */ 1573f012e29Smrg/* CONTROL */ 1583f012e29Smrg# define PACKET3_DMA_DATA_ENGINE(x) ((x) << 0) 1593f012e29Smrg /* 0 - ME 1603f012e29Smrg * 1 - PFP 1613f012e29Smrg */ 1623f012e29Smrg# define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13) 1633f012e29Smrg /* 0 - LRU 1643f012e29Smrg * 1 - Stream 1653f012e29Smrg * 2 - Bypass 1663f012e29Smrg */ 1673f012e29Smrg# define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15) 1683f012e29Smrg# define PACKET3_DMA_DATA_DST_SEL(x) ((x) << 20) 1693f012e29Smrg /* 0 - DST_ADDR using DAS 1703f012e29Smrg * 1 - GDS 1713f012e29Smrg * 3 - DST_ADDR using L2 1723f012e29Smrg */ 1733f012e29Smrg# define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25) 1743f012e29Smrg /* 0 - LRU 1753f012e29Smrg * 1 - Stream 1763f012e29Smrg * 2 - Bypass 1773f012e29Smrg */ 1783f012e29Smrg# define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27) 1793f012e29Smrg# define PACKET3_DMA_DATA_SRC_SEL(x) ((x) << 29) 1803f012e29Smrg /* 0 - SRC_ADDR using SAS 1813f012e29Smrg * 1 - GDS 1823f012e29Smrg * 2 - DATA 1833f012e29Smrg * 3 - SRC_ADDR using L2 1843f012e29Smrg */ 1853f012e29Smrg# define PACKET3_DMA_DATA_CP_SYNC (1 << 31) 1863f012e29Smrg/* COMMAND */ 1873f012e29Smrg# define PACKET3_DMA_DATA_DIS_WC (1 << 21) 1883f012e29Smrg# define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22) 1893f012e29Smrg /* 0 - none 1903f012e29Smrg * 1 - 8 in 16 1913f012e29Smrg * 2 - 8 in 32 1923f012e29Smrg * 3 - 8 in 64 1933f012e29Smrg */ 1943f012e29Smrg# define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24) 1953f012e29Smrg /* 0 - none 1963f012e29Smrg * 1 - 8 in 16 1973f012e29Smrg * 2 - 8 in 32 1983f012e29Smrg * 3 - 8 in 64 1993f012e29Smrg */ 2003f012e29Smrg# define PACKET3_DMA_DATA_CMD_SAS (1 << 26) 2013f012e29Smrg /* 0 - memory 2023f012e29Smrg * 1 - register 2033f012e29Smrg */ 2043f012e29Smrg# define PACKET3_DMA_DATA_CMD_DAS (1 << 27) 2053f012e29Smrg /* 0 - memory 2063f012e29Smrg * 1 - register 2073f012e29Smrg */ 2083f012e29Smrg# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28) 2093f012e29Smrg# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29) 2103f012e29Smrg# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30) 2113f012e29Smrg 212d8807b2fSmrg#define SDMA_PACKET_SI(op, b, t, s, cnt) ((((op) & 0xF) << 28) | \ 213d8807b2fSmrg (((b) & 0x1) << 26) | \ 214d8807b2fSmrg (((t) & 0x1) << 23) | \ 215d8807b2fSmrg (((s) & 0x1) << 22) | \ 216d8807b2fSmrg (((cnt) & 0xFFFFF) << 0)) 217d8807b2fSmrg#define SDMA_OPCODE_COPY_SI 3 218d8807b2fSmrg#define SDMA_OPCODE_CONSTANT_FILL_SI 13 219d8807b2fSmrg#define SDMA_NOP_SI 0xf 220d8807b2fSmrg#define GFX_COMPUTE_NOP_SI 0x80000000 221d8807b2fSmrg#define PACKET3_DMA_DATA_SI 0x41 222d8807b2fSmrg# define PACKET3_DMA_DATA_SI_ENGINE(x) ((x) << 27) 223d8807b2fSmrg /* 0 - ME 224d8807b2fSmrg * 1 - PFP 225d8807b2fSmrg */ 226d8807b2fSmrg# define PACKET3_DMA_DATA_SI_DST_SEL(x) ((x) << 20) 227d8807b2fSmrg /* 0 - DST_ADDR using DAS 228d8807b2fSmrg * 1 - GDS 229d8807b2fSmrg * 3 - DST_ADDR using L2 230d8807b2fSmrg */ 231d8807b2fSmrg# define PACKET3_DMA_DATA_SI_SRC_SEL(x) ((x) << 29) 232d8807b2fSmrg /* 0 - SRC_ADDR using SAS 233d8807b2fSmrg * 1 - GDS 234d8807b2fSmrg * 2 - DATA 235d8807b2fSmrg * 3 - SRC_ADDR using L2 236d8807b2fSmrg */ 237d8807b2fSmrg# define PACKET3_DMA_DATA_SI_CP_SYNC (1 << 31) 238d8807b2fSmrg 23900a23bdaSmrg 24000a23bdaSmrg#define PKT3_CONTEXT_CONTROL 0x28 24100a23bdaSmrg#define CONTEXT_CONTROL_LOAD_ENABLE(x) (((unsigned)(x) & 0x1) << 31) 24200a23bdaSmrg#define CONTEXT_CONTROL_LOAD_CE_RAM(x) (((unsigned)(x) & 0x1) << 28) 24300a23bdaSmrg#define CONTEXT_CONTROL_SHADOW_ENABLE(x) (((unsigned)(x) & 0x1) << 31) 24400a23bdaSmrg 24500a23bdaSmrg#define PKT3_CLEAR_STATE 0x12 24600a23bdaSmrg 24700a23bdaSmrg#define PKT3_SET_SH_REG 0x76 24800a23bdaSmrg#define PACKET3_SET_SH_REG_START 0x00002c00 24900a23bdaSmrg 25000a23bdaSmrg#define PACKET3_DISPATCH_DIRECT 0x15 25100a23bdaSmrg 25200a23bdaSmrg 25300a23bdaSmrg/* gfx 8 */ 25400a23bdaSmrg#define mmCOMPUTE_PGM_LO 0x2e0c 25500a23bdaSmrg#define mmCOMPUTE_PGM_RSRC1 0x2e12 25600a23bdaSmrg#define mmCOMPUTE_TMPRING_SIZE 0x2e18 25700a23bdaSmrg#define mmCOMPUTE_USER_DATA_0 0x2e40 25800a23bdaSmrg#define mmCOMPUTE_USER_DATA_1 0x2e41 25900a23bdaSmrg#define mmCOMPUTE_RESOURCE_LIMITS 0x2e15 26000a23bdaSmrg#define mmCOMPUTE_NUM_THREAD_X 0x2e07 26100a23bdaSmrg 26200a23bdaSmrg 26300a23bdaSmrg 26400a23bdaSmrg#define SWAP_32(num) (((num & 0xff000000) >> 24) | \ 26500a23bdaSmrg ((num & 0x0000ff00) << 8) | \ 26600a23bdaSmrg ((num & 0x00ff0000) >> 8) | \ 26700a23bdaSmrg ((num & 0x000000ff) << 24)) 26800a23bdaSmrg 26900a23bdaSmrg 27000a23bdaSmrg/* Shader code 27100a23bdaSmrg * void main() 27200a23bdaSmrg{ 27300a23bdaSmrg 27400a23bdaSmrg float x = some_input; 27500a23bdaSmrg for (unsigned i = 0; i < 1000000; i++) 27600a23bdaSmrg x = sin(x); 27700a23bdaSmrg 27800a23bdaSmrg u[0] = 42u; 27900a23bdaSmrg} 28000a23bdaSmrg*/ 28100a23bdaSmrg 28200a23bdaSmrgstatic uint32_t shader_bin[] = { 28300a23bdaSmrg SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf), 28400a23bdaSmrg SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf), 28500a23bdaSmrg SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e), 28600a23bdaSmrg SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf) 28700a23bdaSmrg}; 28800a23bdaSmrg 28900a23bdaSmrg#define CODE_OFFSET 512 29000a23bdaSmrg#define DATA_OFFSET 1024 29100a23bdaSmrg 29200a23bdaSmrg 2933f012e29Smrgint suite_basic_tests_init(void) 2943f012e29Smrg{ 295d8807b2fSmrg struct amdgpu_gpu_info gpu_info = {0}; 2963f012e29Smrg int r; 2973f012e29Smrg 2983f012e29Smrg r = amdgpu_device_initialize(drm_amdgpu[0], &major_version, 2993f012e29Smrg &minor_version, &device_handle); 3003f012e29Smrg 301d8807b2fSmrg if (r) { 302037b3c26Smrg if ((r == -EACCES) && (errno == EACCES)) 303037b3c26Smrg printf("\n\nError:%s. " 304037b3c26Smrg "Hint:Try to run this test program as root.", 305037b3c26Smrg strerror(errno)); 3063f012e29Smrg return CUE_SINIT_FAILED; 307037b3c26Smrg } 308d8807b2fSmrg 309d8807b2fSmrg r = amdgpu_query_gpu_info(device_handle, &gpu_info); 310d8807b2fSmrg if (r) 311d8807b2fSmrg return CUE_SINIT_FAILED; 312d8807b2fSmrg 313d8807b2fSmrg family_id = gpu_info.family_id; 314d8807b2fSmrg 315d8807b2fSmrg return CUE_SUCCESS; 3163f012e29Smrg} 3173f012e29Smrg 3183f012e29Smrgint suite_basic_tests_clean(void) 3193f012e29Smrg{ 3203f012e29Smrg int r = amdgpu_device_deinitialize(device_handle); 3213f012e29Smrg 3223f012e29Smrg if (r == 0) 3233f012e29Smrg return CUE_SUCCESS; 3243f012e29Smrg else 3253f012e29Smrg return CUE_SCLEAN_FAILED; 3263f012e29Smrg} 3273f012e29Smrg 3283f012e29Smrgstatic void amdgpu_query_info_test(void) 3293f012e29Smrg{ 3303f012e29Smrg struct amdgpu_gpu_info gpu_info = {0}; 3313f012e29Smrg uint32_t version, feature; 3323f012e29Smrg int r; 3333f012e29Smrg 3343f012e29Smrg r = amdgpu_query_gpu_info(device_handle, &gpu_info); 3353f012e29Smrg CU_ASSERT_EQUAL(r, 0); 3363f012e29Smrg 3373f012e29Smrg r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0, 3383f012e29Smrg 0, &version, &feature); 3393f012e29Smrg CU_ASSERT_EQUAL(r, 0); 3403f012e29Smrg} 3413f012e29Smrg 3423f012e29Smrgstatic void amdgpu_command_submission_gfx_separate_ibs(void) 3433f012e29Smrg{ 3443f012e29Smrg amdgpu_context_handle context_handle; 3453f012e29Smrg amdgpu_bo_handle ib_result_handle, ib_result_ce_handle; 3463f012e29Smrg void *ib_result_cpu, *ib_result_ce_cpu; 3473f012e29Smrg uint64_t ib_result_mc_address, ib_result_ce_mc_address; 3483f012e29Smrg struct amdgpu_cs_request ibs_request = {0}; 3493f012e29Smrg struct amdgpu_cs_ib_info ib_info[2]; 3503f012e29Smrg struct amdgpu_cs_fence fence_status = {0}; 3513f012e29Smrg uint32_t *ptr; 3523f012e29Smrg uint32_t expired; 3533f012e29Smrg amdgpu_bo_list_handle bo_list; 3543f012e29Smrg amdgpu_va_handle va_handle, va_handle_ce; 355d8807b2fSmrg int r, i = 0; 3563f012e29Smrg 3573f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 3583f012e29Smrg CU_ASSERT_EQUAL(r, 0); 3593f012e29Smrg 3603f012e29Smrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 3613f012e29Smrg AMDGPU_GEM_DOMAIN_GTT, 0, 3623f012e29Smrg &ib_result_handle, &ib_result_cpu, 3633f012e29Smrg &ib_result_mc_address, &va_handle); 3643f012e29Smrg CU_ASSERT_EQUAL(r, 0); 3653f012e29Smrg 3663f012e29Smrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 3673f012e29Smrg AMDGPU_GEM_DOMAIN_GTT, 0, 3683f012e29Smrg &ib_result_ce_handle, &ib_result_ce_cpu, 3693f012e29Smrg &ib_result_ce_mc_address, &va_handle_ce); 3703f012e29Smrg CU_ASSERT_EQUAL(r, 0); 3713f012e29Smrg 3723f012e29Smrg r = amdgpu_get_bo_list(device_handle, ib_result_handle, 3733f012e29Smrg ib_result_ce_handle, &bo_list); 3743f012e29Smrg CU_ASSERT_EQUAL(r, 0); 3753f012e29Smrg 3763f012e29Smrg memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info)); 3773f012e29Smrg 3783f012e29Smrg /* IT_SET_CE_DE_COUNTERS */ 3793f012e29Smrg ptr = ib_result_ce_cpu; 380d8807b2fSmrg if (family_id != AMDGPU_FAMILY_SI) { 381d8807b2fSmrg ptr[i++] = 0xc0008900; 382d8807b2fSmrg ptr[i++] = 0; 383d8807b2fSmrg } 384d8807b2fSmrg ptr[i++] = 0xc0008400; 385d8807b2fSmrg ptr[i++] = 1; 3863f012e29Smrg ib_info[0].ib_mc_address = ib_result_ce_mc_address; 387d8807b2fSmrg ib_info[0].size = i; 3883f012e29Smrg ib_info[0].flags = AMDGPU_IB_FLAG_CE; 3893f012e29Smrg 3903f012e29Smrg /* IT_WAIT_ON_CE_COUNTER */ 3913f012e29Smrg ptr = ib_result_cpu; 3923f012e29Smrg ptr[0] = 0xc0008600; 3933f012e29Smrg ptr[1] = 0x00000001; 3943f012e29Smrg ib_info[1].ib_mc_address = ib_result_mc_address; 3953f012e29Smrg ib_info[1].size = 2; 3963f012e29Smrg 3973f012e29Smrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 3983f012e29Smrg ibs_request.number_of_ibs = 2; 3993f012e29Smrg ibs_request.ibs = ib_info; 4003f012e29Smrg ibs_request.resources = bo_list; 4013f012e29Smrg ibs_request.fence_info.handle = NULL; 4023f012e29Smrg 4033f012e29Smrg r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1); 4043f012e29Smrg 4053f012e29Smrg CU_ASSERT_EQUAL(r, 0); 4063f012e29Smrg 4073f012e29Smrg fence_status.context = context_handle; 4083f012e29Smrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 4093f012e29Smrg fence_status.ip_instance = 0; 4103f012e29Smrg fence_status.fence = ibs_request.seq_no; 4113f012e29Smrg 4123f012e29Smrg r = amdgpu_cs_query_fence_status(&fence_status, 4133f012e29Smrg AMDGPU_TIMEOUT_INFINITE, 4143f012e29Smrg 0, &expired); 4153f012e29Smrg CU_ASSERT_EQUAL(r, 0); 4163f012e29Smrg 4173f012e29Smrg r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 4183f012e29Smrg ib_result_mc_address, 4096); 4193f012e29Smrg CU_ASSERT_EQUAL(r, 0); 4203f012e29Smrg 4213f012e29Smrg r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce, 4223f012e29Smrg ib_result_ce_mc_address, 4096); 4233f012e29Smrg CU_ASSERT_EQUAL(r, 0); 4243f012e29Smrg 4253f012e29Smrg r = amdgpu_bo_list_destroy(bo_list); 4263f012e29Smrg CU_ASSERT_EQUAL(r, 0); 4273f012e29Smrg 4283f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 4293f012e29Smrg CU_ASSERT_EQUAL(r, 0); 4303f012e29Smrg 4313f012e29Smrg} 4323f012e29Smrg 4333f012e29Smrgstatic void amdgpu_command_submission_gfx_shared_ib(void) 4343f012e29Smrg{ 4353f012e29Smrg amdgpu_context_handle context_handle; 4363f012e29Smrg amdgpu_bo_handle ib_result_handle; 4373f012e29Smrg void *ib_result_cpu; 4383f012e29Smrg uint64_t ib_result_mc_address; 4393f012e29Smrg struct amdgpu_cs_request ibs_request = {0}; 4403f012e29Smrg struct amdgpu_cs_ib_info ib_info[2]; 4413f012e29Smrg struct amdgpu_cs_fence fence_status = {0}; 4423f012e29Smrg uint32_t *ptr; 4433f012e29Smrg uint32_t expired; 4443f012e29Smrg amdgpu_bo_list_handle bo_list; 4453f012e29Smrg amdgpu_va_handle va_handle; 446d8807b2fSmrg int r, i = 0; 4473f012e29Smrg 4483f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 4493f012e29Smrg CU_ASSERT_EQUAL(r, 0); 4503f012e29Smrg 4513f012e29Smrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 4523f012e29Smrg AMDGPU_GEM_DOMAIN_GTT, 0, 4533f012e29Smrg &ib_result_handle, &ib_result_cpu, 4543f012e29Smrg &ib_result_mc_address, &va_handle); 4553f012e29Smrg CU_ASSERT_EQUAL(r, 0); 4563f012e29Smrg 4573f012e29Smrg r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, 4583f012e29Smrg &bo_list); 4593f012e29Smrg CU_ASSERT_EQUAL(r, 0); 4603f012e29Smrg 4613f012e29Smrg memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info)); 4623f012e29Smrg 4633f012e29Smrg /* IT_SET_CE_DE_COUNTERS */ 4643f012e29Smrg ptr = ib_result_cpu; 465d8807b2fSmrg if (family_id != AMDGPU_FAMILY_SI) { 466d8807b2fSmrg ptr[i++] = 0xc0008900; 467d8807b2fSmrg ptr[i++] = 0; 468d8807b2fSmrg } 469d8807b2fSmrg ptr[i++] = 0xc0008400; 470d8807b2fSmrg ptr[i++] = 1; 4713f012e29Smrg ib_info[0].ib_mc_address = ib_result_mc_address; 472d8807b2fSmrg ib_info[0].size = i; 4733f012e29Smrg ib_info[0].flags = AMDGPU_IB_FLAG_CE; 4743f012e29Smrg 4753f012e29Smrg ptr = (uint32_t *)ib_result_cpu + 4; 4763f012e29Smrg ptr[0] = 0xc0008600; 4773f012e29Smrg ptr[1] = 0x00000001; 4783f012e29Smrg ib_info[1].ib_mc_address = ib_result_mc_address + 16; 4793f012e29Smrg ib_info[1].size = 2; 4803f012e29Smrg 4813f012e29Smrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 4823f012e29Smrg ibs_request.number_of_ibs = 2; 4833f012e29Smrg ibs_request.ibs = ib_info; 4843f012e29Smrg ibs_request.resources = bo_list; 4853f012e29Smrg ibs_request.fence_info.handle = NULL; 4863f012e29Smrg 4873f012e29Smrg r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 4883f012e29Smrg 4893f012e29Smrg CU_ASSERT_EQUAL(r, 0); 4903f012e29Smrg 4913f012e29Smrg fence_status.context = context_handle; 4923f012e29Smrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 4933f012e29Smrg fence_status.ip_instance = 0; 4943f012e29Smrg fence_status.fence = ibs_request.seq_no; 4953f012e29Smrg 4963f012e29Smrg r = amdgpu_cs_query_fence_status(&fence_status, 4973f012e29Smrg AMDGPU_TIMEOUT_INFINITE, 4983f012e29Smrg 0, &expired); 4993f012e29Smrg CU_ASSERT_EQUAL(r, 0); 5003f012e29Smrg 5013f012e29Smrg r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 5023f012e29Smrg ib_result_mc_address, 4096); 5033f012e29Smrg CU_ASSERT_EQUAL(r, 0); 5043f012e29Smrg 5053f012e29Smrg r = amdgpu_bo_list_destroy(bo_list); 5063f012e29Smrg CU_ASSERT_EQUAL(r, 0); 5073f012e29Smrg 5083f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 5093f012e29Smrg CU_ASSERT_EQUAL(r, 0); 5103f012e29Smrg} 5113f012e29Smrg 5123f012e29Smrgstatic void amdgpu_command_submission_gfx_cp_write_data(void) 5133f012e29Smrg{ 5143f012e29Smrg amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX); 5153f012e29Smrg} 5163f012e29Smrg 5173f012e29Smrgstatic void amdgpu_command_submission_gfx_cp_const_fill(void) 5183f012e29Smrg{ 5193f012e29Smrg amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX); 5203f012e29Smrg} 5213f012e29Smrg 5223f012e29Smrgstatic void amdgpu_command_submission_gfx_cp_copy_data(void) 5233f012e29Smrg{ 5243f012e29Smrg amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX); 5253f012e29Smrg} 5263f012e29Smrg 52700a23bdaSmrgstatic void amdgpu_bo_eviction_test(void) 52800a23bdaSmrg{ 52900a23bdaSmrg const int sdma_write_length = 1024; 53000a23bdaSmrg const int pm4_dw = 256; 53100a23bdaSmrg amdgpu_context_handle context_handle; 53200a23bdaSmrg amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2]; 53300a23bdaSmrg amdgpu_bo_handle *resources; 53400a23bdaSmrg uint32_t *pm4; 53500a23bdaSmrg struct amdgpu_cs_ib_info *ib_info; 53600a23bdaSmrg struct amdgpu_cs_request *ibs_request; 53700a23bdaSmrg uint64_t bo1_mc, bo2_mc; 53800a23bdaSmrg volatile unsigned char *bo1_cpu, *bo2_cpu; 53900a23bdaSmrg int i, j, r, loop1, loop2; 54000a23bdaSmrg uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 54100a23bdaSmrg amdgpu_va_handle bo1_va_handle, bo2_va_handle; 54200a23bdaSmrg struct amdgpu_heap_info vram_info, gtt_info; 54300a23bdaSmrg 54400a23bdaSmrg pm4 = calloc(pm4_dw, sizeof(*pm4)); 54500a23bdaSmrg CU_ASSERT_NOT_EQUAL(pm4, NULL); 54600a23bdaSmrg 54700a23bdaSmrg ib_info = calloc(1, sizeof(*ib_info)); 54800a23bdaSmrg CU_ASSERT_NOT_EQUAL(ib_info, NULL); 54900a23bdaSmrg 55000a23bdaSmrg ibs_request = calloc(1, sizeof(*ibs_request)); 55100a23bdaSmrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 55200a23bdaSmrg 55300a23bdaSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 55400a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 55500a23bdaSmrg 55600a23bdaSmrg /* prepare resource */ 55700a23bdaSmrg resources = calloc(4, sizeof(amdgpu_bo_handle)); 55800a23bdaSmrg CU_ASSERT_NOT_EQUAL(resources, NULL); 55900a23bdaSmrg 56000a23bdaSmrg r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM, 56100a23bdaSmrg 0, &vram_info); 56200a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 56300a23bdaSmrg 56400a23bdaSmrg r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096, 56500a23bdaSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]); 56600a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 56700a23bdaSmrg r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096, 56800a23bdaSmrg AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]); 56900a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 57000a23bdaSmrg 57100a23bdaSmrg r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT, 57200a23bdaSmrg 0, >t_info); 57300a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 57400a23bdaSmrg 57500a23bdaSmrg r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096, 57600a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 0, >t_max[0]); 57700a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 57800a23bdaSmrg r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096, 57900a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 0, >t_max[1]); 58000a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 58100a23bdaSmrg 58200a23bdaSmrg 58300a23bdaSmrg 58400a23bdaSmrg loop1 = loop2 = 0; 58500a23bdaSmrg /* run 9 circle to test all mapping combination */ 58600a23bdaSmrg while(loop1 < 2) { 58700a23bdaSmrg while(loop2 < 2) { 58800a23bdaSmrg /* allocate UC bo1for sDMA use */ 58900a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 59000a23bdaSmrg sdma_write_length, 4096, 59100a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 59200a23bdaSmrg gtt_flags[loop1], &bo1, 59300a23bdaSmrg (void**)&bo1_cpu, &bo1_mc, 59400a23bdaSmrg &bo1_va_handle); 59500a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 59600a23bdaSmrg 59700a23bdaSmrg /* set bo1 */ 59800a23bdaSmrg memset((void*)bo1_cpu, 0xaa, sdma_write_length); 59900a23bdaSmrg 60000a23bdaSmrg /* allocate UC bo2 for sDMA use */ 60100a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 60200a23bdaSmrg sdma_write_length, 4096, 60300a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 60400a23bdaSmrg gtt_flags[loop2], &bo2, 60500a23bdaSmrg (void**)&bo2_cpu, &bo2_mc, 60600a23bdaSmrg &bo2_va_handle); 60700a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 60800a23bdaSmrg 60900a23bdaSmrg /* clear bo2 */ 61000a23bdaSmrg memset((void*)bo2_cpu, 0, sdma_write_length); 61100a23bdaSmrg 61200a23bdaSmrg resources[0] = bo1; 61300a23bdaSmrg resources[1] = bo2; 61400a23bdaSmrg resources[2] = vram_max[loop2]; 61500a23bdaSmrg resources[3] = gtt_max[loop2]; 61600a23bdaSmrg 61700a23bdaSmrg /* fulfill PM4: test DMA copy linear */ 61800a23bdaSmrg i = j = 0; 61900a23bdaSmrg if (family_id == AMDGPU_FAMILY_SI) { 62000a23bdaSmrg pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0, 62100a23bdaSmrg sdma_write_length); 62200a23bdaSmrg pm4[i++] = 0xffffffff & bo2_mc; 62300a23bdaSmrg pm4[i++] = 0xffffffff & bo1_mc; 62400a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 62500a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 62600a23bdaSmrg } else { 62700a23bdaSmrg pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0); 62800a23bdaSmrg if (family_id >= AMDGPU_FAMILY_AI) 62900a23bdaSmrg pm4[i++] = sdma_write_length - 1; 63000a23bdaSmrg else 63100a23bdaSmrg pm4[i++] = sdma_write_length; 63200a23bdaSmrg pm4[i++] = 0; 63300a23bdaSmrg pm4[i++] = 0xffffffff & bo1_mc; 63400a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 63500a23bdaSmrg pm4[i++] = 0xffffffff & bo2_mc; 63600a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 63700a23bdaSmrg } 63800a23bdaSmrg 63900a23bdaSmrg amdgpu_test_exec_cs_helper(context_handle, 64000a23bdaSmrg AMDGPU_HW_IP_DMA, 0, 64100a23bdaSmrg i, pm4, 64200a23bdaSmrg 4, resources, 64300a23bdaSmrg ib_info, ibs_request); 64400a23bdaSmrg 64500a23bdaSmrg /* verify if SDMA test result meets with expected */ 64600a23bdaSmrg i = 0; 64700a23bdaSmrg while(i < sdma_write_length) { 64800a23bdaSmrg CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa); 64900a23bdaSmrg } 65000a23bdaSmrg r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc, 65100a23bdaSmrg sdma_write_length); 65200a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 65300a23bdaSmrg r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc, 65400a23bdaSmrg sdma_write_length); 65500a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 65600a23bdaSmrg loop2++; 65700a23bdaSmrg } 65800a23bdaSmrg loop2 = 0; 65900a23bdaSmrg loop1++; 66000a23bdaSmrg } 66100a23bdaSmrg amdgpu_bo_free(vram_max[0]); 66200a23bdaSmrg amdgpu_bo_free(vram_max[1]); 66300a23bdaSmrg amdgpu_bo_free(gtt_max[0]); 66400a23bdaSmrg amdgpu_bo_free(gtt_max[1]); 66500a23bdaSmrg /* clean resources */ 66600a23bdaSmrg free(resources); 66700a23bdaSmrg free(ibs_request); 66800a23bdaSmrg free(ib_info); 66900a23bdaSmrg free(pm4); 67000a23bdaSmrg 67100a23bdaSmrg /* end of test */ 67200a23bdaSmrg r = amdgpu_cs_ctx_free(context_handle); 67300a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 67400a23bdaSmrg} 67500a23bdaSmrg 67600a23bdaSmrg 6773f012e29Smrgstatic void amdgpu_command_submission_gfx(void) 6783f012e29Smrg{ 6793f012e29Smrg /* write data using the CP */ 6803f012e29Smrg amdgpu_command_submission_gfx_cp_write_data(); 6813f012e29Smrg /* const fill using the CP */ 6823f012e29Smrg amdgpu_command_submission_gfx_cp_const_fill(); 6833f012e29Smrg /* copy data using the CP */ 6843f012e29Smrg amdgpu_command_submission_gfx_cp_copy_data(); 6853f012e29Smrg /* separate IB buffers for multi-IB submission */ 6863f012e29Smrg amdgpu_command_submission_gfx_separate_ibs(); 6873f012e29Smrg /* shared IB buffer for multi-IB submission */ 6883f012e29Smrg amdgpu_command_submission_gfx_shared_ib(); 6893f012e29Smrg} 6903f012e29Smrg 6913f012e29Smrgstatic void amdgpu_semaphore_test(void) 6923f012e29Smrg{ 6933f012e29Smrg amdgpu_context_handle context_handle[2]; 6943f012e29Smrg amdgpu_semaphore_handle sem; 6953f012e29Smrg amdgpu_bo_handle ib_result_handle[2]; 6963f012e29Smrg void *ib_result_cpu[2]; 6973f012e29Smrg uint64_t ib_result_mc_address[2]; 6983f012e29Smrg struct amdgpu_cs_request ibs_request[2] = {0}; 6993f012e29Smrg struct amdgpu_cs_ib_info ib_info[2] = {0}; 7003f012e29Smrg struct amdgpu_cs_fence fence_status = {0}; 7013f012e29Smrg uint32_t *ptr; 7023f012e29Smrg uint32_t expired; 703d8807b2fSmrg uint32_t sdma_nop, gfx_nop; 7043f012e29Smrg amdgpu_bo_list_handle bo_list[2]; 7053f012e29Smrg amdgpu_va_handle va_handle[2]; 7063f012e29Smrg int r, i; 7073f012e29Smrg 708d8807b2fSmrg if (family_id == AMDGPU_FAMILY_SI) { 709d8807b2fSmrg sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0); 710d8807b2fSmrg gfx_nop = GFX_COMPUTE_NOP_SI; 711d8807b2fSmrg } else { 712d8807b2fSmrg sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP); 713d8807b2fSmrg gfx_nop = GFX_COMPUTE_NOP; 714d8807b2fSmrg } 715d8807b2fSmrg 7163f012e29Smrg r = amdgpu_cs_create_semaphore(&sem); 7173f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7183f012e29Smrg for (i = 0; i < 2; i++) { 7193f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]); 7203f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7213f012e29Smrg 7223f012e29Smrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 7233f012e29Smrg AMDGPU_GEM_DOMAIN_GTT, 0, 7243f012e29Smrg &ib_result_handle[i], &ib_result_cpu[i], 7253f012e29Smrg &ib_result_mc_address[i], &va_handle[i]); 7263f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7273f012e29Smrg 7283f012e29Smrg r = amdgpu_get_bo_list(device_handle, ib_result_handle[i], 7293f012e29Smrg NULL, &bo_list[i]); 7303f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7313f012e29Smrg } 7323f012e29Smrg 7333f012e29Smrg /* 1. same context different engine */ 7343f012e29Smrg ptr = ib_result_cpu[0]; 735d8807b2fSmrg ptr[0] = sdma_nop; 7363f012e29Smrg ib_info[0].ib_mc_address = ib_result_mc_address[0]; 7373f012e29Smrg ib_info[0].size = 1; 7383f012e29Smrg 7393f012e29Smrg ibs_request[0].ip_type = AMDGPU_HW_IP_DMA; 7403f012e29Smrg ibs_request[0].number_of_ibs = 1; 7413f012e29Smrg ibs_request[0].ibs = &ib_info[0]; 7423f012e29Smrg ibs_request[0].resources = bo_list[0]; 7433f012e29Smrg ibs_request[0].fence_info.handle = NULL; 7443f012e29Smrg r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1); 7453f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7463f012e29Smrg r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem); 7473f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7483f012e29Smrg 7493f012e29Smrg r = amdgpu_cs_wait_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem); 7503f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7513f012e29Smrg ptr = ib_result_cpu[1]; 752d8807b2fSmrg ptr[0] = gfx_nop; 7533f012e29Smrg ib_info[1].ib_mc_address = ib_result_mc_address[1]; 7543f012e29Smrg ib_info[1].size = 1; 7553f012e29Smrg 7563f012e29Smrg ibs_request[1].ip_type = AMDGPU_HW_IP_GFX; 7573f012e29Smrg ibs_request[1].number_of_ibs = 1; 7583f012e29Smrg ibs_request[1].ibs = &ib_info[1]; 7593f012e29Smrg ibs_request[1].resources = bo_list[1]; 7603f012e29Smrg ibs_request[1].fence_info.handle = NULL; 7613f012e29Smrg 7623f012e29Smrg r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[1], 1); 7633f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7643f012e29Smrg 7653f012e29Smrg fence_status.context = context_handle[0]; 7663f012e29Smrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 7673f012e29Smrg fence_status.ip_instance = 0; 7683f012e29Smrg fence_status.fence = ibs_request[1].seq_no; 7693f012e29Smrg r = amdgpu_cs_query_fence_status(&fence_status, 7703f012e29Smrg 500000000, 0, &expired); 7713f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7723f012e29Smrg CU_ASSERT_EQUAL(expired, true); 7733f012e29Smrg 7743f012e29Smrg /* 2. same engine different context */ 7753f012e29Smrg ptr = ib_result_cpu[0]; 776d8807b2fSmrg ptr[0] = gfx_nop; 7773f012e29Smrg ib_info[0].ib_mc_address = ib_result_mc_address[0]; 7783f012e29Smrg ib_info[0].size = 1; 7793f012e29Smrg 7803f012e29Smrg ibs_request[0].ip_type = AMDGPU_HW_IP_GFX; 7813f012e29Smrg ibs_request[0].number_of_ibs = 1; 7823f012e29Smrg ibs_request[0].ibs = &ib_info[0]; 7833f012e29Smrg ibs_request[0].resources = bo_list[0]; 7843f012e29Smrg ibs_request[0].fence_info.handle = NULL; 7853f012e29Smrg r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1); 7863f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7873f012e29Smrg r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem); 7883f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7893f012e29Smrg 7903f012e29Smrg r = amdgpu_cs_wait_semaphore(context_handle[1], AMDGPU_HW_IP_GFX, 0, 0, sem); 7913f012e29Smrg CU_ASSERT_EQUAL(r, 0); 7923f012e29Smrg ptr = ib_result_cpu[1]; 793d8807b2fSmrg ptr[0] = gfx_nop; 7943f012e29Smrg ib_info[1].ib_mc_address = ib_result_mc_address[1]; 7953f012e29Smrg ib_info[1].size = 1; 7963f012e29Smrg 7973f012e29Smrg ibs_request[1].ip_type = AMDGPU_HW_IP_GFX; 7983f012e29Smrg ibs_request[1].number_of_ibs = 1; 7993f012e29Smrg ibs_request[1].ibs = &ib_info[1]; 8003f012e29Smrg ibs_request[1].resources = bo_list[1]; 8013f012e29Smrg ibs_request[1].fence_info.handle = NULL; 8023f012e29Smrg r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request[1], 1); 8033f012e29Smrg 8043f012e29Smrg CU_ASSERT_EQUAL(r, 0); 8053f012e29Smrg 8063f012e29Smrg fence_status.context = context_handle[1]; 8073f012e29Smrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 8083f012e29Smrg fence_status.ip_instance = 0; 8093f012e29Smrg fence_status.fence = ibs_request[1].seq_no; 8103f012e29Smrg r = amdgpu_cs_query_fence_status(&fence_status, 8113f012e29Smrg 500000000, 0, &expired); 8123f012e29Smrg CU_ASSERT_EQUAL(r, 0); 8133f012e29Smrg CU_ASSERT_EQUAL(expired, true); 814d8807b2fSmrg 8153f012e29Smrg for (i = 0; i < 2; i++) { 8163f012e29Smrg r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i], 8173f012e29Smrg ib_result_mc_address[i], 4096); 8183f012e29Smrg CU_ASSERT_EQUAL(r, 0); 8193f012e29Smrg 8203f012e29Smrg r = amdgpu_bo_list_destroy(bo_list[i]); 8213f012e29Smrg CU_ASSERT_EQUAL(r, 0); 8223f012e29Smrg 8233f012e29Smrg r = amdgpu_cs_ctx_free(context_handle[i]); 8243f012e29Smrg CU_ASSERT_EQUAL(r, 0); 8253f012e29Smrg } 8263f012e29Smrg 8273f012e29Smrg r = amdgpu_cs_destroy_semaphore(sem); 8283f012e29Smrg CU_ASSERT_EQUAL(r, 0); 8293f012e29Smrg} 8303f012e29Smrg 8313f012e29Smrgstatic void amdgpu_command_submission_compute_nop(void) 8323f012e29Smrg{ 8333f012e29Smrg amdgpu_context_handle context_handle; 8343f012e29Smrg amdgpu_bo_handle ib_result_handle; 8353f012e29Smrg void *ib_result_cpu; 8363f012e29Smrg uint64_t ib_result_mc_address; 8373f012e29Smrg struct amdgpu_cs_request ibs_request; 8383f012e29Smrg struct amdgpu_cs_ib_info ib_info; 8393f012e29Smrg struct amdgpu_cs_fence fence_status; 8403f012e29Smrg uint32_t *ptr; 8413f012e29Smrg uint32_t expired; 84200a23bdaSmrg int r, instance; 8433f012e29Smrg amdgpu_bo_list_handle bo_list; 8443f012e29Smrg amdgpu_va_handle va_handle; 845d8807b2fSmrg struct drm_amdgpu_info_hw_ip info; 846d8807b2fSmrg 847d8807b2fSmrg r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info); 848d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 8493f012e29Smrg 8503f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 8513f012e29Smrg CU_ASSERT_EQUAL(r, 0); 8523f012e29Smrg 853d8807b2fSmrg for (instance = 0; (1 << instance) & info.available_rings; instance++) { 8543f012e29Smrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 8553f012e29Smrg AMDGPU_GEM_DOMAIN_GTT, 0, 8563f012e29Smrg &ib_result_handle, &ib_result_cpu, 8573f012e29Smrg &ib_result_mc_address, &va_handle); 8583f012e29Smrg CU_ASSERT_EQUAL(r, 0); 8593f012e29Smrg 8603f012e29Smrg r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, 8613f012e29Smrg &bo_list); 8623f012e29Smrg CU_ASSERT_EQUAL(r, 0); 8633f012e29Smrg 8643f012e29Smrg ptr = ib_result_cpu; 865d8807b2fSmrg memset(ptr, 0, 16); 866d8807b2fSmrg ptr[0]=PACKET3(PACKET3_NOP, 14); 8673f012e29Smrg 8683f012e29Smrg memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); 8693f012e29Smrg ib_info.ib_mc_address = ib_result_mc_address; 8703f012e29Smrg ib_info.size = 16; 8713f012e29Smrg 8723f012e29Smrg memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); 8733f012e29Smrg ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE; 8743f012e29Smrg ibs_request.ring = instance; 8753f012e29Smrg ibs_request.number_of_ibs = 1; 8763f012e29Smrg ibs_request.ibs = &ib_info; 8773f012e29Smrg ibs_request.resources = bo_list; 8783f012e29Smrg ibs_request.fence_info.handle = NULL; 8793f012e29Smrg 8803f012e29Smrg memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence)); 8813f012e29Smrg r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1); 8823f012e29Smrg CU_ASSERT_EQUAL(r, 0); 8833f012e29Smrg 8843f012e29Smrg fence_status.context = context_handle; 8853f012e29Smrg fence_status.ip_type = AMDGPU_HW_IP_COMPUTE; 8863f012e29Smrg fence_status.ip_instance = 0; 8873f012e29Smrg fence_status.ring = instance; 8883f012e29Smrg fence_status.fence = ibs_request.seq_no; 8893f012e29Smrg 8903f012e29Smrg r = amdgpu_cs_query_fence_status(&fence_status, 8913f012e29Smrg AMDGPU_TIMEOUT_INFINITE, 8923f012e29Smrg 0, &expired); 8933f012e29Smrg CU_ASSERT_EQUAL(r, 0); 8943f012e29Smrg 8953f012e29Smrg r = amdgpu_bo_list_destroy(bo_list); 8963f012e29Smrg CU_ASSERT_EQUAL(r, 0); 8973f012e29Smrg 8983f012e29Smrg r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 8993f012e29Smrg ib_result_mc_address, 4096); 9003f012e29Smrg CU_ASSERT_EQUAL(r, 0); 9013f012e29Smrg } 9023f012e29Smrg 9033f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 9043f012e29Smrg CU_ASSERT_EQUAL(r, 0); 9053f012e29Smrg} 9063f012e29Smrg 9073f012e29Smrgstatic void amdgpu_command_submission_compute_cp_write_data(void) 9083f012e29Smrg{ 9093f012e29Smrg amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE); 9103f012e29Smrg} 9113f012e29Smrg 9123f012e29Smrgstatic void amdgpu_command_submission_compute_cp_const_fill(void) 9133f012e29Smrg{ 9143f012e29Smrg amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE); 9153f012e29Smrg} 9163f012e29Smrg 9173f012e29Smrgstatic void amdgpu_command_submission_compute_cp_copy_data(void) 9183f012e29Smrg{ 9193f012e29Smrg amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE); 9203f012e29Smrg} 9213f012e29Smrg 9223f012e29Smrgstatic void amdgpu_command_submission_compute(void) 9233f012e29Smrg{ 9243f012e29Smrg /* write data using the CP */ 9253f012e29Smrg amdgpu_command_submission_compute_cp_write_data(); 9263f012e29Smrg /* const fill using the CP */ 9273f012e29Smrg amdgpu_command_submission_compute_cp_const_fill(); 9283f012e29Smrg /* copy data using the CP */ 9293f012e29Smrg amdgpu_command_submission_compute_cp_copy_data(); 9303f012e29Smrg /* nop test */ 9313f012e29Smrg amdgpu_command_submission_compute_nop(); 9323f012e29Smrg} 9333f012e29Smrg 9343f012e29Smrg/* 9353f012e29Smrg * caller need create/release: 9363f012e29Smrg * pm4_src, resources, ib_info, and ibs_request 9373f012e29Smrg * submit command stream described in ibs_request and wait for this IB accomplished 9383f012e29Smrg */ 9393f012e29Smrgstatic void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle, 9403f012e29Smrg unsigned ip_type, 9413f012e29Smrg int instance, int pm4_dw, uint32_t *pm4_src, 9423f012e29Smrg int res_cnt, amdgpu_bo_handle *resources, 9433f012e29Smrg struct amdgpu_cs_ib_info *ib_info, 9443f012e29Smrg struct amdgpu_cs_request *ibs_request) 9453f012e29Smrg{ 9463f012e29Smrg int r; 9473f012e29Smrg uint32_t expired; 9483f012e29Smrg uint32_t *ring_ptr; 9493f012e29Smrg amdgpu_bo_handle ib_result_handle; 9503f012e29Smrg void *ib_result_cpu; 9513f012e29Smrg uint64_t ib_result_mc_address; 9523f012e29Smrg struct amdgpu_cs_fence fence_status = {0}; 9533f012e29Smrg amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1)); 9543f012e29Smrg amdgpu_va_handle va_handle; 9553f012e29Smrg 9563f012e29Smrg /* prepare CS */ 9573f012e29Smrg CU_ASSERT_NOT_EQUAL(pm4_src, NULL); 9583f012e29Smrg CU_ASSERT_NOT_EQUAL(resources, NULL); 9593f012e29Smrg CU_ASSERT_NOT_EQUAL(ib_info, NULL); 9603f012e29Smrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 9613f012e29Smrg CU_ASSERT_TRUE(pm4_dw <= 1024); 9623f012e29Smrg 9633f012e29Smrg /* allocate IB */ 9643f012e29Smrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 9653f012e29Smrg AMDGPU_GEM_DOMAIN_GTT, 0, 9663f012e29Smrg &ib_result_handle, &ib_result_cpu, 9673f012e29Smrg &ib_result_mc_address, &va_handle); 9683f012e29Smrg CU_ASSERT_EQUAL(r, 0); 9693f012e29Smrg 9703f012e29Smrg /* copy PM4 packet to ring from caller */ 9713f012e29Smrg ring_ptr = ib_result_cpu; 9723f012e29Smrg memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src)); 9733f012e29Smrg 9743f012e29Smrg ib_info->ib_mc_address = ib_result_mc_address; 9753f012e29Smrg ib_info->size = pm4_dw; 9763f012e29Smrg 9773f012e29Smrg ibs_request->ip_type = ip_type; 9783f012e29Smrg ibs_request->ring = instance; 9793f012e29Smrg ibs_request->number_of_ibs = 1; 9803f012e29Smrg ibs_request->ibs = ib_info; 9813f012e29Smrg ibs_request->fence_info.handle = NULL; 9823f012e29Smrg 9833f012e29Smrg memcpy(all_res, resources, sizeof(resources[0]) * res_cnt); 9843f012e29Smrg all_res[res_cnt] = ib_result_handle; 9853f012e29Smrg 9863f012e29Smrg r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res, 9873f012e29Smrg NULL, &ibs_request->resources); 9883f012e29Smrg CU_ASSERT_EQUAL(r, 0); 9893f012e29Smrg 9903f012e29Smrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 9913f012e29Smrg 9923f012e29Smrg /* submit CS */ 9933f012e29Smrg r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1); 9943f012e29Smrg CU_ASSERT_EQUAL(r, 0); 9953f012e29Smrg 9963f012e29Smrg r = amdgpu_bo_list_destroy(ibs_request->resources); 9973f012e29Smrg CU_ASSERT_EQUAL(r, 0); 9983f012e29Smrg 9993f012e29Smrg fence_status.ip_type = ip_type; 10003f012e29Smrg fence_status.ip_instance = 0; 10013f012e29Smrg fence_status.ring = ibs_request->ring; 10023f012e29Smrg fence_status.context = context_handle; 10033f012e29Smrg fence_status.fence = ibs_request->seq_no; 10043f012e29Smrg 10053f012e29Smrg /* wait for IB accomplished */ 10063f012e29Smrg r = amdgpu_cs_query_fence_status(&fence_status, 10073f012e29Smrg AMDGPU_TIMEOUT_INFINITE, 10083f012e29Smrg 0, &expired); 10093f012e29Smrg CU_ASSERT_EQUAL(r, 0); 10103f012e29Smrg CU_ASSERT_EQUAL(expired, true); 10113f012e29Smrg 10123f012e29Smrg r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 10133f012e29Smrg ib_result_mc_address, 4096); 10143f012e29Smrg CU_ASSERT_EQUAL(r, 0); 10153f012e29Smrg} 10163f012e29Smrg 10173f012e29Smrgstatic void amdgpu_command_submission_write_linear_helper(unsigned ip_type) 10183f012e29Smrg{ 10193f012e29Smrg const int sdma_write_length = 128; 10203f012e29Smrg const int pm4_dw = 256; 10213f012e29Smrg amdgpu_context_handle context_handle; 10223f012e29Smrg amdgpu_bo_handle bo; 10233f012e29Smrg amdgpu_bo_handle *resources; 10243f012e29Smrg uint32_t *pm4; 10253f012e29Smrg struct amdgpu_cs_ib_info *ib_info; 10263f012e29Smrg struct amdgpu_cs_request *ibs_request; 10273f012e29Smrg uint64_t bo_mc; 10283f012e29Smrg volatile uint32_t *bo_cpu; 102900a23bdaSmrg int i, j, r, loop, ring_id; 10303f012e29Smrg uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 10313f012e29Smrg amdgpu_va_handle va_handle; 103200a23bdaSmrg struct drm_amdgpu_info_hw_ip hw_ip_info; 10333f012e29Smrg 10343f012e29Smrg pm4 = calloc(pm4_dw, sizeof(*pm4)); 10353f012e29Smrg CU_ASSERT_NOT_EQUAL(pm4, NULL); 10363f012e29Smrg 10373f012e29Smrg ib_info = calloc(1, sizeof(*ib_info)); 10383f012e29Smrg CU_ASSERT_NOT_EQUAL(ib_info, NULL); 10393f012e29Smrg 10403f012e29Smrg ibs_request = calloc(1, sizeof(*ibs_request)); 10413f012e29Smrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 10423f012e29Smrg 104300a23bdaSmrg r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info); 104400a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 104500a23bdaSmrg 10463f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 10473f012e29Smrg CU_ASSERT_EQUAL(r, 0); 10483f012e29Smrg 10493f012e29Smrg /* prepare resource */ 10503f012e29Smrg resources = calloc(1, sizeof(amdgpu_bo_handle)); 10513f012e29Smrg CU_ASSERT_NOT_EQUAL(resources, NULL); 10523f012e29Smrg 105300a23bdaSmrg for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) { 105400a23bdaSmrg loop = 0; 105500a23bdaSmrg while(loop < 2) { 105600a23bdaSmrg /* allocate UC bo for sDMA use */ 105700a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 105800a23bdaSmrg sdma_write_length * sizeof(uint32_t), 105900a23bdaSmrg 4096, AMDGPU_GEM_DOMAIN_GTT, 106000a23bdaSmrg gtt_flags[loop], &bo, (void**)&bo_cpu, 106100a23bdaSmrg &bo_mc, &va_handle); 106200a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 10633f012e29Smrg 106400a23bdaSmrg /* clear bo */ 106500a23bdaSmrg memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t)); 10663f012e29Smrg 106700a23bdaSmrg resources[0] = bo; 10683f012e29Smrg 106900a23bdaSmrg /* fulfill PM4: test DMA write-linear */ 107000a23bdaSmrg i = j = 0; 107100a23bdaSmrg if (ip_type == AMDGPU_HW_IP_DMA) { 107200a23bdaSmrg if (family_id == AMDGPU_FAMILY_SI) 107300a23bdaSmrg pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0, 107400a23bdaSmrg sdma_write_length); 107500a23bdaSmrg else 107600a23bdaSmrg pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE, 107700a23bdaSmrg SDMA_WRITE_SUB_OPCODE_LINEAR, 0); 107800a23bdaSmrg pm4[i++] = 0xffffffff & bo_mc; 107900a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 108000a23bdaSmrg if (family_id >= AMDGPU_FAMILY_AI) 108100a23bdaSmrg pm4[i++] = sdma_write_length - 1; 108200a23bdaSmrg else if (family_id != AMDGPU_FAMILY_SI) 108300a23bdaSmrg pm4[i++] = sdma_write_length; 108400a23bdaSmrg while(j++ < sdma_write_length) 108500a23bdaSmrg pm4[i++] = 0xdeadbeaf; 108600a23bdaSmrg } else if ((ip_type == AMDGPU_HW_IP_GFX) || 108700a23bdaSmrg (ip_type == AMDGPU_HW_IP_COMPUTE)) { 108800a23bdaSmrg pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length); 108900a23bdaSmrg pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 109000a23bdaSmrg pm4[i++] = 0xfffffffc & bo_mc; 109100a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 109200a23bdaSmrg while(j++ < sdma_write_length) 109300a23bdaSmrg pm4[i++] = 0xdeadbeaf; 109400a23bdaSmrg } 10953f012e29Smrg 109600a23bdaSmrg amdgpu_test_exec_cs_helper(context_handle, 109700a23bdaSmrg ip_type, ring_id, 109800a23bdaSmrg i, pm4, 109900a23bdaSmrg 1, resources, 110000a23bdaSmrg ib_info, ibs_request); 11013f012e29Smrg 110200a23bdaSmrg /* verify if SDMA test result meets with expected */ 110300a23bdaSmrg i = 0; 110400a23bdaSmrg while(i < sdma_write_length) { 110500a23bdaSmrg CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf); 110600a23bdaSmrg } 11073f012e29Smrg 110800a23bdaSmrg r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc, 110900a23bdaSmrg sdma_write_length * sizeof(uint32_t)); 111000a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 111100a23bdaSmrg loop++; 11123f012e29Smrg } 11133f012e29Smrg } 11143f012e29Smrg /* clean resources */ 11153f012e29Smrg free(resources); 11163f012e29Smrg free(ibs_request); 11173f012e29Smrg free(ib_info); 11183f012e29Smrg free(pm4); 11193f012e29Smrg 11203f012e29Smrg /* end of test */ 11213f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 11223f012e29Smrg CU_ASSERT_EQUAL(r, 0); 11233f012e29Smrg} 11243f012e29Smrg 11253f012e29Smrgstatic void amdgpu_command_submission_sdma_write_linear(void) 11263f012e29Smrg{ 11273f012e29Smrg amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA); 11283f012e29Smrg} 11293f012e29Smrg 11303f012e29Smrgstatic void amdgpu_command_submission_const_fill_helper(unsigned ip_type) 11313f012e29Smrg{ 11323f012e29Smrg const int sdma_write_length = 1024 * 1024; 11333f012e29Smrg const int pm4_dw = 256; 11343f012e29Smrg amdgpu_context_handle context_handle; 11353f012e29Smrg amdgpu_bo_handle bo; 11363f012e29Smrg amdgpu_bo_handle *resources; 11373f012e29Smrg uint32_t *pm4; 11383f012e29Smrg struct amdgpu_cs_ib_info *ib_info; 11393f012e29Smrg struct amdgpu_cs_request *ibs_request; 11403f012e29Smrg uint64_t bo_mc; 11413f012e29Smrg volatile uint32_t *bo_cpu; 114200a23bdaSmrg int i, j, r, loop, ring_id; 11433f012e29Smrg uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 11443f012e29Smrg amdgpu_va_handle va_handle; 114500a23bdaSmrg struct drm_amdgpu_info_hw_ip hw_ip_info; 11463f012e29Smrg 11473f012e29Smrg pm4 = calloc(pm4_dw, sizeof(*pm4)); 11483f012e29Smrg CU_ASSERT_NOT_EQUAL(pm4, NULL); 11493f012e29Smrg 11503f012e29Smrg ib_info = calloc(1, sizeof(*ib_info)); 11513f012e29Smrg CU_ASSERT_NOT_EQUAL(ib_info, NULL); 11523f012e29Smrg 11533f012e29Smrg ibs_request = calloc(1, sizeof(*ibs_request)); 11543f012e29Smrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 11553f012e29Smrg 115600a23bdaSmrg r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info); 115700a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 115800a23bdaSmrg 11593f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 11603f012e29Smrg CU_ASSERT_EQUAL(r, 0); 11613f012e29Smrg 11623f012e29Smrg /* prepare resource */ 11633f012e29Smrg resources = calloc(1, sizeof(amdgpu_bo_handle)); 11643f012e29Smrg CU_ASSERT_NOT_EQUAL(resources, NULL); 11653f012e29Smrg 116600a23bdaSmrg for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) { 116700a23bdaSmrg loop = 0; 116800a23bdaSmrg while(loop < 2) { 116900a23bdaSmrg /* allocate UC bo for sDMA use */ 117000a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 117100a23bdaSmrg sdma_write_length, 4096, 117200a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 117300a23bdaSmrg gtt_flags[loop], &bo, (void**)&bo_cpu, 117400a23bdaSmrg &bo_mc, &va_handle); 117500a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 11763f012e29Smrg 117700a23bdaSmrg /* clear bo */ 117800a23bdaSmrg memset((void*)bo_cpu, 0, sdma_write_length); 11793f012e29Smrg 118000a23bdaSmrg resources[0] = bo; 11813f012e29Smrg 118200a23bdaSmrg /* fulfill PM4: test DMA const fill */ 118300a23bdaSmrg i = j = 0; 118400a23bdaSmrg if (ip_type == AMDGPU_HW_IP_DMA) { 118500a23bdaSmrg if (family_id == AMDGPU_FAMILY_SI) { 118600a23bdaSmrg pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI, 118700a23bdaSmrg 0, 0, 0, 118800a23bdaSmrg sdma_write_length / 4); 118900a23bdaSmrg pm4[i++] = 0xfffffffc & bo_mc; 119000a23bdaSmrg pm4[i++] = 0xdeadbeaf; 119100a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16; 119200a23bdaSmrg } else { 119300a23bdaSmrg pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0, 119400a23bdaSmrg SDMA_CONSTANT_FILL_EXTRA_SIZE(2)); 119500a23bdaSmrg pm4[i++] = 0xffffffff & bo_mc; 119600a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 119700a23bdaSmrg pm4[i++] = 0xdeadbeaf; 119800a23bdaSmrg if (family_id >= AMDGPU_FAMILY_AI) 119900a23bdaSmrg pm4[i++] = sdma_write_length - 1; 120000a23bdaSmrg else 120100a23bdaSmrg pm4[i++] = sdma_write_length; 120200a23bdaSmrg } 120300a23bdaSmrg } else if ((ip_type == AMDGPU_HW_IP_GFX) || 120400a23bdaSmrg (ip_type == AMDGPU_HW_IP_COMPUTE)) { 120500a23bdaSmrg if (family_id == AMDGPU_FAMILY_SI) { 120600a23bdaSmrg pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4); 120700a23bdaSmrg pm4[i++] = 0xdeadbeaf; 120800a23bdaSmrg pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) | 120900a23bdaSmrg PACKET3_DMA_DATA_SI_DST_SEL(0) | 121000a23bdaSmrg PACKET3_DMA_DATA_SI_SRC_SEL(2) | 121100a23bdaSmrg PACKET3_DMA_DATA_SI_CP_SYNC; 121200a23bdaSmrg pm4[i++] = 0xffffffff & bo_mc; 121300a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1214d8807b2fSmrg pm4[i++] = sdma_write_length; 121500a23bdaSmrg } else { 121600a23bdaSmrg pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5); 121700a23bdaSmrg pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) | 121800a23bdaSmrg PACKET3_DMA_DATA_DST_SEL(0) | 121900a23bdaSmrg PACKET3_DMA_DATA_SRC_SEL(2) | 122000a23bdaSmrg PACKET3_DMA_DATA_CP_SYNC; 122100a23bdaSmrg pm4[i++] = 0xdeadbeaf; 122200a23bdaSmrg pm4[i++] = 0; 122300a23bdaSmrg pm4[i++] = 0xfffffffc & bo_mc; 122400a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 122500a23bdaSmrg pm4[i++] = sdma_write_length; 122600a23bdaSmrg } 1227d8807b2fSmrg } 12283f012e29Smrg 122900a23bdaSmrg amdgpu_test_exec_cs_helper(context_handle, 123000a23bdaSmrg ip_type, ring_id, 123100a23bdaSmrg i, pm4, 123200a23bdaSmrg 1, resources, 123300a23bdaSmrg ib_info, ibs_request); 12343f012e29Smrg 123500a23bdaSmrg /* verify if SDMA test result meets with expected */ 123600a23bdaSmrg i = 0; 123700a23bdaSmrg while(i < (sdma_write_length / 4)) { 123800a23bdaSmrg CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf); 123900a23bdaSmrg } 12403f012e29Smrg 124100a23bdaSmrg r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc, 124200a23bdaSmrg sdma_write_length); 124300a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 124400a23bdaSmrg loop++; 124500a23bdaSmrg } 12463f012e29Smrg } 12473f012e29Smrg /* clean resources */ 12483f012e29Smrg free(resources); 12493f012e29Smrg free(ibs_request); 12503f012e29Smrg free(ib_info); 12513f012e29Smrg free(pm4); 12523f012e29Smrg 12533f012e29Smrg /* end of test */ 12543f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 12553f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12563f012e29Smrg} 12573f012e29Smrg 12583f012e29Smrgstatic void amdgpu_command_submission_sdma_const_fill(void) 12593f012e29Smrg{ 12603f012e29Smrg amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA); 12613f012e29Smrg} 12623f012e29Smrg 12633f012e29Smrgstatic void amdgpu_command_submission_copy_linear_helper(unsigned ip_type) 12643f012e29Smrg{ 12653f012e29Smrg const int sdma_write_length = 1024; 12663f012e29Smrg const int pm4_dw = 256; 12673f012e29Smrg amdgpu_context_handle context_handle; 12683f012e29Smrg amdgpu_bo_handle bo1, bo2; 12693f012e29Smrg amdgpu_bo_handle *resources; 12703f012e29Smrg uint32_t *pm4; 12713f012e29Smrg struct amdgpu_cs_ib_info *ib_info; 12723f012e29Smrg struct amdgpu_cs_request *ibs_request; 12733f012e29Smrg uint64_t bo1_mc, bo2_mc; 12743f012e29Smrg volatile unsigned char *bo1_cpu, *bo2_cpu; 127500a23bdaSmrg int i, j, r, loop1, loop2, ring_id; 12763f012e29Smrg uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 12773f012e29Smrg amdgpu_va_handle bo1_va_handle, bo2_va_handle; 127800a23bdaSmrg struct drm_amdgpu_info_hw_ip hw_ip_info; 12793f012e29Smrg 12803f012e29Smrg pm4 = calloc(pm4_dw, sizeof(*pm4)); 12813f012e29Smrg CU_ASSERT_NOT_EQUAL(pm4, NULL); 12823f012e29Smrg 12833f012e29Smrg ib_info = calloc(1, sizeof(*ib_info)); 12843f012e29Smrg CU_ASSERT_NOT_EQUAL(ib_info, NULL); 12853f012e29Smrg 12863f012e29Smrg ibs_request = calloc(1, sizeof(*ibs_request)); 12873f012e29Smrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 12883f012e29Smrg 128900a23bdaSmrg r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info); 129000a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 129100a23bdaSmrg 12923f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 12933f012e29Smrg CU_ASSERT_EQUAL(r, 0); 12943f012e29Smrg 12953f012e29Smrg /* prepare resource */ 12963f012e29Smrg resources = calloc(2, sizeof(amdgpu_bo_handle)); 12973f012e29Smrg CU_ASSERT_NOT_EQUAL(resources, NULL); 12983f012e29Smrg 129900a23bdaSmrg for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) { 130000a23bdaSmrg loop1 = loop2 = 0; 130100a23bdaSmrg /* run 9 circle to test all mapping combination */ 130200a23bdaSmrg while(loop1 < 2) { 130300a23bdaSmrg while(loop2 < 2) { 130400a23bdaSmrg /* allocate UC bo1for sDMA use */ 130500a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 130600a23bdaSmrg sdma_write_length, 4096, 130700a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 130800a23bdaSmrg gtt_flags[loop1], &bo1, 130900a23bdaSmrg (void**)&bo1_cpu, &bo1_mc, 131000a23bdaSmrg &bo1_va_handle); 131100a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 131200a23bdaSmrg 131300a23bdaSmrg /* set bo1 */ 131400a23bdaSmrg memset((void*)bo1_cpu, 0xaa, sdma_write_length); 131500a23bdaSmrg 131600a23bdaSmrg /* allocate UC bo2 for sDMA use */ 131700a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 131800a23bdaSmrg sdma_write_length, 4096, 131900a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 132000a23bdaSmrg gtt_flags[loop2], &bo2, 132100a23bdaSmrg (void**)&bo2_cpu, &bo2_mc, 132200a23bdaSmrg &bo2_va_handle); 132300a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 132400a23bdaSmrg 132500a23bdaSmrg /* clear bo2 */ 132600a23bdaSmrg memset((void*)bo2_cpu, 0, sdma_write_length); 132700a23bdaSmrg 132800a23bdaSmrg resources[0] = bo1; 132900a23bdaSmrg resources[1] = bo2; 133000a23bdaSmrg 133100a23bdaSmrg /* fulfill PM4: test DMA copy linear */ 133200a23bdaSmrg i = j = 0; 133300a23bdaSmrg if (ip_type == AMDGPU_HW_IP_DMA) { 133400a23bdaSmrg if (family_id == AMDGPU_FAMILY_SI) { 133500a23bdaSmrg pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 133600a23bdaSmrg 0, 0, 0, 133700a23bdaSmrg sdma_write_length); 133800a23bdaSmrg pm4[i++] = 0xffffffff & bo2_mc; 133900a23bdaSmrg pm4[i++] = 0xffffffff & bo1_mc; 134000a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 134100a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 134200a23bdaSmrg } else { 134300a23bdaSmrg pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, 134400a23bdaSmrg SDMA_COPY_SUB_OPCODE_LINEAR, 134500a23bdaSmrg 0); 134600a23bdaSmrg if (family_id >= AMDGPU_FAMILY_AI) 134700a23bdaSmrg pm4[i++] = sdma_write_length - 1; 134800a23bdaSmrg else 134900a23bdaSmrg pm4[i++] = sdma_write_length; 135000a23bdaSmrg pm4[i++] = 0; 135100a23bdaSmrg pm4[i++] = 0xffffffff & bo1_mc; 135200a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 135300a23bdaSmrg pm4[i++] = 0xffffffff & bo2_mc; 135400a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 135500a23bdaSmrg } 135600a23bdaSmrg } else if ((ip_type == AMDGPU_HW_IP_GFX) || 135700a23bdaSmrg (ip_type == AMDGPU_HW_IP_COMPUTE)) { 135800a23bdaSmrg if (family_id == AMDGPU_FAMILY_SI) { 135900a23bdaSmrg pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4); 136000a23bdaSmrg pm4[i++] = 0xfffffffc & bo1_mc; 136100a23bdaSmrg pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) | 136200a23bdaSmrg PACKET3_DMA_DATA_SI_DST_SEL(0) | 136300a23bdaSmrg PACKET3_DMA_DATA_SI_SRC_SEL(0) | 136400a23bdaSmrg PACKET3_DMA_DATA_SI_CP_SYNC | 136500a23bdaSmrg (0xffff00000000 & bo1_mc) >> 32; 136600a23bdaSmrg pm4[i++] = 0xfffffffc & bo2_mc; 136700a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 1368d8807b2fSmrg pm4[i++] = sdma_write_length; 136900a23bdaSmrg } else { 137000a23bdaSmrg pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5); 137100a23bdaSmrg pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) | 137200a23bdaSmrg PACKET3_DMA_DATA_DST_SEL(0) | 137300a23bdaSmrg PACKET3_DMA_DATA_SRC_SEL(0) | 137400a23bdaSmrg PACKET3_DMA_DATA_CP_SYNC; 137500a23bdaSmrg pm4[i++] = 0xfffffffc & bo1_mc; 137600a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 137700a23bdaSmrg pm4[i++] = 0xfffffffc & bo2_mc; 137800a23bdaSmrg pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 137900a23bdaSmrg pm4[i++] = sdma_write_length; 138000a23bdaSmrg } 1381d8807b2fSmrg } 13823f012e29Smrg 138300a23bdaSmrg amdgpu_test_exec_cs_helper(context_handle, 138400a23bdaSmrg ip_type, ring_id, 138500a23bdaSmrg i, pm4, 138600a23bdaSmrg 2, resources, 138700a23bdaSmrg ib_info, ibs_request); 13883f012e29Smrg 138900a23bdaSmrg /* verify if SDMA test result meets with expected */ 139000a23bdaSmrg i = 0; 139100a23bdaSmrg while(i < sdma_write_length) { 139200a23bdaSmrg CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa); 139300a23bdaSmrg } 139400a23bdaSmrg r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc, 139500a23bdaSmrg sdma_write_length); 139600a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 139700a23bdaSmrg r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc, 139800a23bdaSmrg sdma_write_length); 139900a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 140000a23bdaSmrg loop2++; 14013f012e29Smrg } 140200a23bdaSmrg loop1++; 14033f012e29Smrg } 14043f012e29Smrg } 14053f012e29Smrg /* clean resources */ 14063f012e29Smrg free(resources); 14073f012e29Smrg free(ibs_request); 14083f012e29Smrg free(ib_info); 14093f012e29Smrg free(pm4); 14103f012e29Smrg 14113f012e29Smrg /* end of test */ 14123f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 14133f012e29Smrg CU_ASSERT_EQUAL(r, 0); 14143f012e29Smrg} 14153f012e29Smrg 14163f012e29Smrgstatic void amdgpu_command_submission_sdma_copy_linear(void) 14173f012e29Smrg{ 14183f012e29Smrg amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA); 14193f012e29Smrg} 14203f012e29Smrg 14213f012e29Smrgstatic void amdgpu_command_submission_sdma(void) 14223f012e29Smrg{ 14233f012e29Smrg amdgpu_command_submission_sdma_write_linear(); 14243f012e29Smrg amdgpu_command_submission_sdma_const_fill(); 14253f012e29Smrg amdgpu_command_submission_sdma_copy_linear(); 14263f012e29Smrg} 14273f012e29Smrg 1428d8807b2fSmrgstatic void amdgpu_command_submission_multi_fence_wait_all(bool wait_all) 1429d8807b2fSmrg{ 1430d8807b2fSmrg amdgpu_context_handle context_handle; 1431d8807b2fSmrg amdgpu_bo_handle ib_result_handle, ib_result_ce_handle; 1432d8807b2fSmrg void *ib_result_cpu, *ib_result_ce_cpu; 1433d8807b2fSmrg uint64_t ib_result_mc_address, ib_result_ce_mc_address; 1434d8807b2fSmrg struct amdgpu_cs_request ibs_request[2] = {0}; 1435d8807b2fSmrg struct amdgpu_cs_ib_info ib_info[2]; 1436d8807b2fSmrg struct amdgpu_cs_fence fence_status[2] = {0}; 1437d8807b2fSmrg uint32_t *ptr; 1438d8807b2fSmrg uint32_t expired; 1439d8807b2fSmrg amdgpu_bo_list_handle bo_list; 1440d8807b2fSmrg amdgpu_va_handle va_handle, va_handle_ce; 1441d8807b2fSmrg int r; 1442d8807b2fSmrg int i = 0, ib_cs_num = 2; 1443d8807b2fSmrg 1444d8807b2fSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 1445d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1446d8807b2fSmrg 1447d8807b2fSmrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 1448d8807b2fSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 1449d8807b2fSmrg &ib_result_handle, &ib_result_cpu, 1450d8807b2fSmrg &ib_result_mc_address, &va_handle); 1451d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1452d8807b2fSmrg 1453d8807b2fSmrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 1454d8807b2fSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 1455d8807b2fSmrg &ib_result_ce_handle, &ib_result_ce_cpu, 1456d8807b2fSmrg &ib_result_ce_mc_address, &va_handle_ce); 1457d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1458d8807b2fSmrg 1459d8807b2fSmrg r = amdgpu_get_bo_list(device_handle, ib_result_handle, 1460d8807b2fSmrg ib_result_ce_handle, &bo_list); 1461d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1462d8807b2fSmrg 1463d8807b2fSmrg memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info)); 1464d8807b2fSmrg 1465d8807b2fSmrg /* IT_SET_CE_DE_COUNTERS */ 1466d8807b2fSmrg ptr = ib_result_ce_cpu; 1467d8807b2fSmrg if (family_id != AMDGPU_FAMILY_SI) { 1468d8807b2fSmrg ptr[i++] = 0xc0008900; 1469d8807b2fSmrg ptr[i++] = 0; 1470d8807b2fSmrg } 1471d8807b2fSmrg ptr[i++] = 0xc0008400; 1472d8807b2fSmrg ptr[i++] = 1; 1473d8807b2fSmrg ib_info[0].ib_mc_address = ib_result_ce_mc_address; 1474d8807b2fSmrg ib_info[0].size = i; 1475d8807b2fSmrg ib_info[0].flags = AMDGPU_IB_FLAG_CE; 1476d8807b2fSmrg 1477d8807b2fSmrg /* IT_WAIT_ON_CE_COUNTER */ 1478d8807b2fSmrg ptr = ib_result_cpu; 1479d8807b2fSmrg ptr[0] = 0xc0008600; 1480d8807b2fSmrg ptr[1] = 0x00000001; 1481d8807b2fSmrg ib_info[1].ib_mc_address = ib_result_mc_address; 1482d8807b2fSmrg ib_info[1].size = 2; 1483d8807b2fSmrg 1484d8807b2fSmrg for (i = 0; i < ib_cs_num; i++) { 1485d8807b2fSmrg ibs_request[i].ip_type = AMDGPU_HW_IP_GFX; 1486d8807b2fSmrg ibs_request[i].number_of_ibs = 2; 1487d8807b2fSmrg ibs_request[i].ibs = ib_info; 1488d8807b2fSmrg ibs_request[i].resources = bo_list; 1489d8807b2fSmrg ibs_request[i].fence_info.handle = NULL; 1490d8807b2fSmrg } 1491d8807b2fSmrg 1492d8807b2fSmrg r = amdgpu_cs_submit(context_handle, 0,ibs_request, ib_cs_num); 1493d8807b2fSmrg 1494d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1495d8807b2fSmrg 1496d8807b2fSmrg for (i = 0; i < ib_cs_num; i++) { 1497d8807b2fSmrg fence_status[i].context = context_handle; 1498d8807b2fSmrg fence_status[i].ip_type = AMDGPU_HW_IP_GFX; 1499d8807b2fSmrg fence_status[i].fence = ibs_request[i].seq_no; 1500d8807b2fSmrg } 1501d8807b2fSmrg 1502d8807b2fSmrg r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all, 1503d8807b2fSmrg AMDGPU_TIMEOUT_INFINITE, 1504d8807b2fSmrg &expired, NULL); 1505d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1506d8807b2fSmrg 1507d8807b2fSmrg r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 1508d8807b2fSmrg ib_result_mc_address, 4096); 1509d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1510d8807b2fSmrg 1511d8807b2fSmrg r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce, 1512d8807b2fSmrg ib_result_ce_mc_address, 4096); 1513d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1514d8807b2fSmrg 1515d8807b2fSmrg r = amdgpu_bo_list_destroy(bo_list); 1516d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1517d8807b2fSmrg 1518d8807b2fSmrg r = amdgpu_cs_ctx_free(context_handle); 1519d8807b2fSmrg CU_ASSERT_EQUAL(r, 0); 1520d8807b2fSmrg} 1521d8807b2fSmrg 1522d8807b2fSmrgstatic void amdgpu_command_submission_multi_fence(void) 1523d8807b2fSmrg{ 1524d8807b2fSmrg amdgpu_command_submission_multi_fence_wait_all(true); 1525d8807b2fSmrg amdgpu_command_submission_multi_fence_wait_all(false); 1526d8807b2fSmrg} 1527d8807b2fSmrg 15283f012e29Smrgstatic void amdgpu_userptr_test(void) 15293f012e29Smrg{ 15303f012e29Smrg int i, r, j; 15313f012e29Smrg uint32_t *pm4 = NULL; 15323f012e29Smrg uint64_t bo_mc; 15333f012e29Smrg void *ptr = NULL; 15343f012e29Smrg int pm4_dw = 256; 15353f012e29Smrg int sdma_write_length = 4; 15363f012e29Smrg amdgpu_bo_handle handle; 15373f012e29Smrg amdgpu_context_handle context_handle; 15383f012e29Smrg struct amdgpu_cs_ib_info *ib_info; 15393f012e29Smrg struct amdgpu_cs_request *ibs_request; 15403f012e29Smrg amdgpu_bo_handle buf_handle; 15413f012e29Smrg amdgpu_va_handle va_handle; 15423f012e29Smrg 15433f012e29Smrg pm4 = calloc(pm4_dw, sizeof(*pm4)); 15443f012e29Smrg CU_ASSERT_NOT_EQUAL(pm4, NULL); 15453f012e29Smrg 15463f012e29Smrg ib_info = calloc(1, sizeof(*ib_info)); 15473f012e29Smrg CU_ASSERT_NOT_EQUAL(ib_info, NULL); 15483f012e29Smrg 15493f012e29Smrg ibs_request = calloc(1, sizeof(*ibs_request)); 15503f012e29Smrg CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 15513f012e29Smrg 15523f012e29Smrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 15533f012e29Smrg CU_ASSERT_EQUAL(r, 0); 15543f012e29Smrg 15553f012e29Smrg posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE); 15563f012e29Smrg CU_ASSERT_NOT_EQUAL(ptr, NULL); 15573f012e29Smrg memset(ptr, 0, BUFFER_SIZE); 15583f012e29Smrg 15593f012e29Smrg r = amdgpu_create_bo_from_user_mem(device_handle, 15603f012e29Smrg ptr, BUFFER_SIZE, &buf_handle); 15613f012e29Smrg CU_ASSERT_EQUAL(r, 0); 15623f012e29Smrg 15633f012e29Smrg r = amdgpu_va_range_alloc(device_handle, 15643f012e29Smrg amdgpu_gpu_va_range_general, 15653f012e29Smrg BUFFER_SIZE, 1, 0, &bo_mc, 15663f012e29Smrg &va_handle, 0); 15673f012e29Smrg CU_ASSERT_EQUAL(r, 0); 15683f012e29Smrg 15693f012e29Smrg r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP); 15703f012e29Smrg CU_ASSERT_EQUAL(r, 0); 15713f012e29Smrg 15723f012e29Smrg handle = buf_handle; 15733f012e29Smrg 15743f012e29Smrg j = i = 0; 1575d8807b2fSmrg 1576d8807b2fSmrg if (family_id == AMDGPU_FAMILY_SI) 1577d8807b2fSmrg pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0, 1578d8807b2fSmrg sdma_write_length); 1579d8807b2fSmrg else 1580d8807b2fSmrg pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE, 1581d8807b2fSmrg SDMA_WRITE_SUB_OPCODE_LINEAR, 0); 15823f012e29Smrg pm4[i++] = 0xffffffff & bo_mc; 15833f012e29Smrg pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1584d8807b2fSmrg if (family_id >= AMDGPU_FAMILY_AI) 1585d8807b2fSmrg pm4[i++] = sdma_write_length - 1; 1586d8807b2fSmrg else if (family_id != AMDGPU_FAMILY_SI) 1587d8807b2fSmrg pm4[i++] = sdma_write_length; 15883f012e29Smrg 15893f012e29Smrg while (j++ < sdma_write_length) 15903f012e29Smrg pm4[i++] = 0xdeadbeaf; 15913f012e29Smrg 159200a23bdaSmrg if (!fork()) { 159300a23bdaSmrg pm4[0] = 0x0; 159400a23bdaSmrg exit(0); 159500a23bdaSmrg } 159600a23bdaSmrg 15973f012e29Smrg amdgpu_test_exec_cs_helper(context_handle, 15983f012e29Smrg AMDGPU_HW_IP_DMA, 0, 15993f012e29Smrg i, pm4, 16003f012e29Smrg 1, &handle, 16013f012e29Smrg ib_info, ibs_request); 16023f012e29Smrg i = 0; 16033f012e29Smrg while (i < sdma_write_length) { 16043f012e29Smrg CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf); 16053f012e29Smrg } 16063f012e29Smrg free(ibs_request); 16073f012e29Smrg free(ib_info); 16083f012e29Smrg free(pm4); 16093f012e29Smrg 16103f012e29Smrg r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP); 16113f012e29Smrg CU_ASSERT_EQUAL(r, 0); 16123f012e29Smrg r = amdgpu_va_range_free(va_handle); 16133f012e29Smrg CU_ASSERT_EQUAL(r, 0); 16143f012e29Smrg r = amdgpu_bo_free(buf_handle); 16153f012e29Smrg CU_ASSERT_EQUAL(r, 0); 16163f012e29Smrg free(ptr); 16173f012e29Smrg 16183f012e29Smrg r = amdgpu_cs_ctx_free(context_handle); 16193f012e29Smrg CU_ASSERT_EQUAL(r, 0); 162000a23bdaSmrg 162100a23bdaSmrg wait(NULL); 162200a23bdaSmrg} 162300a23bdaSmrg 162400a23bdaSmrgstatic void amdgpu_sync_dependency_test(void) 162500a23bdaSmrg{ 162600a23bdaSmrg amdgpu_context_handle context_handle[2]; 162700a23bdaSmrg amdgpu_bo_handle ib_result_handle; 162800a23bdaSmrg void *ib_result_cpu; 162900a23bdaSmrg uint64_t ib_result_mc_address; 163000a23bdaSmrg struct amdgpu_cs_request ibs_request; 163100a23bdaSmrg struct amdgpu_cs_ib_info ib_info; 163200a23bdaSmrg struct amdgpu_cs_fence fence_status; 163300a23bdaSmrg uint32_t expired; 163400a23bdaSmrg int i, j, r; 163500a23bdaSmrg amdgpu_bo_list_handle bo_list; 163600a23bdaSmrg amdgpu_va_handle va_handle; 163700a23bdaSmrg static uint32_t *ptr; 163800a23bdaSmrg uint64_t seq_no; 163900a23bdaSmrg 164000a23bdaSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]); 164100a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 164200a23bdaSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]); 164300a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 164400a23bdaSmrg 164500a23bdaSmrg r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096, 164600a23bdaSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 164700a23bdaSmrg &ib_result_handle, &ib_result_cpu, 164800a23bdaSmrg &ib_result_mc_address, &va_handle); 164900a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 165000a23bdaSmrg 165100a23bdaSmrg r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, 165200a23bdaSmrg &bo_list); 165300a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 165400a23bdaSmrg 165500a23bdaSmrg ptr = ib_result_cpu; 165600a23bdaSmrg i = 0; 165700a23bdaSmrg 165800a23bdaSmrg memcpy(ptr + CODE_OFFSET , shader_bin, sizeof(shader_bin)); 165900a23bdaSmrg 166000a23bdaSmrg /* Dispatch minimal init config and verify it's executed */ 166100a23bdaSmrg ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); 166200a23bdaSmrg ptr[i++] = 0x80000000; 166300a23bdaSmrg ptr[i++] = 0x80000000; 166400a23bdaSmrg 166500a23bdaSmrg ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0); 166600a23bdaSmrg ptr[i++] = 0x80000000; 166700a23bdaSmrg 166800a23bdaSmrg 166900a23bdaSmrg /* Program compute regs */ 167000a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 167100a23bdaSmrg ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 167200a23bdaSmrg ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8; 167300a23bdaSmrg ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40; 167400a23bdaSmrg 167500a23bdaSmrg 167600a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 167700a23bdaSmrg ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START; 167800a23bdaSmrg /* 167900a23bdaSmrg * 002c0040 COMPUTE_PGM_RSRC1 <- VGPRS = 0 168000a23bdaSmrg SGPRS = 1 168100a23bdaSmrg PRIORITY = 0 168200a23bdaSmrg FLOAT_MODE = 192 (0xc0) 168300a23bdaSmrg PRIV = 0 168400a23bdaSmrg DX10_CLAMP = 1 168500a23bdaSmrg DEBUG_MODE = 0 168600a23bdaSmrg IEEE_MODE = 0 168700a23bdaSmrg BULKY = 0 168800a23bdaSmrg CDBG_USER = 0 168900a23bdaSmrg * 169000a23bdaSmrg */ 169100a23bdaSmrg ptr[i++] = 0x002c0040; 169200a23bdaSmrg 169300a23bdaSmrg 169400a23bdaSmrg /* 169500a23bdaSmrg * 00000010 COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0 169600a23bdaSmrg USER_SGPR = 8 169700a23bdaSmrg TRAP_PRESENT = 0 169800a23bdaSmrg TGID_X_EN = 0 169900a23bdaSmrg TGID_Y_EN = 0 170000a23bdaSmrg TGID_Z_EN = 0 170100a23bdaSmrg TG_SIZE_EN = 0 170200a23bdaSmrg TIDIG_COMP_CNT = 0 170300a23bdaSmrg EXCP_EN_MSB = 0 170400a23bdaSmrg LDS_SIZE = 0 170500a23bdaSmrg EXCP_EN = 0 170600a23bdaSmrg * 170700a23bdaSmrg */ 170800a23bdaSmrg ptr[i++] = 0x00000010; 170900a23bdaSmrg 171000a23bdaSmrg 171100a23bdaSmrg/* 171200a23bdaSmrg * 00000100 COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100) 171300a23bdaSmrg WAVESIZE = 0 171400a23bdaSmrg * 171500a23bdaSmrg */ 171600a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 171700a23bdaSmrg ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START; 171800a23bdaSmrg ptr[i++] = 0x00000100; 171900a23bdaSmrg 172000a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 172100a23bdaSmrg ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START; 172200a23bdaSmrg ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4); 172300a23bdaSmrg ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32; 172400a23bdaSmrg 172500a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 172600a23bdaSmrg ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START; 172700a23bdaSmrg ptr[i++] = 0; 172800a23bdaSmrg 172900a23bdaSmrg ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3); 173000a23bdaSmrg ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START; 173100a23bdaSmrg ptr[i++] = 1; 173200a23bdaSmrg ptr[i++] = 1; 173300a23bdaSmrg ptr[i++] = 1; 173400a23bdaSmrg 173500a23bdaSmrg 173600a23bdaSmrg /* Dispatch */ 173700a23bdaSmrg ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 173800a23bdaSmrg ptr[i++] = 1; 173900a23bdaSmrg ptr[i++] = 1; 174000a23bdaSmrg ptr[i++] = 1; 174100a23bdaSmrg ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */ 174200a23bdaSmrg 174300a23bdaSmrg 174400a23bdaSmrg while (i & 7) 174500a23bdaSmrg ptr[i++] = 0xffff1000; /* type3 nop packet */ 174600a23bdaSmrg 174700a23bdaSmrg memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); 174800a23bdaSmrg ib_info.ib_mc_address = ib_result_mc_address; 174900a23bdaSmrg ib_info.size = i; 175000a23bdaSmrg 175100a23bdaSmrg memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); 175200a23bdaSmrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 175300a23bdaSmrg ibs_request.ring = 0; 175400a23bdaSmrg ibs_request.number_of_ibs = 1; 175500a23bdaSmrg ibs_request.ibs = &ib_info; 175600a23bdaSmrg ibs_request.resources = bo_list; 175700a23bdaSmrg ibs_request.fence_info.handle = NULL; 175800a23bdaSmrg 175900a23bdaSmrg r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request, 1); 176000a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 176100a23bdaSmrg seq_no = ibs_request.seq_no; 176200a23bdaSmrg 176300a23bdaSmrg 176400a23bdaSmrg 176500a23bdaSmrg /* Prepare second command with dependency on the first */ 176600a23bdaSmrg j = i; 176700a23bdaSmrg ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3); 176800a23bdaSmrg ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 176900a23bdaSmrg ptr[i++] = 0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4); 177000a23bdaSmrg ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32; 177100a23bdaSmrg ptr[i++] = 99; 177200a23bdaSmrg 177300a23bdaSmrg while (i & 7) 177400a23bdaSmrg ptr[i++] = 0xffff1000; /* type3 nop packet */ 177500a23bdaSmrg 177600a23bdaSmrg memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); 177700a23bdaSmrg ib_info.ib_mc_address = ib_result_mc_address + j * 4; 177800a23bdaSmrg ib_info.size = i - j; 177900a23bdaSmrg 178000a23bdaSmrg memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); 178100a23bdaSmrg ibs_request.ip_type = AMDGPU_HW_IP_GFX; 178200a23bdaSmrg ibs_request.ring = 0; 178300a23bdaSmrg ibs_request.number_of_ibs = 1; 178400a23bdaSmrg ibs_request.ibs = &ib_info; 178500a23bdaSmrg ibs_request.resources = bo_list; 178600a23bdaSmrg ibs_request.fence_info.handle = NULL; 178700a23bdaSmrg 178800a23bdaSmrg ibs_request.number_of_dependencies = 1; 178900a23bdaSmrg 179000a23bdaSmrg ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies)); 179100a23bdaSmrg ibs_request.dependencies[0].context = context_handle[1]; 179200a23bdaSmrg ibs_request.dependencies[0].ip_instance = 0; 179300a23bdaSmrg ibs_request.dependencies[0].ring = 0; 179400a23bdaSmrg ibs_request.dependencies[0].fence = seq_no; 179500a23bdaSmrg 179600a23bdaSmrg 179700a23bdaSmrg r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request, 1); 179800a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 179900a23bdaSmrg 180000a23bdaSmrg 180100a23bdaSmrg memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence)); 180200a23bdaSmrg fence_status.context = context_handle[0]; 180300a23bdaSmrg fence_status.ip_type = AMDGPU_HW_IP_GFX; 180400a23bdaSmrg fence_status.ip_instance = 0; 180500a23bdaSmrg fence_status.ring = 0; 180600a23bdaSmrg fence_status.fence = ibs_request.seq_no; 180700a23bdaSmrg 180800a23bdaSmrg r = amdgpu_cs_query_fence_status(&fence_status, 180900a23bdaSmrg AMDGPU_TIMEOUT_INFINITE,0, &expired); 181000a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 181100a23bdaSmrg 181200a23bdaSmrg /* Expect the second command to wait for shader to complete */ 181300a23bdaSmrg CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99); 181400a23bdaSmrg 181500a23bdaSmrg r = amdgpu_bo_list_destroy(bo_list); 181600a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 181700a23bdaSmrg 181800a23bdaSmrg r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 181900a23bdaSmrg ib_result_mc_address, 4096); 182000a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 182100a23bdaSmrg 182200a23bdaSmrg r = amdgpu_cs_ctx_free(context_handle[0]); 182300a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 182400a23bdaSmrg r = amdgpu_cs_ctx_free(context_handle[1]); 182500a23bdaSmrg CU_ASSERT_EQUAL(r, 0); 182600a23bdaSmrg 182700a23bdaSmrg free(ibs_request.dependencies); 18283f012e29Smrg} 1829