basic_tests.c revision 4babd585
1/* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 
21 * 22*/ 23 24#include <stdio.h> 25#include <stdlib.h> 26#include <unistd.h> 27#include <sys/types.h> 28#ifdef MAJOR_IN_SYSMACROS 29#include <sys/sysmacros.h> 30#endif 31#include <sys/stat.h> 32#include <fcntl.h> 33#if HAVE_ALLOCA_H 34# include <alloca.h> 35#endif 36#include <sys/wait.h> 37 38#include "CUnit/Basic.h" 39 40#include "amdgpu_test.h" 41#include "amdgpu_drm.h" 42#include "amdgpu_internal.h" 43#include "util_math.h" 44 45static amdgpu_device_handle device_handle; 46static uint32_t major_version; 47static uint32_t minor_version; 48static uint32_t family_id; 49static uint32_t chip_id; 50static uint32_t chip_rev; 51 52static void amdgpu_query_info_test(void); 53static void amdgpu_command_submission_gfx(void); 54static void amdgpu_command_submission_compute(void); 55static void amdgpu_command_submission_multi_fence(void); 56static void amdgpu_command_submission_sdma(void); 57static void amdgpu_userptr_test(void); 58static void amdgpu_semaphore_test(void); 59static void amdgpu_sync_dependency_test(void); 60static void amdgpu_bo_eviction_test(void); 61static void amdgpu_compute_dispatch_test(void); 62static void amdgpu_gfx_dispatch_test(void); 63static void amdgpu_draw_test(void); 64static void amdgpu_gpu_reset_test(void); 65 66static void amdgpu_command_submission_write_linear_helper(unsigned ip_type); 67static void amdgpu_command_submission_const_fill_helper(unsigned ip_type); 68static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type); 69static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle, 70 unsigned ip_type, 71 int instance, int pm4_dw, uint32_t *pm4_src, 72 int res_cnt, amdgpu_bo_handle *resources, 73 struct amdgpu_cs_ib_info *ib_info, 74 struct amdgpu_cs_request *ibs_request); 75 76CU_TestInfo basic_tests[] = { 77 { "Query Info Test", amdgpu_query_info_test }, 78 { "Userptr Test", amdgpu_userptr_test }, 79 { "bo eviction Test", amdgpu_bo_eviction_test }, 80 { "Command submission Test (GFX)", 
amdgpu_command_submission_gfx }, 81 { "Command submission Test (Compute)", amdgpu_command_submission_compute }, 82 { "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence }, 83 { "Command submission Test (SDMA)", amdgpu_command_submission_sdma }, 84 { "SW semaphore Test", amdgpu_semaphore_test }, 85 { "Sync dependency Test", amdgpu_sync_dependency_test }, 86 { "Dispatch Test (Compute)", amdgpu_compute_dispatch_test }, 87 { "Dispatch Test (GFX)", amdgpu_gfx_dispatch_test }, 88 { "Draw Test", amdgpu_draw_test }, 89 { "GPU reset Test", amdgpu_gpu_reset_test }, 90 CU_TEST_INFO_NULL, 91}; 92#define BUFFER_SIZE (MAX2(8 * 1024, getpagesize())) 93#define SDMA_PKT_HEADER_op_offset 0 94#define SDMA_PKT_HEADER_op_mask 0x000000FF 95#define SDMA_PKT_HEADER_op_shift 0 96#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift) 97#define SDMA_OPCODE_CONSTANT_FILL 11 98# define SDMA_CONSTANT_FILL_EXTRA_SIZE(x) ((x) << 14) 99 /* 0 = byte fill 100 * 2 = DW fill 101 */ 102#define SDMA_PACKET(op, sub_op, e) ((((e) & 0xFFFF) << 16) | \ 103 (((sub_op) & 0xFF) << 8) | \ 104 (((op) & 0xFF) << 0)) 105#define SDMA_OPCODE_WRITE 2 106# define SDMA_WRITE_SUB_OPCODE_LINEAR 0 107# define SDMA_WRTIE_SUB_OPCODE_TILED 1 108 109#define SDMA_OPCODE_COPY 1 110# define SDMA_COPY_SUB_OPCODE_LINEAR 0 111 112#define SDMA_OPCODE_ATOMIC 10 113# define SDMA_ATOMIC_LOOP(x) ((x) << 0) 114 /* 0 - single_pass_atomic. 115 * 1 - loop_until_compare_satisfied. 116 */ 117# define SDMA_ATOMIC_TMZ(x) ((x) << 2) 118 /* 0 - non-TMZ. 119 * 1 - TMZ. 
120 */ 121# define SDMA_ATOMIC_OPCODE(x) ((x) << 9) 122 /* TC_OP_ATOMIC_CMPSWAP_RTN_32 0x00000008 123 * same as Packet 3 124 */ 125 126#define GFX_COMPUTE_NOP 0xffff1000 127#define SDMA_NOP 0x0 128 129/* PM4 */ 130#define PACKET_TYPE0 0 131#define PACKET_TYPE1 1 132#define PACKET_TYPE2 2 133#define PACKET_TYPE3 3 134 135#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3) 136#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF) 137#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF) 138#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF) 139#define PACKET0(reg, n) ((PACKET_TYPE0 << 30) | \ 140 ((reg) & 0xFFFF) | \ 141 ((n) & 0x3FFF) << 16) 142#define CP_PACKET2 0x80000000 143#define PACKET2_PAD_SHIFT 0 144#define PACKET2_PAD_MASK (0x3fffffff << 0) 145 146#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v))) 147 148#define PACKET3(op, n) ((PACKET_TYPE3 << 30) | \ 149 (((op) & 0xFF) << 8) | \ 150 ((n) & 0x3FFF) << 16) 151#define PACKET3_COMPUTE(op, n) PACKET3(op, n) | (1 << 1) 152 153/* Packet 3 types */ 154#define PACKET3_NOP 0x10 155 156#define PACKET3_WRITE_DATA 0x37 157#define WRITE_DATA_DST_SEL(x) ((x) << 8) 158 /* 0 - register 159 * 1 - memory (sync - via GRBM) 160 * 2 - gl2 161 * 3 - gds 162 * 4 - reserved 163 * 5 - memory (async - direct) 164 */ 165#define WR_ONE_ADDR (1 << 16) 166#define WR_CONFIRM (1 << 20) 167#define WRITE_DATA_CACHE_POLICY(x) ((x) << 25) 168 /* 0 - LRU 169 * 1 - Stream 170 */ 171#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30) 172 /* 0 - me 173 * 1 - pfp 174 * 2 - ce 175 */ 176 177#define PACKET3_ATOMIC_MEM 0x1E 178#define TC_OP_ATOMIC_CMPSWAP_RTN_32 0x00000008 179#define ATOMIC_MEM_COMMAND(x) ((x) << 8) 180 /* 0 - single_pass_atomic. 181 * 1 - loop_until_compare_satisfied. 182 */ 183#define ATOMIC_MEM_CACHEPOLICAY(x) ((x) << 25) 184 /* 0 - lru. 185 * 1 - stream. 186 */ 187#define ATOMIC_MEM_ENGINESEL(x) ((x) << 30) 188 /* 0 - micro_engine. 189 */ 190 191#define PACKET3_DMA_DATA 0x50 192/* 1. header 193 * 2. CONTROL 194 * 3. 
SRC_ADDR_LO or DATA [31:0] 195 * 4. SRC_ADDR_HI [31:0] 196 * 5. DST_ADDR_LO [31:0] 197 * 6. DST_ADDR_HI [7:0] 198 * 7. COMMAND [30:21] | BYTE_COUNT [20:0] 199 */ 200/* CONTROL */ 201# define PACKET3_DMA_DATA_ENGINE(x) ((x) << 0) 202 /* 0 - ME 203 * 1 - PFP 204 */ 205# define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13) 206 /* 0 - LRU 207 * 1 - Stream 208 * 2 - Bypass 209 */ 210# define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15) 211# define PACKET3_DMA_DATA_DST_SEL(x) ((x) << 20) 212 /* 0 - DST_ADDR using DAS 213 * 1 - GDS 214 * 3 - DST_ADDR using L2 215 */ 216# define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25) 217 /* 0 - LRU 218 * 1 - Stream 219 * 2 - Bypass 220 */ 221# define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27) 222# define PACKET3_DMA_DATA_SRC_SEL(x) ((x) << 29) 223 /* 0 - SRC_ADDR using SAS 224 * 1 - GDS 225 * 2 - DATA 226 * 3 - SRC_ADDR using L2 227 */ 228# define PACKET3_DMA_DATA_CP_SYNC (1 << 31) 229/* COMMAND */ 230# define PACKET3_DMA_DATA_DIS_WC (1 << 21) 231# define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22) 232 /* 0 - none 233 * 1 - 8 in 16 234 * 2 - 8 in 32 235 * 3 - 8 in 64 236 */ 237# define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24) 238 /* 0 - none 239 * 1 - 8 in 16 240 * 2 - 8 in 32 241 * 3 - 8 in 64 242 */ 243# define PACKET3_DMA_DATA_CMD_SAS (1 << 26) 244 /* 0 - memory 245 * 1 - register 246 */ 247# define PACKET3_DMA_DATA_CMD_DAS (1 << 27) 248 /* 0 - memory 249 * 1 - register 250 */ 251# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28) 252# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29) 253# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30) 254 255#define SDMA_PACKET_SI(op, b, t, s, cnt) ((((op) & 0xF) << 28) | \ 256 (((b) & 0x1) << 26) | \ 257 (((t) & 0x1) << 23) | \ 258 (((s) & 0x1) << 22) | \ 259 (((cnt) & 0xFFFFF) << 0)) 260#define SDMA_OPCODE_COPY_SI 3 261#define SDMA_OPCODE_CONSTANT_FILL_SI 13 262#define SDMA_NOP_SI 0xf 263#define GFX_COMPUTE_NOP_SI 0x80000000 264#define PACKET3_DMA_DATA_SI 0x41 265# define PACKET3_DMA_DATA_SI_ENGINE(x) 
((x) << 27) 266 /* 0 - ME 267 * 1 - PFP 268 */ 269# define PACKET3_DMA_DATA_SI_DST_SEL(x) ((x) << 20) 270 /* 0 - DST_ADDR using DAS 271 * 1 - GDS 272 * 3 - DST_ADDR using L2 273 */ 274# define PACKET3_DMA_DATA_SI_SRC_SEL(x) ((x) << 29) 275 /* 0 - SRC_ADDR using SAS 276 * 1 - GDS 277 * 2 - DATA 278 * 3 - SRC_ADDR using L2 279 */ 280# define PACKET3_DMA_DATA_SI_CP_SYNC (1 << 31) 281 282 283#define PKT3_CONTEXT_CONTROL 0x28 284#define CONTEXT_CONTROL_LOAD_ENABLE(x) (((unsigned)(x) & 0x1) << 31) 285#define CONTEXT_CONTROL_LOAD_CE_RAM(x) (((unsigned)(x) & 0x1) << 28) 286#define CONTEXT_CONTROL_SHADOW_ENABLE(x) (((unsigned)(x) & 0x1) << 31) 287 288#define PKT3_CLEAR_STATE 0x12 289 290#define PKT3_SET_SH_REG 0x76 291#define PACKET3_SET_SH_REG_START 0x00002c00 292 293#define PACKET3_DISPATCH_DIRECT 0x15 294#define PACKET3_EVENT_WRITE 0x46 295#define PACKET3_ACQUIRE_MEM 0x58 296#define PACKET3_SET_CONTEXT_REG 0x69 297#define PACKET3_SET_UCONFIG_REG 0x79 298#define PACKET3_DRAW_INDEX_AUTO 0x2D 299/* gfx 8 */ 300#define mmCOMPUTE_PGM_LO 0x2e0c 301#define mmCOMPUTE_PGM_RSRC1 0x2e12 302#define mmCOMPUTE_TMPRING_SIZE 0x2e18 303#define mmCOMPUTE_USER_DATA_0 0x2e40 304#define mmCOMPUTE_USER_DATA_1 0x2e41 305#define mmCOMPUTE_RESOURCE_LIMITS 0x2e15 306#define mmCOMPUTE_NUM_THREAD_X 0x2e07 307 308 309 310#define SWAP_32(num) (((num & 0xff000000) >> 24) | \ 311 ((num & 0x0000ff00) << 8) | \ 312 ((num & 0x00ff0000) >> 8) | \ 313 ((num & 0x000000ff) << 24)) 314 315 316/* Shader code 317 * void main() 318{ 319 320 float x = some_input; 321 for (unsigned i = 0; i < 1000000; i++) 322 x = sin(x); 323 324 u[0] = 42u; 325} 326*/ 327 328static uint32_t shader_bin[] = { 329 SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf), 330 SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf), 331 SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e), 332 SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf) 
333}; 334 335#define CODE_OFFSET 512 336#define DATA_OFFSET 1024 337 338enum cs_type { 339 CS_BUFFERCLEAR, 340 CS_BUFFERCOPY, 341 CS_HANG, 342 CS_HANG_SLOW 343}; 344 345static const uint32_t bufferclear_cs_shader_gfx9[] = { 346 0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08, 347 0x7e020280, 0x7e040204, 0x7e060205, 0x7e080206, 348 0x7e0a0207, 0xe01c2000, 0x80000200, 0xbf8c0000, 349 0xbf810000 350}; 351 352static const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = { 353 {0x2e12, 0x000C0041}, //{ mmCOMPUTE_PGM_RSRC1, 0x000C0041 }, 354 {0x2e13, 0x00000090}, //{ mmCOMPUTE_PGM_RSRC2, 0x00000090 }, 355 {0x2e07, 0x00000040}, //{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 }, 356 {0x2e08, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 }, 357 {0x2e09, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 } 358}; 359 360static const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5; 361 362static const uint32_t buffercopy_cs_shader_gfx9[] = { 363 0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08, 364 0x7e020280, 0xe00c2000, 0x80000200, 0xbf8c0f70, 365 0xe01c2000, 0x80010200, 0xbf810000 366}; 367 368static const uint32_t preamblecache_gfx9[] = { 369 0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0, 370 0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000, 371 0xc0026900, 0xb4, 0x0, 0x3f800000, 0xc0016900, 0x103, 0x0, 372 0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0, 373 0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0, 374 0xc0016900, 0x2d5, 0x10000, 0xc0016900, 0x2dc, 0x0, 375 0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0, 376 0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 377 0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20, 378 0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0, 379 0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0, 380 0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0, 381 0xc0076900, 
0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 382 0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0, 383 0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff, 384 0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0, 385 0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1, 386 0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0, 387 0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0, 388 0xc0017900, 0x24b, 0x0 389}; 390 391enum ps_type { 392 PS_CONST, 393 PS_TEX, 394 PS_HANG, 395 PS_HANG_SLOW 396}; 397 398static const uint32_t ps_const_shader_gfx9[] = { 399 0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203, 400 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 401 0xC4001C0F, 0x00000100, 0xBF810000 402}; 403 404static const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6; 405 406static const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = { 407 {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 }, 408 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 }, 409 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 }, 410 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 }, 411 { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 }, 412 { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 }, 413 { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 }, 414 { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 }, 415 { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 }, 416 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 } 417 } 418}; 419 420static const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = { 421 0x00000004 422}; 423 424static const uint32_t ps_num_sh_registers_gfx9 = 2; 425 426static const uint32_t ps_const_sh_registers_gfx9[][2] = { 427 {0x2C0A, 
0x000C0040},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 }, 428 {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 } 429}; 430 431static const uint32_t ps_num_context_registers_gfx9 = 7; 432 433static const uint32_t ps_const_context_reg_gfx9[][2] = { 434 {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR, 0x00000002 }, 435 {0xA1B6, 0x00000000}, //{ mmSPI_PS_IN_CONTROL, 0x00000000 }, 436 {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK, 0x0000000F }, 437 {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL, 0x00000010 }, 438 {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT, 0x00000000 }, 439 {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */}, 440 {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT, 0x00000004 } 441}; 442 443static const uint32_t ps_tex_shader_gfx9[] = { 444 0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000, 445 0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00, 446 0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000, 447 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 448 0x00000100, 0xBF810000 449}; 450 451static const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = { 452 0x0000000B 453}; 454 455static const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6; 456 457static const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = { 458 {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 }, 459 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 }, 460 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 }, 461 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 }, 462 { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 463 { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 464 { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 465 { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 466 { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 
467 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 } 468 } 469}; 470 471static const uint32_t ps_tex_sh_registers_gfx9[][2] = { 472 {0x2C0A, 0x000C0081},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 }, 473 {0x2C0B, 0x00000018}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 } 474}; 475 476static const uint32_t ps_tex_context_reg_gfx9[][2] = { 477 {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR, 0x00000002 }, 478 {0xA1B6, 0x00000001}, //{ mmSPI_PS_IN_CONTROL, 0x00000001 }, 479 {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK, 0x0000000F }, 480 {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL, 0x00000010 }, 481 {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT, 0x00000000 }, 482 {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */}, 483 {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT, 0x00000004 } 484}; 485 486static const uint32_t vs_RectPosTexFast_shader_gfx9[] = { 487 0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100, 488 0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206, 489 0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080, 490 0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003, 491 0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101, 492 0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903, 493 0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100, 494 0xC400020F, 0x05060403, 0xBF810000 495}; 496 497static const uint32_t cached_cmd_gfx9[] = { 498 0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0, 499 0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020, 500 0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf, 501 0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x12, 502 0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0, 503 0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011, 504 0xc0026900, 0x292, 0x20, 0x60201b8, 505 0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0 506}; 507 508unsigned int memcpy_ps_hang[] = { 509 0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100, 510 0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001, 511 0xC80D0101, 0xBF8C007F, 0xF0800F00, 0x00010002, 512 0xBEFE040C, 
0xBF8C0F70, 0xBF800000, 0xBF800000, 513 0xF800180F, 0x03020100, 0xBF810000 514}; 515 516struct amdgpu_test_shader { 517 uint32_t *shader; 518 uint32_t header_length; 519 uint32_t body_length; 520 uint32_t foot_length; 521}; 522 523unsigned int memcpy_cs_hang_slow_ai_codes[] = { 524 0xd1fd0000, 0x04010c08, 0xe00c2000, 0x80000100, 525 0xbf8c0f70, 0xe01c2000, 0x80010100, 0xbf810000 526}; 527 528struct amdgpu_test_shader memcpy_cs_hang_slow_ai = { 529 memcpy_cs_hang_slow_ai_codes, 530 4, 531 3, 532 1 533}; 534 535unsigned int memcpy_cs_hang_slow_rv_codes[] = { 536 0x8e00860c, 0x32000000, 0xe00c2000, 0x80010100, 537 0xbf8c0f70, 0xe01c2000, 0x80020100, 0xbf810000 538}; 539 540struct amdgpu_test_shader memcpy_cs_hang_slow_rv = { 541 memcpy_cs_hang_slow_rv_codes, 542 4, 543 3, 544 1 545}; 546 547unsigned int memcpy_ps_hang_slow_ai_codes[] = { 548 0xbefc000c, 0xbe8e017e, 0xbefe077e, 0xd4080000, 549 0xd4090001, 0xd40c0100, 0xd40d0101, 0xf0800f00, 550 0x00400002, 0xbefe010e, 0xbf8c0f70, 0xbf800000, 551 0xbf800000, 0xbf800000, 0xbf800000, 0xc400180f, 552 0x03020100, 0xbf810000 553}; 554 555struct amdgpu_test_shader memcpy_ps_hang_slow_ai = { 556 memcpy_ps_hang_slow_ai_codes, 557 7, 558 2, 559 9 560}; 561 562int amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size, 563 unsigned alignment, unsigned heap, uint64_t alloc_flags, 564 uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu, 565 uint64_t *mc_address, 566 amdgpu_va_handle *va_handle) 567{ 568 struct amdgpu_bo_alloc_request request = {}; 569 amdgpu_bo_handle buf_handle; 570 amdgpu_va_handle handle; 571 uint64_t vmc_addr; 572 int r; 573 574 request.alloc_size = size; 575 request.phys_alignment = alignment; 576 request.preferred_heap = heap; 577 request.flags = alloc_flags; 578 579 r = amdgpu_bo_alloc(dev, &request, &buf_handle); 580 if (r) 581 return r; 582 583 r = amdgpu_va_range_alloc(dev, 584 amdgpu_gpu_va_range_general, 585 size, alignment, 0, &vmc_addr, 586 &handle, 0); 587 if (r) 588 goto 
error_va_alloc;

	/* Map R/W/X plus whatever extra flags the caller requested;
	 * size is rounded up to a whole number of pages. */
	r = amdgpu_bo_va_op_raw(dev, buf_handle, 0, ALIGN(size, getpagesize()), vmc_addr,
				AMDGPU_VM_PAGE_READABLE |
				AMDGPU_VM_PAGE_WRITEABLE |
				AMDGPU_VM_PAGE_EXECUTABLE |
				mapping_flags,
				AMDGPU_VA_OP_MAP);
	if (r)
		goto error_va_map;

	r = amdgpu_bo_cpu_map(buf_handle, cpu);
	if (r)
		goto error_cpu_map;

	*bo = buf_handle;
	*mc_address = vmc_addr;
	*va_handle = handle;

	return 0;

error_cpu_map:
	amdgpu_bo_cpu_unmap(buf_handle);

error_va_map:
	amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);

error_va_alloc:
	amdgpu_bo_free(buf_handle);
	return r;
}



/* Probe the device and decide whether this suite should run; also
 * deactivates the GFX-dependent tests on ASICs without a CPG.
 * Opens and closes its own device handle. */
CU_BOOL suite_basic_tests_enable(void)
{

	if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
				     &minor_version, &device_handle))
		return CU_FALSE;


	family_id = device_handle->info.family_id;
	chip_id = device_handle->info.chip_external_rev;
	chip_rev = device_handle->info.chip_rev;

	if (amdgpu_device_deinitialize(device_handle))
		return CU_FALSE;

	/* disable gfx engine basic test cases for some asics have no CPG */
	if (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) {
		if (amdgpu_set_test_active("Basic Tests",
					"Command submission Test (GFX)",
					CU_FALSE))
			fprintf(stderr, "test deactivation failed - %s\n",
				CU_get_error_msg());

		if (amdgpu_set_test_active("Basic Tests",
					"Command submission Test (Multi-Fence)",
					CU_FALSE))
			fprintf(stderr, "test deactivation failed - %s\n",
				CU_get_error_msg());

		if (amdgpu_set_test_active("Basic Tests",
					"Sync dependency Test",
					CU_FALSE))
			fprintf(stderr, "test deactivation failed - %s\n",
				CU_get_error_msg());
	}

	return CU_TRUE;
}

/* CUnit suite init: open the device handle used by every test and
 * cache the family id. Returns CUE_SINIT_FAILED on any error. */
int suite_basic_tests_init(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	int r;

	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
				     &minor_version, &device_handle);

	if (r) {
		/* -EACCES with errno == EACCES means a permission problem
		 * opening the DRM node, so hint at running as root. */
		if ((r == -EACCES) && (errno == EACCES))
			printf("\n\nError:%s. "
				"Hint:Try to run this test program as root.",
				strerror(errno));
		return CUE_SINIT_FAILED;
	}

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	if (r)
		return CUE_SINIT_FAILED;

	family_id = gpu_info.family_id;

	return CUE_SUCCESS;
}

/* CUnit suite teardown: release the device handle opened in init. */
int suite_basic_tests_clean(void)
{
	int r = amdgpu_device_deinitialize(device_handle);

	if (r == 0)
		return CUE_SUCCESS;
	else
		return CUE_SCLEAN_FAILED;
}

/* Sanity-check the query interfaces: GPU info and VCE firmware version. */
static void amdgpu_query_info_test(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	uint32_t version, feature;
	int r;

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
					  0, &version, &feature);
	CU_ASSERT_EQUAL(r, 0);
}

/* Submit a CE IB and a DE IB from two separate BOs in one request and
 * wait for the resulting fence. */
static void amdgpu_command_submission_gfx_separate_ibs(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);

	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);

}

/* Submit a CE IB and a DE IB that live in the SAME BO (DE IB starts
 * 16 bytes / 4 dwords in) and wait for the fence. */
static void amdgpu_command_submission_gfx_shared_ib(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
			       &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* Second (DE) IB is placed 4 dwords / 16 bytes into the same BO. */
	ptr = (uint32_t *)ib_result_cpu + 4;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);

	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

/* Thin wrappers running the generic helpers on the GFX ring. */
static void amdgpu_command_submission_gfx_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
}

/* Force buffer eviction by exhausting VRAM/GTT with max-size
 * allocations, then exercise SDMA copies between the evicted BOs.
 * (Definition continues beyond this view.) */
static void amdgpu_bo_eviction_test(void)
{
	const int sdma_write_length = 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo1_mc, bo2_mc;
	volatile unsigned char *bo1_cpu, *bo2_cpu;
	int i, j, r, loop1, loop2;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
	struct amdgpu_heap_info vram_info, gtt_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(4, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
				   0, &vram_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0,
&vram_max[1]); 937 CU_ASSERT_EQUAL(r, 0); 938 939 r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT, 940 0, >t_info); 941 CU_ASSERT_EQUAL(r, 0); 942 943 r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096, 944 AMDGPU_GEM_DOMAIN_GTT, 0, >t_max[0]); 945 CU_ASSERT_EQUAL(r, 0); 946 r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096, 947 AMDGPU_GEM_DOMAIN_GTT, 0, >t_max[1]); 948 CU_ASSERT_EQUAL(r, 0); 949 950 951 952 loop1 = loop2 = 0; 953 /* run 9 circle to test all mapping combination */ 954 while(loop1 < 2) { 955 while(loop2 < 2) { 956 /* allocate UC bo1for sDMA use */ 957 r = amdgpu_bo_alloc_and_map(device_handle, 958 sdma_write_length, 4096, 959 AMDGPU_GEM_DOMAIN_GTT, 960 gtt_flags[loop1], &bo1, 961 (void**)&bo1_cpu, &bo1_mc, 962 &bo1_va_handle); 963 CU_ASSERT_EQUAL(r, 0); 964 965 /* set bo1 */ 966 memset((void*)bo1_cpu, 0xaa, sdma_write_length); 967 968 /* allocate UC bo2 for sDMA use */ 969 r = amdgpu_bo_alloc_and_map(device_handle, 970 sdma_write_length, 4096, 971 AMDGPU_GEM_DOMAIN_GTT, 972 gtt_flags[loop2], &bo2, 973 (void**)&bo2_cpu, &bo2_mc, 974 &bo2_va_handle); 975 CU_ASSERT_EQUAL(r, 0); 976 977 /* clear bo2 */ 978 memset((void*)bo2_cpu, 0, sdma_write_length); 979 980 resources[0] = bo1; 981 resources[1] = bo2; 982 resources[2] = vram_max[loop2]; 983 resources[3] = gtt_max[loop2]; 984 985 /* fulfill PM4: test DMA copy linear */ 986 i = j = 0; 987 if (family_id == AMDGPU_FAMILY_SI) { 988 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0, 989 sdma_write_length); 990 pm4[i++] = 0xffffffff & bo2_mc; 991 pm4[i++] = 0xffffffff & bo1_mc; 992 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 993 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 994 } else { 995 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0); 996 if (family_id >= AMDGPU_FAMILY_AI) 997 pm4[i++] = sdma_write_length - 1; 998 else 999 pm4[i++] = sdma_write_length; 1000 pm4[i++] = 0; 1001 pm4[i++] = 0xffffffff & bo1_mc; 1002 
pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 1003 pm4[i++] = 0xffffffff & bo2_mc; 1004 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 1005 } 1006 1007 amdgpu_test_exec_cs_helper(context_handle, 1008 AMDGPU_HW_IP_DMA, 0, 1009 i, pm4, 1010 4, resources, 1011 ib_info, ibs_request); 1012 1013 /* verify if SDMA test result meets with expected */ 1014 i = 0; 1015 while(i < sdma_write_length) { 1016 CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa); 1017 } 1018 r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc, 1019 sdma_write_length); 1020 CU_ASSERT_EQUAL(r, 0); 1021 r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc, 1022 sdma_write_length); 1023 CU_ASSERT_EQUAL(r, 0); 1024 loop2++; 1025 } 1026 loop2 = 0; 1027 loop1++; 1028 } 1029 amdgpu_bo_free(vram_max[0]); 1030 amdgpu_bo_free(vram_max[1]); 1031 amdgpu_bo_free(gtt_max[0]); 1032 amdgpu_bo_free(gtt_max[1]); 1033 /* clean resources */ 1034 free(resources); 1035 free(ibs_request); 1036 free(ib_info); 1037 free(pm4); 1038 1039 /* end of test */ 1040 r = amdgpu_cs_ctx_free(context_handle); 1041 CU_ASSERT_EQUAL(r, 0); 1042} 1043 1044 1045static void amdgpu_command_submission_gfx(void) 1046{ 1047 /* write data using the CP */ 1048 amdgpu_command_submission_gfx_cp_write_data(); 1049 /* const fill using the CP */ 1050 amdgpu_command_submission_gfx_cp_const_fill(); 1051 /* copy data using the CP */ 1052 amdgpu_command_submission_gfx_cp_copy_data(); 1053 /* separate IB buffers for multi-IB submission */ 1054 amdgpu_command_submission_gfx_separate_ibs(); 1055 /* shared IB buffer for multi-IB submission */ 1056 amdgpu_command_submission_gfx_shared_ib(); 1057} 1058 1059static void amdgpu_semaphore_test(void) 1060{ 1061 amdgpu_context_handle context_handle[2]; 1062 amdgpu_semaphore_handle sem; 1063 amdgpu_bo_handle ib_result_handle[2]; 1064 void *ib_result_cpu[2]; 1065 uint64_t ib_result_mc_address[2]; 1066 struct amdgpu_cs_request ibs_request[2] = {0}; 1067 struct amdgpu_cs_ib_info ib_info[2] = {0}; 1068 struct amdgpu_cs_fence 
fence_status = {0}; 1069 uint32_t *ptr; 1070 uint32_t expired; 1071 uint32_t sdma_nop, gfx_nop; 1072 amdgpu_bo_list_handle bo_list[2]; 1073 amdgpu_va_handle va_handle[2]; 1074 int r, i; 1075 struct amdgpu_gpu_info gpu_info = {0}; 1076 unsigned gc_ip_type; 1077 1078 r = amdgpu_query_gpu_info(device_handle, &gpu_info); 1079 CU_ASSERT_EQUAL(r, 0); 1080 1081 gc_ip_type = (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) ? 1082 AMDGPU_HW_IP_COMPUTE : AMDGPU_HW_IP_GFX; 1083 1084 if (family_id == AMDGPU_FAMILY_SI) { 1085 sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0); 1086 gfx_nop = GFX_COMPUTE_NOP_SI; 1087 } else { 1088 sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP); 1089 gfx_nop = GFX_COMPUTE_NOP; 1090 } 1091 1092 r = amdgpu_cs_create_semaphore(&sem); 1093 CU_ASSERT_EQUAL(r, 0); 1094 for (i = 0; i < 2; i++) { 1095 r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]); 1096 CU_ASSERT_EQUAL(r, 0); 1097 1098 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 1099 AMDGPU_GEM_DOMAIN_GTT, 0, 1100 &ib_result_handle[i], &ib_result_cpu[i], 1101 &ib_result_mc_address[i], &va_handle[i]); 1102 CU_ASSERT_EQUAL(r, 0); 1103 1104 r = amdgpu_get_bo_list(device_handle, ib_result_handle[i], 1105 NULL, &bo_list[i]); 1106 CU_ASSERT_EQUAL(r, 0); 1107 } 1108 1109 /* 1. 
same context different engine */ 1110 ptr = ib_result_cpu[0]; 1111 ptr[0] = sdma_nop; 1112 ib_info[0].ib_mc_address = ib_result_mc_address[0]; 1113 ib_info[0].size = 1; 1114 1115 ibs_request[0].ip_type = AMDGPU_HW_IP_DMA; 1116 ibs_request[0].number_of_ibs = 1; 1117 ibs_request[0].ibs = &ib_info[0]; 1118 ibs_request[0].resources = bo_list[0]; 1119 ibs_request[0].fence_info.handle = NULL; 1120 r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1); 1121 CU_ASSERT_EQUAL(r, 0); 1122 r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem); 1123 CU_ASSERT_EQUAL(r, 0); 1124 1125 r = amdgpu_cs_wait_semaphore(context_handle[0], gc_ip_type, 0, 0, sem); 1126 CU_ASSERT_EQUAL(r, 0); 1127 ptr = ib_result_cpu[1]; 1128 ptr[0] = gfx_nop; 1129 ib_info[1].ib_mc_address = ib_result_mc_address[1]; 1130 ib_info[1].size = 1; 1131 1132 ibs_request[1].ip_type = gc_ip_type; 1133 ibs_request[1].number_of_ibs = 1; 1134 ibs_request[1].ibs = &ib_info[1]; 1135 ibs_request[1].resources = bo_list[1]; 1136 ibs_request[1].fence_info.handle = NULL; 1137 1138 r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[1], 1); 1139 CU_ASSERT_EQUAL(r, 0); 1140 1141 fence_status.context = context_handle[0]; 1142 fence_status.ip_type = gc_ip_type; 1143 fence_status.ip_instance = 0; 1144 fence_status.fence = ibs_request[1].seq_no; 1145 r = amdgpu_cs_query_fence_status(&fence_status, 1146 500000000, 0, &expired); 1147 CU_ASSERT_EQUAL(r, 0); 1148 CU_ASSERT_EQUAL(expired, true); 1149 1150 /* 2. 
same engine different context */ 1151 ptr = ib_result_cpu[0]; 1152 ptr[0] = gfx_nop; 1153 ib_info[0].ib_mc_address = ib_result_mc_address[0]; 1154 ib_info[0].size = 1; 1155 1156 ibs_request[0].ip_type = gc_ip_type; 1157 ibs_request[0].number_of_ibs = 1; 1158 ibs_request[0].ibs = &ib_info[0]; 1159 ibs_request[0].resources = bo_list[0]; 1160 ibs_request[0].fence_info.handle = NULL; 1161 r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1); 1162 CU_ASSERT_EQUAL(r, 0); 1163 r = amdgpu_cs_signal_semaphore(context_handle[0], gc_ip_type, 0, 0, sem); 1164 CU_ASSERT_EQUAL(r, 0); 1165 1166 r = amdgpu_cs_wait_semaphore(context_handle[1], gc_ip_type, 0, 0, sem); 1167 CU_ASSERT_EQUAL(r, 0); 1168 ptr = ib_result_cpu[1]; 1169 ptr[0] = gfx_nop; 1170 ib_info[1].ib_mc_address = ib_result_mc_address[1]; 1171 ib_info[1].size = 1; 1172 1173 ibs_request[1].ip_type = gc_ip_type; 1174 ibs_request[1].number_of_ibs = 1; 1175 ibs_request[1].ibs = &ib_info[1]; 1176 ibs_request[1].resources = bo_list[1]; 1177 ibs_request[1].fence_info.handle = NULL; 1178 r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request[1], 1); 1179 1180 CU_ASSERT_EQUAL(r, 0); 1181 1182 fence_status.context = context_handle[1]; 1183 fence_status.ip_type = gc_ip_type; 1184 fence_status.ip_instance = 0; 1185 fence_status.fence = ibs_request[1].seq_no; 1186 r = amdgpu_cs_query_fence_status(&fence_status, 1187 500000000, 0, &expired); 1188 CU_ASSERT_EQUAL(r, 0); 1189 CU_ASSERT_EQUAL(expired, true); 1190 1191 for (i = 0; i < 2; i++) { 1192 r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i], 1193 ib_result_mc_address[i], 4096); 1194 CU_ASSERT_EQUAL(r, 0); 1195 1196 r = amdgpu_bo_list_destroy(bo_list[i]); 1197 CU_ASSERT_EQUAL(r, 0); 1198 1199 r = amdgpu_cs_ctx_free(context_handle[i]); 1200 CU_ASSERT_EQUAL(r, 0); 1201 } 1202 1203 r = amdgpu_cs_destroy_semaphore(sem); 1204 CU_ASSERT_EQUAL(r, 0); 1205} 1206 1207static void amdgpu_command_submission_compute_nop(void) 1208{ 1209 amdgpu_context_handle 
context_handle; 1210 amdgpu_bo_handle ib_result_handle; 1211 void *ib_result_cpu; 1212 uint64_t ib_result_mc_address; 1213 struct amdgpu_cs_request ibs_request; 1214 struct amdgpu_cs_ib_info ib_info; 1215 struct amdgpu_cs_fence fence_status; 1216 uint32_t *ptr; 1217 uint32_t expired; 1218 int r, instance; 1219 amdgpu_bo_list_handle bo_list; 1220 amdgpu_va_handle va_handle; 1221 struct drm_amdgpu_info_hw_ip info; 1222 1223 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info); 1224 CU_ASSERT_EQUAL(r, 0); 1225 1226 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 1227 CU_ASSERT_EQUAL(r, 0); 1228 1229 for (instance = 0; (1 << instance) & info.available_rings; instance++) { 1230 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 1231 AMDGPU_GEM_DOMAIN_GTT, 0, 1232 &ib_result_handle, &ib_result_cpu, 1233 &ib_result_mc_address, &va_handle); 1234 CU_ASSERT_EQUAL(r, 0); 1235 1236 r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, 1237 &bo_list); 1238 CU_ASSERT_EQUAL(r, 0); 1239 1240 ptr = ib_result_cpu; 1241 memset(ptr, 0, 16); 1242 ptr[0]=PACKET3(PACKET3_NOP, 14); 1243 1244 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); 1245 ib_info.ib_mc_address = ib_result_mc_address; 1246 ib_info.size = 16; 1247 1248 memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); 1249 ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE; 1250 ibs_request.ring = instance; 1251 ibs_request.number_of_ibs = 1; 1252 ibs_request.ibs = &ib_info; 1253 ibs_request.resources = bo_list; 1254 ibs_request.fence_info.handle = NULL; 1255 1256 memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence)); 1257 r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1); 1258 CU_ASSERT_EQUAL(r, 0); 1259 1260 fence_status.context = context_handle; 1261 fence_status.ip_type = AMDGPU_HW_IP_COMPUTE; 1262 fence_status.ip_instance = 0; 1263 fence_status.ring = instance; 1264 fence_status.fence = ibs_request.seq_no; 1265 1266 r = amdgpu_cs_query_fence_status(&fence_status, 1267 
AMDGPU_TIMEOUT_INFINITE, 1268 0, &expired); 1269 CU_ASSERT_EQUAL(r, 0); 1270 1271 r = amdgpu_bo_list_destroy(bo_list); 1272 CU_ASSERT_EQUAL(r, 0); 1273 1274 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 1275 ib_result_mc_address, 4096); 1276 CU_ASSERT_EQUAL(r, 0); 1277 } 1278 1279 r = amdgpu_cs_ctx_free(context_handle); 1280 CU_ASSERT_EQUAL(r, 0); 1281} 1282 1283static void amdgpu_command_submission_compute_cp_write_data(void) 1284{ 1285 amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE); 1286} 1287 1288static void amdgpu_command_submission_compute_cp_const_fill(void) 1289{ 1290 amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE); 1291} 1292 1293static void amdgpu_command_submission_compute_cp_copy_data(void) 1294{ 1295 amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE); 1296} 1297 1298static void amdgpu_command_submission_compute(void) 1299{ 1300 /* write data using the CP */ 1301 amdgpu_command_submission_compute_cp_write_data(); 1302 /* const fill using the CP */ 1303 amdgpu_command_submission_compute_cp_const_fill(); 1304 /* copy data using the CP */ 1305 amdgpu_command_submission_compute_cp_copy_data(); 1306 /* nop test */ 1307 amdgpu_command_submission_compute_nop(); 1308} 1309 1310/* 1311 * caller need create/release: 1312 * pm4_src, resources, ib_info, and ibs_request 1313 * submit command stream described in ibs_request and wait for this IB accomplished 1314 */ 1315void 1316amdgpu_test_exec_cs_helper_raw(amdgpu_device_handle device_handle, 1317 amdgpu_context_handle context_handle, 1318 unsigned ip_type, int instance, int pm4_dw, 1319 uint32_t *pm4_src, int res_cnt, 1320 amdgpu_bo_handle *resources, 1321 struct amdgpu_cs_ib_info *ib_info, 1322 struct amdgpu_cs_request *ibs_request, 1323 bool secure) 1324{ 1325 int r; 1326 uint32_t expired; 1327 uint32_t *ring_ptr; 1328 amdgpu_bo_handle ib_result_handle; 1329 void *ib_result_cpu; 1330 uint64_t ib_result_mc_address; 1331 struct amdgpu_cs_fence 
fence_status = {0}; 1332 amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1)); 1333 amdgpu_va_handle va_handle; 1334 1335 /* prepare CS */ 1336 CU_ASSERT_NOT_EQUAL(pm4_src, NULL); 1337 CU_ASSERT_NOT_EQUAL(resources, NULL); 1338 CU_ASSERT_NOT_EQUAL(ib_info, NULL); 1339 CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 1340 CU_ASSERT_TRUE(pm4_dw <= 1024); 1341 1342 /* allocate IB */ 1343 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 1344 AMDGPU_GEM_DOMAIN_GTT, 0, 1345 &ib_result_handle, &ib_result_cpu, 1346 &ib_result_mc_address, &va_handle); 1347 CU_ASSERT_EQUAL(r, 0); 1348 1349 /* copy PM4 packet to ring from caller */ 1350 ring_ptr = ib_result_cpu; 1351 memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src)); 1352 1353 ib_info->ib_mc_address = ib_result_mc_address; 1354 ib_info->size = pm4_dw; 1355 if (secure) 1356 ib_info->flags |= AMDGPU_IB_FLAGS_SECURE; 1357 1358 ibs_request->ip_type = ip_type; 1359 ibs_request->ring = instance; 1360 ibs_request->number_of_ibs = 1; 1361 ibs_request->ibs = ib_info; 1362 ibs_request->fence_info.handle = NULL; 1363 1364 memcpy(all_res, resources, sizeof(resources[0]) * res_cnt); 1365 all_res[res_cnt] = ib_result_handle; 1366 1367 r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res, 1368 NULL, &ibs_request->resources); 1369 CU_ASSERT_EQUAL(r, 0); 1370 1371 CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 1372 1373 /* submit CS */ 1374 r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1); 1375 CU_ASSERT_EQUAL(r, 0); 1376 1377 r = amdgpu_bo_list_destroy(ibs_request->resources); 1378 CU_ASSERT_EQUAL(r, 0); 1379 1380 fence_status.ip_type = ip_type; 1381 fence_status.ip_instance = 0; 1382 fence_status.ring = ibs_request->ring; 1383 fence_status.context = context_handle; 1384 fence_status.fence = ibs_request->seq_no; 1385 1386 /* wait for IB accomplished */ 1387 r = amdgpu_cs_query_fence_status(&fence_status, 1388 AMDGPU_TIMEOUT_INFINITE, 1389 0, &expired); 1390 CU_ASSERT_EQUAL(r, 0); 1391 CU_ASSERT_EQUAL(expired, 
true); 1392 1393 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 1394 ib_result_mc_address, 4096); 1395 CU_ASSERT_EQUAL(r, 0); 1396} 1397 1398static void 1399amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle, 1400 unsigned ip_type, int instance, int pm4_dw, 1401 uint32_t *pm4_src, int res_cnt, 1402 amdgpu_bo_handle *resources, 1403 struct amdgpu_cs_ib_info *ib_info, 1404 struct amdgpu_cs_request *ibs_request) 1405{ 1406 amdgpu_test_exec_cs_helper_raw(device_handle, context_handle, 1407 ip_type, instance, pm4_dw, pm4_src, 1408 res_cnt, resources, ib_info, 1409 ibs_request, false); 1410} 1411 1412void 1413amdgpu_command_submission_write_linear_helper_with_secure(amdgpu_device_handle 1414 device, unsigned 1415 ip_type, bool secure) 1416{ 1417 const int sdma_write_length = 128; 1418 const int pm4_dw = 256; 1419 amdgpu_context_handle context_handle; 1420 amdgpu_bo_handle bo; 1421 amdgpu_bo_handle *resources; 1422 uint32_t *pm4; 1423 struct amdgpu_cs_ib_info *ib_info; 1424 struct amdgpu_cs_request *ibs_request; 1425 uint64_t bo_mc; 1426 volatile uint32_t *bo_cpu; 1427 uint32_t bo_cpu_origin; 1428 int i, j, r, loop, ring_id; 1429 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 1430 amdgpu_va_handle va_handle; 1431 struct drm_amdgpu_info_hw_ip hw_ip_info; 1432 1433 pm4 = calloc(pm4_dw, sizeof(*pm4)); 1434 CU_ASSERT_NOT_EQUAL(pm4, NULL); 1435 1436 ib_info = calloc(1, sizeof(*ib_info)); 1437 CU_ASSERT_NOT_EQUAL(ib_info, NULL); 1438 1439 ibs_request = calloc(1, sizeof(*ibs_request)); 1440 CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 1441 1442 r = amdgpu_query_hw_ip_info(device, ip_type, 0, &hw_ip_info); 1443 CU_ASSERT_EQUAL(r, 0); 1444 1445 for (i = 0; secure && (i < 2); i++) 1446 gtt_flags[i] |= AMDGPU_GEM_CREATE_ENCRYPTED; 1447 1448 r = amdgpu_cs_ctx_create(device, &context_handle); 1449 1450 CU_ASSERT_EQUAL(r, 0); 1451 1452 /* prepare resource */ 1453 resources = calloc(1, sizeof(amdgpu_bo_handle)); 1454 CU_ASSERT_NOT_EQUAL(resources, NULL); 
1455 1456 for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) { 1457 loop = 0; 1458 while(loop < 2) { 1459 /* allocate UC bo for sDMA use */ 1460 r = amdgpu_bo_alloc_and_map(device, 1461 sdma_write_length * sizeof(uint32_t), 1462 4096, AMDGPU_GEM_DOMAIN_GTT, 1463 gtt_flags[loop], &bo, (void**)&bo_cpu, 1464 &bo_mc, &va_handle); 1465 CU_ASSERT_EQUAL(r, 0); 1466 1467 /* clear bo */ 1468 memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t)); 1469 1470 resources[0] = bo; 1471 1472 /* fulfill PM4: test DMA write-linear */ 1473 i = j = 0; 1474 if (ip_type == AMDGPU_HW_IP_DMA) { 1475 if (family_id == AMDGPU_FAMILY_SI) 1476 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0, 1477 sdma_write_length); 1478 else 1479 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE, 1480 SDMA_WRITE_SUB_OPCODE_LINEAR, 1481 secure ? SDMA_ATOMIC_TMZ(1) : 0); 1482 pm4[i++] = 0xfffffffc & bo_mc; 1483 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1484 if (family_id >= AMDGPU_FAMILY_AI) 1485 pm4[i++] = sdma_write_length - 1; 1486 else if (family_id != AMDGPU_FAMILY_SI) 1487 pm4[i++] = sdma_write_length; 1488 while(j++ < sdma_write_length) 1489 pm4[i++] = 0xdeadbeaf; 1490 } else if ((ip_type == AMDGPU_HW_IP_GFX) || 1491 (ip_type == AMDGPU_HW_IP_COMPUTE)) { 1492 pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length); 1493 pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 1494 pm4[i++] = 0xfffffffc & bo_mc; 1495 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1496 while(j++ < sdma_write_length) 1497 pm4[i++] = 0xdeadbeaf; 1498 } 1499 1500 amdgpu_test_exec_cs_helper_raw(device, context_handle, 1501 ip_type, ring_id, i, pm4, 1502 1, resources, ib_info, 1503 ibs_request, secure); 1504 1505 /* verify if SDMA test result meets with expected */ 1506 i = 0; 1507 if (!secure) { 1508 while(i < sdma_write_length) { 1509 CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf); 1510 } 1511 } else if (ip_type == AMDGPU_HW_IP_GFX) { 1512 memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t)); 1513 pm4[i++] = 
PACKET3(PACKET3_ATOMIC_MEM, 7); 1514 /* atomic opcode for 32b w/ RTN and ATOMIC_SWAPCMP_RTN 1515 * command, 1-loop_until_compare_satisfied. 1516 * single_pass_atomic, 0-lru 1517 * engine_sel, 0-micro_engine 1518 */ 1519 pm4[i++] = (TC_OP_ATOMIC_CMPSWAP_RTN_32 | 1520 ATOMIC_MEM_COMMAND(1) | 1521 ATOMIC_MEM_CACHEPOLICAY(0) | 1522 ATOMIC_MEM_ENGINESEL(0)); 1523 pm4[i++] = 0xfffffffc & bo_mc; 1524 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1525 pm4[i++] = 0x12345678; 1526 pm4[i++] = 0x0; 1527 pm4[i++] = 0xdeadbeaf; 1528 pm4[i++] = 0x0; 1529 pm4[i++] = 0x100; 1530 amdgpu_test_exec_cs_helper_raw(device, context_handle, 1531 ip_type, ring_id, i, pm4, 1532 1, resources, ib_info, 1533 ibs_request, true); 1534 } else if (ip_type == AMDGPU_HW_IP_DMA) { 1535 /* restore the bo_cpu to compare */ 1536 bo_cpu_origin = bo_cpu[0]; 1537 memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t)); 1538 /* atomic opcode for 32b w/ RTN and ATOMIC_SWAPCMP_RTN 1539 * loop, 1-loop_until_compare_satisfied. 1540 * single_pass_atomic, 0-lru 1541 */ 1542 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC, 1543 0, 1544 SDMA_ATOMIC_LOOP(1) | 1545 SDMA_ATOMIC_TMZ(1) | 1546 SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32)); 1547 pm4[i++] = 0xfffffffc & bo_mc; 1548 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1549 pm4[i++] = 0x12345678; 1550 pm4[i++] = 0x0; 1551 pm4[i++] = 0xdeadbeaf; 1552 pm4[i++] = 0x0; 1553 pm4[i++] = 0x100; 1554 amdgpu_test_exec_cs_helper_raw(device, context_handle, 1555 ip_type, ring_id, i, pm4, 1556 1, resources, ib_info, 1557 ibs_request, true); 1558 /* DMA's atomic behavir is unlike GFX 1559 * If the comparing data is not equal to destination data, 1560 * For GFX, loop again till gfx timeout(system hang). 1561 * For DMA, loop again till timer expired and then send interrupt. 1562 * So testcase can't use interrupt mechanism. 1563 * We take another way to verify. When the comparing data is not 1564 * equal to destination data, overwrite the source data to the destination 1565 * buffer. 
Otherwise, original destination data unchanged. 1566 * So if the bo_cpu data is overwritten, the result is passed. 1567 */ 1568 CU_ASSERT_NOT_EQUAL(bo_cpu[0], bo_cpu_origin); 1569 1570 /* compare again for the case of dest_data != cmp_data */ 1571 i = 0; 1572 /* restore again, here dest_data should be */ 1573 bo_cpu_origin = bo_cpu[0]; 1574 memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t)); 1575 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC, 1576 0, 1577 SDMA_ATOMIC_LOOP(1) | 1578 SDMA_ATOMIC_TMZ(1) | 1579 SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32)); 1580 pm4[i++] = 0xfffffffc & bo_mc; 1581 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1582 pm4[i++] = 0x87654321; 1583 pm4[i++] = 0x0; 1584 pm4[i++] = 0xdeadbeaf; 1585 pm4[i++] = 0x0; 1586 pm4[i++] = 0x100; 1587 amdgpu_test_exec_cs_helper_raw(device, context_handle, 1588 ip_type, ring_id, i, pm4, 1589 1, resources, ib_info, 1590 ibs_request, true); 1591 /* here bo_cpu[0] should be unchanged, still is 0x12345678, otherwise failed*/ 1592 CU_ASSERT_EQUAL(bo_cpu[0], bo_cpu_origin); 1593 } 1594 1595 r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc, 1596 sdma_write_length * sizeof(uint32_t)); 1597 CU_ASSERT_EQUAL(r, 0); 1598 loop++; 1599 } 1600 } 1601 /* clean resources */ 1602 free(resources); 1603 free(ibs_request); 1604 free(ib_info); 1605 free(pm4); 1606 1607 /* end of test */ 1608 r = amdgpu_cs_ctx_free(context_handle); 1609 CU_ASSERT_EQUAL(r, 0); 1610} 1611 1612static void amdgpu_command_submission_write_linear_helper(unsigned ip_type) 1613{ 1614 amdgpu_command_submission_write_linear_helper_with_secure(device_handle, 1615 ip_type, 1616 false); 1617} 1618 1619static void amdgpu_command_submission_sdma_write_linear(void) 1620{ 1621 amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA); 1622} 1623 1624static void amdgpu_command_submission_const_fill_helper(unsigned ip_type) 1625{ 1626 const int sdma_write_length = 1024 * 1024; 1627 const int pm4_dw = 256; 1628 amdgpu_context_handle context_handle; 1629 
amdgpu_bo_handle bo; 1630 amdgpu_bo_handle *resources; 1631 uint32_t *pm4; 1632 struct amdgpu_cs_ib_info *ib_info; 1633 struct amdgpu_cs_request *ibs_request; 1634 uint64_t bo_mc; 1635 volatile uint32_t *bo_cpu; 1636 int i, j, r, loop, ring_id; 1637 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 1638 amdgpu_va_handle va_handle; 1639 struct drm_amdgpu_info_hw_ip hw_ip_info; 1640 1641 pm4 = calloc(pm4_dw, sizeof(*pm4)); 1642 CU_ASSERT_NOT_EQUAL(pm4, NULL); 1643 1644 ib_info = calloc(1, sizeof(*ib_info)); 1645 CU_ASSERT_NOT_EQUAL(ib_info, NULL); 1646 1647 ibs_request = calloc(1, sizeof(*ibs_request)); 1648 CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 1649 1650 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info); 1651 CU_ASSERT_EQUAL(r, 0); 1652 1653 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 1654 CU_ASSERT_EQUAL(r, 0); 1655 1656 /* prepare resource */ 1657 resources = calloc(1, sizeof(amdgpu_bo_handle)); 1658 CU_ASSERT_NOT_EQUAL(resources, NULL); 1659 1660 for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) { 1661 loop = 0; 1662 while(loop < 2) { 1663 /* allocate UC bo for sDMA use */ 1664 r = amdgpu_bo_alloc_and_map(device_handle, 1665 sdma_write_length, 4096, 1666 AMDGPU_GEM_DOMAIN_GTT, 1667 gtt_flags[loop], &bo, (void**)&bo_cpu, 1668 &bo_mc, &va_handle); 1669 CU_ASSERT_EQUAL(r, 0); 1670 1671 /* clear bo */ 1672 memset((void*)bo_cpu, 0, sdma_write_length); 1673 1674 resources[0] = bo; 1675 1676 /* fulfill PM4: test DMA const fill */ 1677 i = j = 0; 1678 if (ip_type == AMDGPU_HW_IP_DMA) { 1679 if (family_id == AMDGPU_FAMILY_SI) { 1680 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI, 1681 0, 0, 0, 1682 sdma_write_length / 4); 1683 pm4[i++] = 0xfffffffc & bo_mc; 1684 pm4[i++] = 0xdeadbeaf; 1685 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16; 1686 } else { 1687 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0, 1688 SDMA_CONSTANT_FILL_EXTRA_SIZE(2)); 1689 pm4[i++] = 0xffffffff & bo_mc; 1690 pm4[i++] = 
(0xffffffff00000000 & bo_mc) >> 32; 1691 pm4[i++] = 0xdeadbeaf; 1692 if (family_id >= AMDGPU_FAMILY_AI) 1693 pm4[i++] = sdma_write_length - 1; 1694 else 1695 pm4[i++] = sdma_write_length; 1696 } 1697 } else if ((ip_type == AMDGPU_HW_IP_GFX) || 1698 (ip_type == AMDGPU_HW_IP_COMPUTE)) { 1699 if (family_id == AMDGPU_FAMILY_SI) { 1700 pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4); 1701 pm4[i++] = 0xdeadbeaf; 1702 pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) | 1703 PACKET3_DMA_DATA_SI_DST_SEL(0) | 1704 PACKET3_DMA_DATA_SI_SRC_SEL(2) | 1705 PACKET3_DMA_DATA_SI_CP_SYNC; 1706 pm4[i++] = 0xffffffff & bo_mc; 1707 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1708 pm4[i++] = sdma_write_length; 1709 } else { 1710 pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5); 1711 pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) | 1712 PACKET3_DMA_DATA_DST_SEL(0) | 1713 PACKET3_DMA_DATA_SRC_SEL(2) | 1714 PACKET3_DMA_DATA_CP_SYNC; 1715 pm4[i++] = 0xdeadbeaf; 1716 pm4[i++] = 0; 1717 pm4[i++] = 0xfffffffc & bo_mc; 1718 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1719 pm4[i++] = sdma_write_length; 1720 } 1721 } 1722 1723 amdgpu_test_exec_cs_helper(context_handle, 1724 ip_type, ring_id, 1725 i, pm4, 1726 1, resources, 1727 ib_info, ibs_request); 1728 1729 /* verify if SDMA test result meets with expected */ 1730 i = 0; 1731 while(i < (sdma_write_length / 4)) { 1732 CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf); 1733 } 1734 1735 r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc, 1736 sdma_write_length); 1737 CU_ASSERT_EQUAL(r, 0); 1738 loop++; 1739 } 1740 } 1741 /* clean resources */ 1742 free(resources); 1743 free(ibs_request); 1744 free(ib_info); 1745 free(pm4); 1746 1747 /* end of test */ 1748 r = amdgpu_cs_ctx_free(context_handle); 1749 CU_ASSERT_EQUAL(r, 0); 1750} 1751 1752static void amdgpu_command_submission_sdma_const_fill(void) 1753{ 1754 amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA); 1755} 1756 1757static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type) 1758{ 1759 const int 
sdma_write_length = 1024; 1760 const int pm4_dw = 256; 1761 amdgpu_context_handle context_handle; 1762 amdgpu_bo_handle bo1, bo2; 1763 amdgpu_bo_handle *resources; 1764 uint32_t *pm4; 1765 struct amdgpu_cs_ib_info *ib_info; 1766 struct amdgpu_cs_request *ibs_request; 1767 uint64_t bo1_mc, bo2_mc; 1768 volatile unsigned char *bo1_cpu, *bo2_cpu; 1769 int i, j, r, loop1, loop2, ring_id; 1770 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 1771 amdgpu_va_handle bo1_va_handle, bo2_va_handle; 1772 struct drm_amdgpu_info_hw_ip hw_ip_info; 1773 1774 pm4 = calloc(pm4_dw, sizeof(*pm4)); 1775 CU_ASSERT_NOT_EQUAL(pm4, NULL); 1776 1777 ib_info = calloc(1, sizeof(*ib_info)); 1778 CU_ASSERT_NOT_EQUAL(ib_info, NULL); 1779 1780 ibs_request = calloc(1, sizeof(*ibs_request)); 1781 CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 1782 1783 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info); 1784 CU_ASSERT_EQUAL(r, 0); 1785 1786 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 1787 CU_ASSERT_EQUAL(r, 0); 1788 1789 /* prepare resource */ 1790 resources = calloc(2, sizeof(amdgpu_bo_handle)); 1791 CU_ASSERT_NOT_EQUAL(resources, NULL); 1792 1793 for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) { 1794 loop1 = loop2 = 0; 1795 /* run 9 circle to test all mapping combination */ 1796 while(loop1 < 2) { 1797 while(loop2 < 2) { 1798 /* allocate UC bo1for sDMA use */ 1799 r = amdgpu_bo_alloc_and_map(device_handle, 1800 sdma_write_length, 4096, 1801 AMDGPU_GEM_DOMAIN_GTT, 1802 gtt_flags[loop1], &bo1, 1803 (void**)&bo1_cpu, &bo1_mc, 1804 &bo1_va_handle); 1805 CU_ASSERT_EQUAL(r, 0); 1806 1807 /* set bo1 */ 1808 memset((void*)bo1_cpu, 0xaa, sdma_write_length); 1809 1810 /* allocate UC bo2 for sDMA use */ 1811 r = amdgpu_bo_alloc_and_map(device_handle, 1812 sdma_write_length, 4096, 1813 AMDGPU_GEM_DOMAIN_GTT, 1814 gtt_flags[loop2], &bo2, 1815 (void**)&bo2_cpu, &bo2_mc, 1816 &bo2_va_handle); 1817 CU_ASSERT_EQUAL(r, 0); 1818 1819 /* clear bo2 */ 1820 
memset((void*)bo2_cpu, 0, sdma_write_length); 1821 1822 resources[0] = bo1; 1823 resources[1] = bo2; 1824 1825 /* fulfill PM4: test DMA copy linear */ 1826 i = j = 0; 1827 if (ip_type == AMDGPU_HW_IP_DMA) { 1828 if (family_id == AMDGPU_FAMILY_SI) { 1829 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 1830 0, 0, 0, 1831 sdma_write_length); 1832 pm4[i++] = 0xffffffff & bo2_mc; 1833 pm4[i++] = 0xffffffff & bo1_mc; 1834 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 1835 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 1836 } else { 1837 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, 1838 SDMA_COPY_SUB_OPCODE_LINEAR, 1839 0); 1840 if (family_id >= AMDGPU_FAMILY_AI) 1841 pm4[i++] = sdma_write_length - 1; 1842 else 1843 pm4[i++] = sdma_write_length; 1844 pm4[i++] = 0; 1845 pm4[i++] = 0xffffffff & bo1_mc; 1846 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 1847 pm4[i++] = 0xffffffff & bo2_mc; 1848 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 1849 } 1850 } else if ((ip_type == AMDGPU_HW_IP_GFX) || 1851 (ip_type == AMDGPU_HW_IP_COMPUTE)) { 1852 if (family_id == AMDGPU_FAMILY_SI) { 1853 pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4); 1854 pm4[i++] = 0xfffffffc & bo1_mc; 1855 pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) | 1856 PACKET3_DMA_DATA_SI_DST_SEL(0) | 1857 PACKET3_DMA_DATA_SI_SRC_SEL(0) | 1858 PACKET3_DMA_DATA_SI_CP_SYNC | 1859 (0xffff00000000 & bo1_mc) >> 32; 1860 pm4[i++] = 0xfffffffc & bo2_mc; 1861 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 1862 pm4[i++] = sdma_write_length; 1863 } else { 1864 pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5); 1865 pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) | 1866 PACKET3_DMA_DATA_DST_SEL(0) | 1867 PACKET3_DMA_DATA_SRC_SEL(0) | 1868 PACKET3_DMA_DATA_CP_SYNC; 1869 pm4[i++] = 0xfffffffc & bo1_mc; 1870 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 1871 pm4[i++] = 0xfffffffc & bo2_mc; 1872 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 1873 pm4[i++] = sdma_write_length; 1874 } 1875 } 1876 1877 amdgpu_test_exec_cs_helper(context_handle, 1878 ip_type, 
ring_id, 1879 i, pm4, 1880 2, resources, 1881 ib_info, ibs_request); 1882 1883 /* verify if SDMA test result meets with expected */ 1884 i = 0; 1885 while(i < sdma_write_length) { 1886 CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa); 1887 } 1888 r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc, 1889 sdma_write_length); 1890 CU_ASSERT_EQUAL(r, 0); 1891 r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc, 1892 sdma_write_length); 1893 CU_ASSERT_EQUAL(r, 0); 1894 loop2++; 1895 } 1896 loop1++; 1897 } 1898 } 1899 /* clean resources */ 1900 free(resources); 1901 free(ibs_request); 1902 free(ib_info); 1903 free(pm4); 1904 1905 /* end of test */ 1906 r = amdgpu_cs_ctx_free(context_handle); 1907 CU_ASSERT_EQUAL(r, 0); 1908} 1909 1910static void amdgpu_command_submission_sdma_copy_linear(void) 1911{ 1912 amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA); 1913} 1914 1915static void amdgpu_command_submission_sdma(void) 1916{ 1917 amdgpu_command_submission_sdma_write_linear(); 1918 amdgpu_command_submission_sdma_const_fill(); 1919 amdgpu_command_submission_sdma_copy_linear(); 1920} 1921 1922static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all) 1923{ 1924 amdgpu_context_handle context_handle; 1925 amdgpu_bo_handle ib_result_handle, ib_result_ce_handle; 1926 void *ib_result_cpu, *ib_result_ce_cpu; 1927 uint64_t ib_result_mc_address, ib_result_ce_mc_address; 1928 struct amdgpu_cs_request ibs_request[2] = {0}; 1929 struct amdgpu_cs_ib_info ib_info[2]; 1930 struct amdgpu_cs_fence fence_status[2] = {0}; 1931 uint32_t *ptr; 1932 uint32_t expired; 1933 amdgpu_bo_list_handle bo_list; 1934 amdgpu_va_handle va_handle, va_handle_ce; 1935 int r; 1936 int i = 0, ib_cs_num = 2; 1937 1938 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 1939 CU_ASSERT_EQUAL(r, 0); 1940 1941 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 1942 AMDGPU_GEM_DOMAIN_GTT, 0, 1943 &ib_result_handle, &ib_result_cpu, 1944 &ib_result_mc_address, &va_handle); 1945 
CU_ASSERT_EQUAL(r, 0); 1946 1947 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 1948 AMDGPU_GEM_DOMAIN_GTT, 0, 1949 &ib_result_ce_handle, &ib_result_ce_cpu, 1950 &ib_result_ce_mc_address, &va_handle_ce); 1951 CU_ASSERT_EQUAL(r, 0); 1952 1953 r = amdgpu_get_bo_list(device_handle, ib_result_handle, 1954 ib_result_ce_handle, &bo_list); 1955 CU_ASSERT_EQUAL(r, 0); 1956 1957 memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info)); 1958 1959 /* IT_SET_CE_DE_COUNTERS */ 1960 ptr = ib_result_ce_cpu; 1961 if (family_id != AMDGPU_FAMILY_SI) { 1962 ptr[i++] = 0xc0008900; 1963 ptr[i++] = 0; 1964 } 1965 ptr[i++] = 0xc0008400; 1966 ptr[i++] = 1; 1967 ib_info[0].ib_mc_address = ib_result_ce_mc_address; 1968 ib_info[0].size = i; 1969 ib_info[0].flags = AMDGPU_IB_FLAG_CE; 1970 1971 /* IT_WAIT_ON_CE_COUNTER */ 1972 ptr = ib_result_cpu; 1973 ptr[0] = 0xc0008600; 1974 ptr[1] = 0x00000001; 1975 ib_info[1].ib_mc_address = ib_result_mc_address; 1976 ib_info[1].size = 2; 1977 1978 for (i = 0; i < ib_cs_num; i++) { 1979 ibs_request[i].ip_type = AMDGPU_HW_IP_GFX; 1980 ibs_request[i].number_of_ibs = 2; 1981 ibs_request[i].ibs = ib_info; 1982 ibs_request[i].resources = bo_list; 1983 ibs_request[i].fence_info.handle = NULL; 1984 } 1985 1986 r = amdgpu_cs_submit(context_handle, 0,ibs_request, ib_cs_num); 1987 1988 CU_ASSERT_EQUAL(r, 0); 1989 1990 for (i = 0; i < ib_cs_num; i++) { 1991 fence_status[i].context = context_handle; 1992 fence_status[i].ip_type = AMDGPU_HW_IP_GFX; 1993 fence_status[i].fence = ibs_request[i].seq_no; 1994 } 1995 1996 r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all, 1997 AMDGPU_TIMEOUT_INFINITE, 1998 &expired, NULL); 1999 CU_ASSERT_EQUAL(r, 0); 2000 2001 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 2002 ib_result_mc_address, 4096); 2003 CU_ASSERT_EQUAL(r, 0); 2004 2005 r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce, 2006 ib_result_ce_mc_address, 4096); 2007 CU_ASSERT_EQUAL(r, 0); 2008 2009 r = 
amdgpu_bo_list_destroy(bo_list); 2010 CU_ASSERT_EQUAL(r, 0); 2011 2012 r = amdgpu_cs_ctx_free(context_handle); 2013 CU_ASSERT_EQUAL(r, 0); 2014} 2015 2016static void amdgpu_command_submission_multi_fence(void) 2017{ 2018 amdgpu_command_submission_multi_fence_wait_all(true); 2019 amdgpu_command_submission_multi_fence_wait_all(false); 2020} 2021 2022static void amdgpu_userptr_test(void) 2023{ 2024 int i, r, j; 2025 uint32_t *pm4 = NULL; 2026 uint64_t bo_mc; 2027 void *ptr = NULL; 2028 int pm4_dw = 256; 2029 int sdma_write_length = 4; 2030 amdgpu_bo_handle handle; 2031 amdgpu_context_handle context_handle; 2032 struct amdgpu_cs_ib_info *ib_info; 2033 struct amdgpu_cs_request *ibs_request; 2034 amdgpu_bo_handle buf_handle; 2035 amdgpu_va_handle va_handle; 2036 2037 pm4 = calloc(pm4_dw, sizeof(*pm4)); 2038 CU_ASSERT_NOT_EQUAL(pm4, NULL); 2039 2040 ib_info = calloc(1, sizeof(*ib_info)); 2041 CU_ASSERT_NOT_EQUAL(ib_info, NULL); 2042 2043 ibs_request = calloc(1, sizeof(*ibs_request)); 2044 CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 2045 2046 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 2047 CU_ASSERT_EQUAL(r, 0); 2048 2049 posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE); 2050 CU_ASSERT_NOT_EQUAL(ptr, NULL); 2051 memset(ptr, 0, BUFFER_SIZE); 2052 2053 r = amdgpu_create_bo_from_user_mem(device_handle, 2054 ptr, BUFFER_SIZE, &buf_handle); 2055 CU_ASSERT_EQUAL(r, 0); 2056 2057 r = amdgpu_va_range_alloc(device_handle, 2058 amdgpu_gpu_va_range_general, 2059 BUFFER_SIZE, 1, 0, &bo_mc, 2060 &va_handle, 0); 2061 CU_ASSERT_EQUAL(r, 0); 2062 2063 r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP); 2064 CU_ASSERT_EQUAL(r, 0); 2065 2066 handle = buf_handle; 2067 2068 j = i = 0; 2069 2070 if (family_id == AMDGPU_FAMILY_SI) 2071 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0, 2072 sdma_write_length); 2073 else 2074 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE, 2075 SDMA_WRITE_SUB_OPCODE_LINEAR, 0); 2076 pm4[i++] = 0xffffffff & bo_mc; 2077 
pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 2078 if (family_id >= AMDGPU_FAMILY_AI) 2079 pm4[i++] = sdma_write_length - 1; 2080 else if (family_id != AMDGPU_FAMILY_SI) 2081 pm4[i++] = sdma_write_length; 2082 2083 while (j++ < sdma_write_length) 2084 pm4[i++] = 0xdeadbeaf; 2085 2086 if (!fork()) { 2087 pm4[0] = 0x0; 2088 exit(0); 2089 } 2090 2091 amdgpu_test_exec_cs_helper(context_handle, 2092 AMDGPU_HW_IP_DMA, 0, 2093 i, pm4, 2094 1, &handle, 2095 ib_info, ibs_request); 2096 i = 0; 2097 while (i < sdma_write_length) { 2098 CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf); 2099 } 2100 free(ibs_request); 2101 free(ib_info); 2102 free(pm4); 2103 2104 r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP); 2105 CU_ASSERT_EQUAL(r, 0); 2106 r = amdgpu_va_range_free(va_handle); 2107 CU_ASSERT_EQUAL(r, 0); 2108 r = amdgpu_bo_free(buf_handle); 2109 CU_ASSERT_EQUAL(r, 0); 2110 free(ptr); 2111 2112 r = amdgpu_cs_ctx_free(context_handle); 2113 CU_ASSERT_EQUAL(r, 0); 2114 2115 wait(NULL); 2116} 2117 2118static void amdgpu_sync_dependency_test(void) 2119{ 2120 amdgpu_context_handle context_handle[2]; 2121 amdgpu_bo_handle ib_result_handle; 2122 void *ib_result_cpu; 2123 uint64_t ib_result_mc_address; 2124 struct amdgpu_cs_request ibs_request; 2125 struct amdgpu_cs_ib_info ib_info; 2126 struct amdgpu_cs_fence fence_status; 2127 uint32_t expired; 2128 int i, j, r; 2129 amdgpu_bo_list_handle bo_list; 2130 amdgpu_va_handle va_handle; 2131 static uint32_t *ptr; 2132 uint64_t seq_no; 2133 2134 r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]); 2135 CU_ASSERT_EQUAL(r, 0); 2136 r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]); 2137 CU_ASSERT_EQUAL(r, 0); 2138 2139 r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096, 2140 AMDGPU_GEM_DOMAIN_GTT, 0, 2141 &ib_result_handle, &ib_result_cpu, 2142 &ib_result_mc_address, &va_handle); 2143 CU_ASSERT_EQUAL(r, 0); 2144 2145 r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, 2146 &bo_list); 
2147 CU_ASSERT_EQUAL(r, 0); 2148 2149 ptr = ib_result_cpu; 2150 i = 0; 2151 2152 memcpy(ptr + CODE_OFFSET , shader_bin, sizeof(shader_bin)); 2153 2154 /* Dispatch minimal init config and verify it's executed */ 2155 ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); 2156 ptr[i++] = 0x80000000; 2157 ptr[i++] = 0x80000000; 2158 2159 ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0); 2160 ptr[i++] = 0x80000000; 2161 2162 2163 /* Program compute regs */ 2164 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 2165 ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 2166 ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8; 2167 ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40; 2168 2169 2170 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 2171 ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START; 2172 /* 2173 * 002c0040 COMPUTE_PGM_RSRC1 <- VGPRS = 0 2174 SGPRS = 1 2175 PRIORITY = 0 2176 FLOAT_MODE = 192 (0xc0) 2177 PRIV = 0 2178 DX10_CLAMP = 1 2179 DEBUG_MODE = 0 2180 IEEE_MODE = 0 2181 BULKY = 0 2182 CDBG_USER = 0 2183 * 2184 */ 2185 ptr[i++] = 0x002c0040; 2186 2187 2188 /* 2189 * 00000010 COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0 2190 USER_SGPR = 8 2191 TRAP_PRESENT = 0 2192 TGID_X_EN = 0 2193 TGID_Y_EN = 0 2194 TGID_Z_EN = 0 2195 TG_SIZE_EN = 0 2196 TIDIG_COMP_CNT = 0 2197 EXCP_EN_MSB = 0 2198 LDS_SIZE = 0 2199 EXCP_EN = 0 2200 * 2201 */ 2202 ptr[i++] = 0x00000010; 2203 2204 2205/* 2206 * 00000100 COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100) 2207 WAVESIZE = 0 2208 * 2209 */ 2210 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 2211 ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START; 2212 ptr[i++] = 0x00000100; 2213 2214 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 2215 ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START; 2216 ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4); 2217 ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32; 2218 2219 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 2220 ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - 
PACKET3_SET_SH_REG_START; 2221 ptr[i++] = 0; 2222 2223 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3); 2224 ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START; 2225 ptr[i++] = 1; 2226 ptr[i++] = 1; 2227 ptr[i++] = 1; 2228 2229 2230 /* Dispatch */ 2231 ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 2232 ptr[i++] = 1; 2233 ptr[i++] = 1; 2234 ptr[i++] = 1; 2235 ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */ 2236 2237 2238 while (i & 7) 2239 ptr[i++] = 0xffff1000; /* type3 nop packet */ 2240 2241 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); 2242 ib_info.ib_mc_address = ib_result_mc_address; 2243 ib_info.size = i; 2244 2245 memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); 2246 ibs_request.ip_type = AMDGPU_HW_IP_GFX; 2247 ibs_request.ring = 0; 2248 ibs_request.number_of_ibs = 1; 2249 ibs_request.ibs = &ib_info; 2250 ibs_request.resources = bo_list; 2251 ibs_request.fence_info.handle = NULL; 2252 2253 r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request, 1); 2254 CU_ASSERT_EQUAL(r, 0); 2255 seq_no = ibs_request.seq_no; 2256 2257 2258 2259 /* Prepare second command with dependency on the first */ 2260 j = i; 2261 ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3); 2262 ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 2263 ptr[i++] = 0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4); 2264 ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32; 2265 ptr[i++] = 99; 2266 2267 while (i & 7) 2268 ptr[i++] = 0xffff1000; /* type3 nop packet */ 2269 2270 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); 2271 ib_info.ib_mc_address = ib_result_mc_address + j * 4; 2272 ib_info.size = i - j; 2273 2274 memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); 2275 ibs_request.ip_type = AMDGPU_HW_IP_GFX; 2276 ibs_request.ring = 0; 2277 ibs_request.number_of_ibs = 1; 2278 ibs_request.ibs = &ib_info; 2279 ibs_request.resources = bo_list; 2280 ibs_request.fence_info.handle = NULL; 2281 2282 ibs_request.number_of_dependencies = 1; 2283 
2284 ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies)); 2285 ibs_request.dependencies[0].context = context_handle[1]; 2286 ibs_request.dependencies[0].ip_instance = 0; 2287 ibs_request.dependencies[0].ring = 0; 2288 ibs_request.dependencies[0].fence = seq_no; 2289 2290 2291 r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request, 1); 2292 CU_ASSERT_EQUAL(r, 0); 2293 2294 2295 memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence)); 2296 fence_status.context = context_handle[0]; 2297 fence_status.ip_type = AMDGPU_HW_IP_GFX; 2298 fence_status.ip_instance = 0; 2299 fence_status.ring = 0; 2300 fence_status.fence = ibs_request.seq_no; 2301 2302 r = amdgpu_cs_query_fence_status(&fence_status, 2303 AMDGPU_TIMEOUT_INFINITE,0, &expired); 2304 CU_ASSERT_EQUAL(r, 0); 2305 2306 /* Expect the second command to wait for shader to complete */ 2307 CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99); 2308 2309 r = amdgpu_bo_list_destroy(bo_list); 2310 CU_ASSERT_EQUAL(r, 0); 2311 2312 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 2313 ib_result_mc_address, 4096); 2314 CU_ASSERT_EQUAL(r, 0); 2315 2316 r = amdgpu_cs_ctx_free(context_handle[0]); 2317 CU_ASSERT_EQUAL(r, 0); 2318 r = amdgpu_cs_ctx_free(context_handle[1]); 2319 CU_ASSERT_EQUAL(r, 0); 2320 2321 free(ibs_request.dependencies); 2322} 2323 2324static int amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t *ptr, int family) 2325{ 2326 struct amdgpu_test_shader *shader; 2327 int i, loop = 0x10000; 2328 2329 switch (family) { 2330 case AMDGPU_FAMILY_AI: 2331 shader = &memcpy_cs_hang_slow_ai; 2332 break; 2333 case AMDGPU_FAMILY_RV: 2334 shader = &memcpy_cs_hang_slow_rv; 2335 break; 2336 default: 2337 return -1; 2338 break; 2339 } 2340 2341 memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t)); 2342 2343 for (i = 0; i < loop; i++) 2344 memcpy(ptr + shader->header_length + shader->body_length * i, 2345 shader->shader + shader->header_length, 2346 shader->body_length * sizeof(uint32_t)); 2347 2348 
memcpy(ptr + shader->header_length + shader->body_length * loop, 2349 shader->shader + shader->header_length + shader->body_length, 2350 shader->foot_length * sizeof(uint32_t)); 2351 2352 return 0; 2353} 2354 2355static int amdgpu_dispatch_load_cs_shader(uint8_t *ptr, 2356 int cs_type) 2357{ 2358 uint32_t shader_size; 2359 const uint32_t *shader; 2360 2361 switch (cs_type) { 2362 case CS_BUFFERCLEAR: 2363 shader = bufferclear_cs_shader_gfx9; 2364 shader_size = sizeof(bufferclear_cs_shader_gfx9); 2365 break; 2366 case CS_BUFFERCOPY: 2367 shader = buffercopy_cs_shader_gfx9; 2368 shader_size = sizeof(buffercopy_cs_shader_gfx9); 2369 break; 2370 case CS_HANG: 2371 shader = memcpy_ps_hang; 2372 shader_size = sizeof(memcpy_ps_hang); 2373 break; 2374 default: 2375 return -1; 2376 break; 2377 } 2378 2379 memcpy(ptr, shader, shader_size); 2380 return 0; 2381} 2382 2383static int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type) 2384{ 2385 int i = 0; 2386 2387 /* Write context control and load shadowing register if necessary */ 2388 if (ip_type == AMDGPU_HW_IP_GFX) { 2389 ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); 2390 ptr[i++] = 0x80000000; 2391 ptr[i++] = 0x80000000; 2392 } 2393 2394 /* Issue commands to set default compute state. 
*/ 2395 /* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */ 2396 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 3); 2397 ptr[i++] = 0x204; 2398 i += 3; 2399 2400 /* clear mmCOMPUTE_TMPRING_SIZE */ 2401 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 2402 ptr[i++] = 0x218; 2403 ptr[i++] = 0; 2404 2405 return i; 2406} 2407 2408static int amdgpu_dispatch_write_cumask(uint32_t *ptr) 2409{ 2410 int i = 0; 2411 2412 /* Issue commands to set cu mask used in current dispatch */ 2413 /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */ 2414 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); 2415 ptr[i++] = 0x216; 2416 ptr[i++] = 0xffffffff; 2417 ptr[i++] = 0xffffffff; 2418 /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */ 2419 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); 2420 ptr[i++] = 0x219; 2421 ptr[i++] = 0xffffffff; 2422 ptr[i++] = 0xffffffff; 2423 2424 return i; 2425} 2426 2427static int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr) 2428{ 2429 int i, j; 2430 2431 i = 0; 2432 2433 /* Writes shader state to HW */ 2434 /* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */ 2435 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); 2436 ptr[i++] = 0x20c; 2437 ptr[i++] = (shader_addr >> 8); 2438 ptr[i++] = (shader_addr >> 40); 2439 /* write sh regs*/ 2440 for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) { 2441 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 2442 /* - Gfx9ShRegBase */ 2443 ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00; 2444 ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1]; 2445 } 2446 2447 return i; 2448} 2449 2450static void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle, 2451 uint32_t ip_type, 2452 uint32_t ring) 2453{ 2454 amdgpu_context_handle context_handle; 2455 amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3]; 2456 volatile unsigned char *ptr_dst; 2457 void *ptr_shader; 2458 uint32_t *ptr_cmd; 2459 uint64_t mc_address_dst, mc_address_shader, 
mc_address_cmd; 2460 amdgpu_va_handle va_dst, va_shader, va_cmd; 2461 int i, r; 2462 int bo_dst_size = 16384; 2463 int bo_shader_size = 4096; 2464 int bo_cmd_size = 4096; 2465 struct amdgpu_cs_request ibs_request = {0}; 2466 struct amdgpu_cs_ib_info ib_info= {0}; 2467 amdgpu_bo_list_handle bo_list; 2468 struct amdgpu_cs_fence fence_status = {0}; 2469 uint32_t expired; 2470 2471 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 2472 CU_ASSERT_EQUAL(r, 0); 2473 2474 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 2475 AMDGPU_GEM_DOMAIN_GTT, 0, 2476 &bo_cmd, (void **)&ptr_cmd, 2477 &mc_address_cmd, &va_cmd); 2478 CU_ASSERT_EQUAL(r, 0); 2479 memset(ptr_cmd, 0, bo_cmd_size); 2480 2481 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 2482 AMDGPU_GEM_DOMAIN_VRAM, 0, 2483 &bo_shader, &ptr_shader, 2484 &mc_address_shader, &va_shader); 2485 CU_ASSERT_EQUAL(r, 0); 2486 memset(ptr_shader, 0, bo_shader_size); 2487 2488 r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR); 2489 CU_ASSERT_EQUAL(r, 0); 2490 2491 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 2492 AMDGPU_GEM_DOMAIN_VRAM, 0, 2493 &bo_dst, (void **)&ptr_dst, 2494 &mc_address_dst, &va_dst); 2495 CU_ASSERT_EQUAL(r, 0); 2496 2497 i = 0; 2498 i += amdgpu_dispatch_init(ptr_cmd + i, ip_type); 2499 2500 /* Issue commands to set cu mask used in current dispatch */ 2501 i += amdgpu_dispatch_write_cumask(ptr_cmd + i); 2502 2503 /* Writes shader state to HW */ 2504 i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader); 2505 2506 /* Write constant data */ 2507 /* Writes the UAV constant data to the SGPRs. 
*/ 2508 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 2509 ptr_cmd[i++] = 0x240; 2510 ptr_cmd[i++] = mc_address_dst; 2511 ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000; 2512 ptr_cmd[i++] = 0x400; 2513 ptr_cmd[i++] = 0x74fac; 2514 2515 /* Sets a range of pixel shader constants */ 2516 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 2517 ptr_cmd[i++] = 0x244; 2518 ptr_cmd[i++] = 0x22222222; 2519 ptr_cmd[i++] = 0x22222222; 2520 ptr_cmd[i++] = 0x22222222; 2521 ptr_cmd[i++] = 0x22222222; 2522 2523 /* clear mmCOMPUTE_RESOURCE_LIMITS */ 2524 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 2525 ptr_cmd[i++] = 0x215; 2526 ptr_cmd[i++] = 0; 2527 2528 /* dispatch direct command */ 2529 ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3); 2530 ptr_cmd[i++] = 0x10; 2531 ptr_cmd[i++] = 1; 2532 ptr_cmd[i++] = 1; 2533 ptr_cmd[i++] = 1; 2534 2535 while (i & 7) 2536 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 2537 2538 resources[0] = bo_dst; 2539 resources[1] = bo_shader; 2540 resources[2] = bo_cmd; 2541 r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list); 2542 CU_ASSERT_EQUAL(r, 0); 2543 2544 ib_info.ib_mc_address = mc_address_cmd; 2545 ib_info.size = i; 2546 ibs_request.ip_type = ip_type; 2547 ibs_request.ring = ring; 2548 ibs_request.resources = bo_list; 2549 ibs_request.number_of_ibs = 1; 2550 ibs_request.ibs = &ib_info; 2551 ibs_request.fence_info.handle = NULL; 2552 2553 /* submit CS */ 2554 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 2555 CU_ASSERT_EQUAL(r, 0); 2556 2557 r = amdgpu_bo_list_destroy(bo_list); 2558 CU_ASSERT_EQUAL(r, 0); 2559 2560 fence_status.ip_type = ip_type; 2561 fence_status.ip_instance = 0; 2562 fence_status.ring = ring; 2563 fence_status.context = context_handle; 2564 fence_status.fence = ibs_request.seq_no; 2565 2566 /* wait for IB accomplished */ 2567 r = amdgpu_cs_query_fence_status(&fence_status, 2568 AMDGPU_TIMEOUT_INFINITE, 2569 0, &expired); 2570 CU_ASSERT_EQUAL(r, 0); 2571 
CU_ASSERT_EQUAL(expired, true); 2572 2573 /* verify if memset test result meets with expected */ 2574 i = 0; 2575 while(i < bo_dst_size) { 2576 CU_ASSERT_EQUAL(ptr_dst[i++], 0x22); 2577 } 2578 2579 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 2580 CU_ASSERT_EQUAL(r, 0); 2581 2582 r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size); 2583 CU_ASSERT_EQUAL(r, 0); 2584 2585 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 2586 CU_ASSERT_EQUAL(r, 0); 2587 2588 r = amdgpu_cs_ctx_free(context_handle); 2589 CU_ASSERT_EQUAL(r, 0); 2590} 2591 2592static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle, 2593 uint32_t ip_type, 2594 uint32_t ring, 2595 int hang) 2596{ 2597 amdgpu_context_handle context_handle; 2598 amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4]; 2599 volatile unsigned char *ptr_dst; 2600 void *ptr_shader; 2601 unsigned char *ptr_src; 2602 uint32_t *ptr_cmd; 2603 uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd; 2604 amdgpu_va_handle va_src, va_dst, va_shader, va_cmd; 2605 int i, r; 2606 int bo_dst_size = 16384; 2607 int bo_shader_size = 4096; 2608 int bo_cmd_size = 4096; 2609 struct amdgpu_cs_request ibs_request = {0}; 2610 struct amdgpu_cs_ib_info ib_info= {0}; 2611 uint32_t expired, hang_state, hangs; 2612 enum cs_type cs_type; 2613 amdgpu_bo_list_handle bo_list; 2614 struct amdgpu_cs_fence fence_status = {0}; 2615 2616 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 2617 CU_ASSERT_EQUAL(r, 0); 2618 2619 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 2620 AMDGPU_GEM_DOMAIN_GTT, 0, 2621 &bo_cmd, (void **)&ptr_cmd, 2622 &mc_address_cmd, &va_cmd); 2623 CU_ASSERT_EQUAL(r, 0); 2624 memset(ptr_cmd, 0, bo_cmd_size); 2625 2626 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 2627 AMDGPU_GEM_DOMAIN_VRAM, 0, 2628 &bo_shader, &ptr_shader, 2629 &mc_address_shader, &va_shader); 2630 
CU_ASSERT_EQUAL(r, 0); 2631 memset(ptr_shader, 0, bo_shader_size); 2632 2633 cs_type = hang ? CS_HANG : CS_BUFFERCOPY; 2634 r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type); 2635 CU_ASSERT_EQUAL(r, 0); 2636 2637 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 2638 AMDGPU_GEM_DOMAIN_VRAM, 0, 2639 &bo_src, (void **)&ptr_src, 2640 &mc_address_src, &va_src); 2641 CU_ASSERT_EQUAL(r, 0); 2642 2643 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 2644 AMDGPU_GEM_DOMAIN_VRAM, 0, 2645 &bo_dst, (void **)&ptr_dst, 2646 &mc_address_dst, &va_dst); 2647 CU_ASSERT_EQUAL(r, 0); 2648 2649 memset(ptr_src, 0x55, bo_dst_size); 2650 2651 i = 0; 2652 i += amdgpu_dispatch_init(ptr_cmd + i, ip_type); 2653 2654 /* Issue commands to set cu mask used in current dispatch */ 2655 i += amdgpu_dispatch_write_cumask(ptr_cmd + i); 2656 2657 /* Writes shader state to HW */ 2658 i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader); 2659 2660 /* Write constant data */ 2661 /* Writes the texture resource constants data to the SGPRs */ 2662 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 2663 ptr_cmd[i++] = 0x240; 2664 ptr_cmd[i++] = mc_address_src; 2665 ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000; 2666 ptr_cmd[i++] = 0x400; 2667 ptr_cmd[i++] = 0x74fac; 2668 2669 /* Writes the UAV constant data to the SGPRs. 
*/ 2670 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 2671 ptr_cmd[i++] = 0x244; 2672 ptr_cmd[i++] = mc_address_dst; 2673 ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000; 2674 ptr_cmd[i++] = 0x400; 2675 ptr_cmd[i++] = 0x74fac; 2676 2677 /* clear mmCOMPUTE_RESOURCE_LIMITS */ 2678 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 2679 ptr_cmd[i++] = 0x215; 2680 ptr_cmd[i++] = 0; 2681 2682 /* dispatch direct command */ 2683 ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3); 2684 ptr_cmd[i++] = 0x10; 2685 ptr_cmd[i++] = 1; 2686 ptr_cmd[i++] = 1; 2687 ptr_cmd[i++] = 1; 2688 2689 while (i & 7) 2690 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 2691 2692 resources[0] = bo_shader; 2693 resources[1] = bo_src; 2694 resources[2] = bo_dst; 2695 resources[3] = bo_cmd; 2696 r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list); 2697 CU_ASSERT_EQUAL(r, 0); 2698 2699 ib_info.ib_mc_address = mc_address_cmd; 2700 ib_info.size = i; 2701 ibs_request.ip_type = ip_type; 2702 ibs_request.ring = ring; 2703 ibs_request.resources = bo_list; 2704 ibs_request.number_of_ibs = 1; 2705 ibs_request.ibs = &ib_info; 2706 ibs_request.fence_info.handle = NULL; 2707 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 2708 CU_ASSERT_EQUAL(r, 0); 2709 2710 fence_status.ip_type = ip_type; 2711 fence_status.ip_instance = 0; 2712 fence_status.ring = ring; 2713 fence_status.context = context_handle; 2714 fence_status.fence = ibs_request.seq_no; 2715 2716 /* wait for IB accomplished */ 2717 r = amdgpu_cs_query_fence_status(&fence_status, 2718 AMDGPU_TIMEOUT_INFINITE, 2719 0, &expired); 2720 2721 if (!hang) { 2722 CU_ASSERT_EQUAL(r, 0); 2723 CU_ASSERT_EQUAL(expired, true); 2724 2725 /* verify if memcpy test result meets with expected */ 2726 i = 0; 2727 while(i < bo_dst_size) { 2728 CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]); 2729 i++; 2730 } 2731 } else { 2732 r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); 2733 CU_ASSERT_EQUAL(r, 0); 2734 
CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); 2735 } 2736 2737 r = amdgpu_bo_list_destroy(bo_list); 2738 CU_ASSERT_EQUAL(r, 0); 2739 2740 r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size); 2741 CU_ASSERT_EQUAL(r, 0); 2742 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 2743 CU_ASSERT_EQUAL(r, 0); 2744 2745 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 2746 CU_ASSERT_EQUAL(r, 0); 2747 2748 r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size); 2749 CU_ASSERT_EQUAL(r, 0); 2750 2751 r = amdgpu_cs_ctx_free(context_handle); 2752 CU_ASSERT_EQUAL(r, 0); 2753} 2754 2755static void amdgpu_compute_dispatch_test(void) 2756{ 2757 int r; 2758 struct drm_amdgpu_info_hw_ip info; 2759 uint32_t ring_id; 2760 2761 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info); 2762 CU_ASSERT_EQUAL(r, 0); 2763 if (!info.available_rings) 2764 printf("SKIP ... as there's no compute ring\n"); 2765 2766 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 2767 amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id); 2768 amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, 0); 2769 } 2770} 2771 2772static void amdgpu_gfx_dispatch_test(void) 2773{ 2774 int r; 2775 struct drm_amdgpu_info_hw_ip info; 2776 uint32_t ring_id; 2777 2778 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info); 2779 CU_ASSERT_EQUAL(r, 0); 2780 if (!info.available_rings) 2781 printf("SKIP ... 
as there's no graphics ring\n"); 2782 2783 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 2784 amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id); 2785 amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, 0); 2786 } 2787} 2788 2789void amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip_type) 2790{ 2791 int r; 2792 struct drm_amdgpu_info_hw_ip info; 2793 uint32_t ring_id; 2794 2795 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info); 2796 CU_ASSERT_EQUAL(r, 0); 2797 if (!info.available_rings) 2798 printf("SKIP ... as there's no ring for ip %d\n", ip_type); 2799 2800 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 2801 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0); 2802 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 1); 2803 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0); 2804 } 2805} 2806 2807static void amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_handle, 2808 uint32_t ip_type, uint32_t ring) 2809{ 2810 amdgpu_context_handle context_handle; 2811 amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4]; 2812 volatile unsigned char *ptr_dst; 2813 void *ptr_shader; 2814 unsigned char *ptr_src; 2815 uint32_t *ptr_cmd; 2816 uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd; 2817 amdgpu_va_handle va_src, va_dst, va_shader, va_cmd; 2818 int i, r; 2819 int bo_dst_size = 0x4000000; 2820 int bo_shader_size = 0x400000; 2821 int bo_cmd_size = 4096; 2822 struct amdgpu_cs_request ibs_request = {0}; 2823 struct amdgpu_cs_ib_info ib_info= {0}; 2824 uint32_t hang_state, hangs, expired; 2825 struct amdgpu_gpu_info gpu_info = {0}; 2826 amdgpu_bo_list_handle bo_list; 2827 struct amdgpu_cs_fence fence_status = {0}; 2828 2829 r = amdgpu_query_gpu_info(device_handle, &gpu_info); 2830 CU_ASSERT_EQUAL(r, 0); 2831 2832 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 
2833 CU_ASSERT_EQUAL(r, 0); 2834 2835 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 2836 AMDGPU_GEM_DOMAIN_GTT, 0, 2837 &bo_cmd, (void **)&ptr_cmd, 2838 &mc_address_cmd, &va_cmd); 2839 CU_ASSERT_EQUAL(r, 0); 2840 memset(ptr_cmd, 0, bo_cmd_size); 2841 2842 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 2843 AMDGPU_GEM_DOMAIN_VRAM, 0, 2844 &bo_shader, &ptr_shader, 2845 &mc_address_shader, &va_shader); 2846 CU_ASSERT_EQUAL(r, 0); 2847 memset(ptr_shader, 0, bo_shader_size); 2848 2849 r = amdgpu_dispatch_load_cs_shader_hang_slow(ptr_shader, gpu_info.family_id); 2850 CU_ASSERT_EQUAL(r, 0); 2851 2852 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 2853 AMDGPU_GEM_DOMAIN_VRAM, 0, 2854 &bo_src, (void **)&ptr_src, 2855 &mc_address_src, &va_src); 2856 CU_ASSERT_EQUAL(r, 0); 2857 2858 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 2859 AMDGPU_GEM_DOMAIN_VRAM, 0, 2860 &bo_dst, (void **)&ptr_dst, 2861 &mc_address_dst, &va_dst); 2862 CU_ASSERT_EQUAL(r, 0); 2863 2864 memset(ptr_src, 0x55, bo_dst_size); 2865 2866 i = 0; 2867 i += amdgpu_dispatch_init(ptr_cmd + i, ip_type); 2868 2869 /* Issue commands to set cu mask used in current dispatch */ 2870 i += amdgpu_dispatch_write_cumask(ptr_cmd + i); 2871 2872 /* Writes shader state to HW */ 2873 i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader); 2874 2875 /* Write constant data */ 2876 /* Writes the texture resource constants data to the SGPRs */ 2877 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 2878 ptr_cmd[i++] = 0x240; 2879 ptr_cmd[i++] = mc_address_src; 2880 ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000; 2881 ptr_cmd[i++] = 0x400000; 2882 ptr_cmd[i++] = 0x74fac; 2883 2884 /* Writes the UAV constant data to the SGPRs. 
*/ 2885 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 2886 ptr_cmd[i++] = 0x244; 2887 ptr_cmd[i++] = mc_address_dst; 2888 ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000; 2889 ptr_cmd[i++] = 0x400000; 2890 ptr_cmd[i++] = 0x74fac; 2891 2892 /* clear mmCOMPUTE_RESOURCE_LIMITS */ 2893 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 2894 ptr_cmd[i++] = 0x215; 2895 ptr_cmd[i++] = 0; 2896 2897 /* dispatch direct command */ 2898 ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3); 2899 ptr_cmd[i++] = 0x10000; 2900 ptr_cmd[i++] = 1; 2901 ptr_cmd[i++] = 1; 2902 ptr_cmd[i++] = 1; 2903 2904 while (i & 7) 2905 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 2906 2907 resources[0] = bo_shader; 2908 resources[1] = bo_src; 2909 resources[2] = bo_dst; 2910 resources[3] = bo_cmd; 2911 r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list); 2912 CU_ASSERT_EQUAL(r, 0); 2913 2914 ib_info.ib_mc_address = mc_address_cmd; 2915 ib_info.size = i; 2916 ibs_request.ip_type = ip_type; 2917 ibs_request.ring = ring; 2918 ibs_request.resources = bo_list; 2919 ibs_request.number_of_ibs = 1; 2920 ibs_request.ibs = &ib_info; 2921 ibs_request.fence_info.handle = NULL; 2922 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 2923 CU_ASSERT_EQUAL(r, 0); 2924 2925 fence_status.ip_type = ip_type; 2926 fence_status.ip_instance = 0; 2927 fence_status.ring = ring; 2928 fence_status.context = context_handle; 2929 fence_status.fence = ibs_request.seq_no; 2930 2931 /* wait for IB accomplished */ 2932 r = amdgpu_cs_query_fence_status(&fence_status, 2933 AMDGPU_TIMEOUT_INFINITE, 2934 0, &expired); 2935 2936 r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); 2937 CU_ASSERT_EQUAL(r, 0); 2938 CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); 2939 2940 r = amdgpu_bo_list_destroy(bo_list); 2941 CU_ASSERT_EQUAL(r, 0); 2942 2943 r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size); 2944 CU_ASSERT_EQUAL(r, 0); 2945 r = 
amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 2946 CU_ASSERT_EQUAL(r, 0); 2947 2948 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 2949 CU_ASSERT_EQUAL(r, 0); 2950 2951 r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size); 2952 CU_ASSERT_EQUAL(r, 0); 2953 2954 r = amdgpu_cs_ctx_free(context_handle); 2955 CU_ASSERT_EQUAL(r, 0); 2956} 2957 2958void amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle, uint32_t ip_type) 2959{ 2960 int r; 2961 struct drm_amdgpu_info_hw_ip info; 2962 uint32_t ring_id; 2963 2964 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info); 2965 CU_ASSERT_EQUAL(r, 0); 2966 if (!info.available_rings) 2967 printf("SKIP ... as there's no ring for ip %d\n", ip_type); 2968 2969 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 2970 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0); 2971 amdgpu_memcpy_dispatch_hang_slow_test(device_handle, ip_type, ring_id); 2972 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0); 2973 } 2974} 2975 2976static int amdgpu_draw_load_ps_shader_hang_slow(uint32_t *ptr, int family) 2977{ 2978 struct amdgpu_test_shader *shader; 2979 int i, loop = 0x40000; 2980 2981 switch (family) { 2982 case AMDGPU_FAMILY_AI: 2983 case AMDGPU_FAMILY_RV: 2984 shader = &memcpy_ps_hang_slow_ai; 2985 break; 2986 default: 2987 return -1; 2988 break; 2989 } 2990 2991 memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t)); 2992 2993 for (i = 0; i < loop; i++) 2994 memcpy(ptr + shader->header_length + shader->body_length * i, 2995 shader->shader + shader->header_length, 2996 shader->body_length * sizeof(uint32_t)); 2997 2998 memcpy(ptr + shader->header_length + shader->body_length * loop, 2999 shader->shader + shader->header_length + shader->body_length, 3000 shader->foot_length * sizeof(uint32_t)); 3001 3002 return 0; 3003} 3004 3005static int amdgpu_draw_load_ps_shader(uint8_t 
*ptr, int ps_type) 3006{ 3007 int i; 3008 uint32_t shader_offset= 256; 3009 uint32_t mem_offset, patch_code_offset; 3010 uint32_t shader_size, patchinfo_code_size; 3011 const uint32_t *shader; 3012 const uint32_t *patchinfo_code; 3013 const uint32_t *patchcode_offset; 3014 3015 switch (ps_type) { 3016 case PS_CONST: 3017 shader = ps_const_shader_gfx9; 3018 shader_size = sizeof(ps_const_shader_gfx9); 3019 patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9; 3020 patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9; 3021 patchcode_offset = ps_const_shader_patchinfo_offset_gfx9; 3022 break; 3023 case PS_TEX: 3024 shader = ps_tex_shader_gfx9; 3025 shader_size = sizeof(ps_tex_shader_gfx9); 3026 patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx9; 3027 patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9; 3028 patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9; 3029 break; 3030 case PS_HANG: 3031 shader = memcpy_ps_hang; 3032 shader_size = sizeof(memcpy_ps_hang); 3033 3034 memcpy(ptr, shader, shader_size); 3035 return 0; 3036 default: 3037 return -1; 3038 break; 3039 } 3040 3041 /* write main shader program */ 3042 for (i = 0 ; i < 10; i++) { 3043 mem_offset = i * shader_offset; 3044 memcpy(ptr + mem_offset, shader, shader_size); 3045 } 3046 3047 /* overwrite patch codes */ 3048 for (i = 0 ; i < 10; i++) { 3049 mem_offset = i * shader_offset + patchcode_offset[0] * sizeof(uint32_t); 3050 patch_code_offset = i * patchinfo_code_size; 3051 memcpy(ptr + mem_offset, 3052 patchinfo_code + patch_code_offset, 3053 patchinfo_code_size * sizeof(uint32_t)); 3054 } 3055 3056 return 0; 3057} 3058 3059/* load RectPosTexFast_VS */ 3060static int amdgpu_draw_load_vs_shader(uint8_t *ptr) 3061{ 3062 const uint32_t *shader; 3063 uint32_t shader_size; 3064 3065 shader = vs_RectPosTexFast_shader_gfx9; 3066 shader_size = sizeof(vs_RectPosTexFast_shader_gfx9); 3067 3068 memcpy(ptr, shader, shader_size); 3069 3070 return 0; 3071} 3072 
3073static int amdgpu_draw_init(uint32_t *ptr) 3074{ 3075 int i = 0; 3076 const uint32_t *preamblecache_ptr; 3077 uint32_t preamblecache_size; 3078 3079 /* Write context control and load shadowing register if necessary */ 3080 ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); 3081 ptr[i++] = 0x80000000; 3082 ptr[i++] = 0x80000000; 3083 3084 preamblecache_ptr = preamblecache_gfx9; 3085 preamblecache_size = sizeof(preamblecache_gfx9); 3086 3087 memcpy(ptr + i, preamblecache_ptr, preamblecache_size); 3088 return i + preamblecache_size/sizeof(uint32_t); 3089} 3090 3091static int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr, 3092 uint64_t dst_addr, 3093 int hang_slow) 3094{ 3095 int i = 0; 3096 3097 /* setup color buffer */ 3098 /* offset reg 3099 0xA318 CB_COLOR0_BASE 3100 0xA319 CB_COLOR0_BASE_EXT 3101 0xA31A CB_COLOR0_ATTRIB2 3102 0xA31B CB_COLOR0_VIEW 3103 0xA31C CB_COLOR0_INFO 3104 0xA31D CB_COLOR0_ATTRIB 3105 0xA31E CB_COLOR0_DCC_CONTROL 3106 0xA31F CB_COLOR0_CMASK 3107 0xA320 CB_COLOR0_CMASK_BASE_EXT 3108 0xA321 CB_COLOR0_FMASK 3109 0xA322 CB_COLOR0_FMASK_BASE_EXT 3110 0xA323 CB_COLOR0_CLEAR_WORD0 3111 0xA324 CB_COLOR0_CLEAR_WORD1 3112 0xA325 CB_COLOR0_DCC_BASE 3113 0xA326 CB_COLOR0_DCC_BASE_EXT */ 3114 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15); 3115 ptr[i++] = 0x318; 3116 ptr[i++] = dst_addr >> 8; 3117 ptr[i++] = dst_addr >> 40; 3118 ptr[i++] = hang_slow ? 0x1ffc7ff : 0x7c01f; 3119 ptr[i++] = 0; 3120 ptr[i++] = 0x50438; 3121 ptr[i++] = 0x10140000; 3122 i += 9; 3123 3124 /* mmCB_MRT0_EPITCH */ 3125 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3126 ptr[i++] = 0x1e8; 3127 ptr[i++] = hang_slow ? 
0x7ff : 0x1f; 3128 3129 /* 0xA32B CB_COLOR1_BASE */ 3130 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3131 ptr[i++] = 0x32b; 3132 ptr[i++] = 0; 3133 3134 /* 0xA33A CB_COLOR1_BASE */ 3135 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3136 ptr[i++] = 0x33a; 3137 ptr[i++] = 0; 3138 3139 /* SPI_SHADER_COL_FORMAT */ 3140 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3141 ptr[i++] = 0x1c5; 3142 ptr[i++] = 9; 3143 3144 /* Setup depth buffer */ 3145 /* mmDB_Z_INFO */ 3146 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); 3147 ptr[i++] = 0xe; 3148 i += 2; 3149 3150 return i; 3151} 3152 3153static int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr, int hang_slow) 3154{ 3155 int i = 0; 3156 const uint32_t *cached_cmd_ptr; 3157 uint32_t cached_cmd_size; 3158 3159 /* mmPA_SC_TILE_STEERING_OVERRIDE */ 3160 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3161 ptr[i++] = 0xd7; 3162 ptr[i++] = 0; 3163 3164 ptr[i++] = 0xffff1000; 3165 ptr[i++] = 0xc0021000; 3166 3167 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3168 ptr[i++] = 0xd7; 3169 ptr[i++] = 1; 3170 3171 /* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */ 3172 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16); 3173 ptr[i++] = 0x2fe; 3174 i += 16; 3175 3176 /* mmPA_SC_CENTROID_PRIORITY_0 */ 3177 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); 3178 ptr[i++] = 0x2f5; 3179 i += 2; 3180 3181 cached_cmd_ptr = cached_cmd_gfx9; 3182 cached_cmd_size = sizeof(cached_cmd_gfx9); 3183 3184 memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size); 3185 if (hang_slow) 3186 *(ptr + i + 12) = 0x8000800; 3187 i += cached_cmd_size/sizeof(uint32_t); 3188 3189 return i; 3190} 3191 3192static int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr, 3193 int ps_type, 3194 uint64_t shader_addr, 3195 int hang_slow) 3196{ 3197 int i = 0; 3198 3199 /* mmPA_CL_VS_OUT_CNTL */ 3200 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3201 ptr[i++] = 0x207; 3202 ptr[i++] = 0; 3203 3204 /* mmSPI_SHADER_PGM_RSRC3_VS */ 3205 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 
3206 ptr[i++] = 0x46; 3207 ptr[i++] = 0xffff; 3208 3209 /* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */ 3210 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 3211 ptr[i++] = 0x48; 3212 ptr[i++] = shader_addr >> 8; 3213 ptr[i++] = shader_addr >> 40; 3214 3215 /* mmSPI_SHADER_PGM_RSRC1_VS */ 3216 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 3217 ptr[i++] = 0x4a; 3218 ptr[i++] = 0xc0081; 3219 /* mmSPI_SHADER_PGM_RSRC2_VS */ 3220 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 3221 ptr[i++] = 0x4b; 3222 ptr[i++] = 0x18; 3223 3224 /* mmSPI_VS_OUT_CONFIG */ 3225 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3226 ptr[i++] = 0x1b1; 3227 ptr[i++] = 2; 3228 3229 /* mmSPI_SHADER_POS_FORMAT */ 3230 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3231 ptr[i++] = 0x1c3; 3232 ptr[i++] = 4; 3233 3234 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4); 3235 ptr[i++] = 0x4c; 3236 i += 2; 3237 ptr[i++] = hang_slow ? 0x45000000 : 0x42000000; 3238 ptr[i++] = hang_slow ? 0x45000000 : 0x42000000; 3239 3240 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4); 3241 ptr[i++] = 0x50; 3242 i += 2; 3243 if (ps_type == PS_CONST) { 3244 i += 2; 3245 } else if (ps_type == PS_TEX) { 3246 ptr[i++] = 0x3f800000; 3247 ptr[i++] = 0x3f800000; 3248 } 3249 3250 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4); 3251 ptr[i++] = 0x54; 3252 i += 4; 3253 3254 return i; 3255} 3256 3257static int amdgpu_draw_ps_write2hw(uint32_t *ptr, 3258 int ps_type, 3259 uint64_t shader_addr) 3260{ 3261 int i, j; 3262 const uint32_t *sh_registers; 3263 const uint32_t *context_registers; 3264 uint32_t num_sh_reg, num_context_reg; 3265 3266 if (ps_type == PS_CONST) { 3267 sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9; 3268 context_registers = (const uint32_t *)ps_const_context_reg_gfx9; 3269 num_sh_reg = ps_num_sh_registers_gfx9; 3270 num_context_reg = ps_num_context_registers_gfx9; 3271 } else if (ps_type == PS_TEX) { 3272 sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9; 3273 context_registers = (const uint32_t *)ps_tex_context_reg_gfx9; 3274 
num_sh_reg = ps_num_sh_registers_gfx9; 3275 num_context_reg = ps_num_context_registers_gfx9; 3276 } 3277 3278 i = 0; 3279 3280 /* 0x2c07 SPI_SHADER_PGM_RSRC3_PS 3281 0x2c08 SPI_SHADER_PGM_LO_PS 3282 0x2c09 SPI_SHADER_PGM_HI_PS */ 3283 shader_addr += 256 * 9; 3284 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3); 3285 ptr[i++] = 0x7; 3286 ptr[i++] = 0xffff; 3287 ptr[i++] = shader_addr >> 8; 3288 ptr[i++] = shader_addr >> 40; 3289 3290 for (j = 0; j < num_sh_reg; j++) { 3291 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 3292 ptr[i++] = sh_registers[j * 2] - 0x2c00; 3293 ptr[i++] = sh_registers[j * 2 + 1]; 3294 } 3295 3296 for (j = 0; j < num_context_reg; j++) { 3297 if (context_registers[j * 2] != 0xA1C5) { 3298 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3299 ptr[i++] = context_registers[j * 2] - 0xa000; 3300 ptr[i++] = context_registers[j * 2 + 1]; 3301 } 3302 3303 if (context_registers[j * 2] == 0xA1B4) { 3304 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3305 ptr[i++] = 0x1b3; 3306 ptr[i++] = 2; 3307 } 3308 } 3309 3310 return i; 3311} 3312 3313static int amdgpu_draw_draw(uint32_t *ptr) 3314{ 3315 int i = 0; 3316 3317 /* mmIA_MULTI_VGT_PARAM */ 3318 ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 3319 ptr[i++] = 0x40000258; 3320 ptr[i++] = 0xd00ff; 3321 3322 /* mmVGT_PRIMITIVE_TYPE */ 3323 ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 3324 ptr[i++] = 0x10000242; 3325 ptr[i++] = 0x11; 3326 3327 ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1); 3328 ptr[i++] = 3; 3329 ptr[i++] = 2; 3330 3331 return i; 3332} 3333 3334void amdgpu_memset_draw(amdgpu_device_handle device_handle, 3335 amdgpu_bo_handle bo_shader_ps, 3336 amdgpu_bo_handle bo_shader_vs, 3337 uint64_t mc_address_shader_ps, 3338 uint64_t mc_address_shader_vs, 3339 uint32_t ring_id) 3340{ 3341 amdgpu_context_handle context_handle; 3342 amdgpu_bo_handle bo_dst, bo_cmd, resources[4]; 3343 volatile unsigned char *ptr_dst; 3344 uint32_t *ptr_cmd; 3345 uint64_t mc_address_dst, mc_address_cmd; 3346 amdgpu_va_handle va_dst, 
va_cmd; 3347 int i, r; 3348 int bo_dst_size = 16384; 3349 int bo_cmd_size = 4096; 3350 struct amdgpu_cs_request ibs_request = {0}; 3351 struct amdgpu_cs_ib_info ib_info = {0}; 3352 struct amdgpu_cs_fence fence_status = {0}; 3353 uint32_t expired; 3354 amdgpu_bo_list_handle bo_list; 3355 3356 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 3357 CU_ASSERT_EQUAL(r, 0); 3358 3359 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 3360 AMDGPU_GEM_DOMAIN_GTT, 0, 3361 &bo_cmd, (void **)&ptr_cmd, 3362 &mc_address_cmd, &va_cmd); 3363 CU_ASSERT_EQUAL(r, 0); 3364 memset(ptr_cmd, 0, bo_cmd_size); 3365 3366 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 3367 AMDGPU_GEM_DOMAIN_VRAM, 0, 3368 &bo_dst, (void **)&ptr_dst, 3369 &mc_address_dst, &va_dst); 3370 CU_ASSERT_EQUAL(r, 0); 3371 3372 i = 0; 3373 i += amdgpu_draw_init(ptr_cmd + i); 3374 3375 i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0); 3376 3377 i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0); 3378 3379 i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs, 0); 3380 3381 i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps); 3382 3383 ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4); 3384 ptr_cmd[i++] = 0xc; 3385 ptr_cmd[i++] = 0x33333333; 3386 ptr_cmd[i++] = 0x33333333; 3387 ptr_cmd[i++] = 0x33333333; 3388 ptr_cmd[i++] = 0x33333333; 3389 3390 i += amdgpu_draw_draw(ptr_cmd + i); 3391 3392 while (i & 7) 3393 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 3394 3395 resources[0] = bo_dst; 3396 resources[1] = bo_shader_ps; 3397 resources[2] = bo_shader_vs; 3398 resources[3] = bo_cmd; 3399 r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list); 3400 CU_ASSERT_EQUAL(r, 0); 3401 3402 ib_info.ib_mc_address = mc_address_cmd; 3403 ib_info.size = i; 3404 ibs_request.ip_type = AMDGPU_HW_IP_GFX; 3405 ibs_request.ring = ring_id; 3406 ibs_request.resources = bo_list; 3407 
ibs_request.number_of_ibs = 1; 3408 ibs_request.ibs = &ib_info; 3409 ibs_request.fence_info.handle = NULL; 3410 3411 /* submit CS */ 3412 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 3413 CU_ASSERT_EQUAL(r, 0); 3414 3415 r = amdgpu_bo_list_destroy(bo_list); 3416 CU_ASSERT_EQUAL(r, 0); 3417 3418 fence_status.ip_type = AMDGPU_HW_IP_GFX; 3419 fence_status.ip_instance = 0; 3420 fence_status.ring = ring_id; 3421 fence_status.context = context_handle; 3422 fence_status.fence = ibs_request.seq_no; 3423 3424 /* wait for IB accomplished */ 3425 r = amdgpu_cs_query_fence_status(&fence_status, 3426 AMDGPU_TIMEOUT_INFINITE, 3427 0, &expired); 3428 CU_ASSERT_EQUAL(r, 0); 3429 CU_ASSERT_EQUAL(expired, true); 3430 3431 /* verify if memset test result meets with expected */ 3432 i = 0; 3433 while(i < bo_dst_size) { 3434 CU_ASSERT_EQUAL(ptr_dst[i++], 0x33); 3435 } 3436 3437 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 3438 CU_ASSERT_EQUAL(r, 0); 3439 3440 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 3441 CU_ASSERT_EQUAL(r, 0); 3442 3443 r = amdgpu_cs_ctx_free(context_handle); 3444 CU_ASSERT_EQUAL(r, 0); 3445} 3446 3447static void amdgpu_memset_draw_test(amdgpu_device_handle device_handle, 3448 uint32_t ring) 3449{ 3450 amdgpu_bo_handle bo_shader_ps, bo_shader_vs; 3451 void *ptr_shader_ps; 3452 void *ptr_shader_vs; 3453 uint64_t mc_address_shader_ps, mc_address_shader_vs; 3454 amdgpu_va_handle va_shader_ps, va_shader_vs; 3455 int r; 3456 int bo_shader_size = 4096; 3457 3458 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 3459 AMDGPU_GEM_DOMAIN_VRAM, 0, 3460 &bo_shader_ps, &ptr_shader_ps, 3461 &mc_address_shader_ps, &va_shader_ps); 3462 CU_ASSERT_EQUAL(r, 0); 3463 memset(ptr_shader_ps, 0, bo_shader_size); 3464 3465 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 3466 AMDGPU_GEM_DOMAIN_VRAM, 0, 3467 &bo_shader_vs, &ptr_shader_vs, 3468 &mc_address_shader_vs, &va_shader_vs); 3469 
CU_ASSERT_EQUAL(r, 0); 3470 memset(ptr_shader_vs, 0, bo_shader_size); 3471 3472 r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_CONST); 3473 CU_ASSERT_EQUAL(r, 0); 3474 3475 r = amdgpu_draw_load_vs_shader(ptr_shader_vs); 3476 CU_ASSERT_EQUAL(r, 0); 3477 3478 amdgpu_memset_draw(device_handle, bo_shader_ps, bo_shader_vs, 3479 mc_address_shader_ps, mc_address_shader_vs, ring); 3480 3481 r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size); 3482 CU_ASSERT_EQUAL(r, 0); 3483 3484 r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size); 3485 CU_ASSERT_EQUAL(r, 0); 3486} 3487 3488static void amdgpu_memcpy_draw(amdgpu_device_handle device_handle, 3489 amdgpu_bo_handle bo_shader_ps, 3490 amdgpu_bo_handle bo_shader_vs, 3491 uint64_t mc_address_shader_ps, 3492 uint64_t mc_address_shader_vs, 3493 uint32_t ring, int hang) 3494{ 3495 amdgpu_context_handle context_handle; 3496 amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5]; 3497 volatile unsigned char *ptr_dst; 3498 unsigned char *ptr_src; 3499 uint32_t *ptr_cmd; 3500 uint64_t mc_address_dst, mc_address_src, mc_address_cmd; 3501 amdgpu_va_handle va_dst, va_src, va_cmd; 3502 int i, r; 3503 int bo_size = 16384; 3504 int bo_cmd_size = 4096; 3505 struct amdgpu_cs_request ibs_request = {0}; 3506 struct amdgpu_cs_ib_info ib_info= {0}; 3507 uint32_t hang_state, hangs; 3508 uint32_t expired; 3509 amdgpu_bo_list_handle bo_list; 3510 struct amdgpu_cs_fence fence_status = {0}; 3511 3512 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 3513 CU_ASSERT_EQUAL(r, 0); 3514 3515 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 3516 AMDGPU_GEM_DOMAIN_GTT, 0, 3517 &bo_cmd, (void **)&ptr_cmd, 3518 &mc_address_cmd, &va_cmd); 3519 CU_ASSERT_EQUAL(r, 0); 3520 memset(ptr_cmd, 0, bo_cmd_size); 3521 3522 r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096, 3523 AMDGPU_GEM_DOMAIN_VRAM, 0, 3524 &bo_src, (void **)&ptr_src, 3525 &mc_address_src, 
&va_src); 3526 CU_ASSERT_EQUAL(r, 0); 3527 3528 r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096, 3529 AMDGPU_GEM_DOMAIN_VRAM, 0, 3530 &bo_dst, (void **)&ptr_dst, 3531 &mc_address_dst, &va_dst); 3532 CU_ASSERT_EQUAL(r, 0); 3533 3534 memset(ptr_src, 0x55, bo_size); 3535 3536 i = 0; 3537 i += amdgpu_draw_init(ptr_cmd + i); 3538 3539 i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0); 3540 3541 i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0); 3542 3543 i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs, 0); 3544 3545 i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps); 3546 3547 ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8); 3548 ptr_cmd[i++] = 0xc; 3549 ptr_cmd[i++] = mc_address_src >> 8; 3550 ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000; 3551 ptr_cmd[i++] = 0x7c01f; 3552 ptr_cmd[i++] = 0x90500fac; 3553 ptr_cmd[i++] = 0x3e000; 3554 i += 3; 3555 3556 ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4); 3557 ptr_cmd[i++] = 0x14; 3558 ptr_cmd[i++] = 0x92; 3559 i += 3; 3560 3561 ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3562 ptr_cmd[i++] = 0x191; 3563 ptr_cmd[i++] = 0; 3564 3565 i += amdgpu_draw_draw(ptr_cmd + i); 3566 3567 while (i & 7) 3568 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 3569 3570 resources[0] = bo_dst; 3571 resources[1] = bo_src; 3572 resources[2] = bo_shader_ps; 3573 resources[3] = bo_shader_vs; 3574 resources[4] = bo_cmd; 3575 r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list); 3576 CU_ASSERT_EQUAL(r, 0); 3577 3578 ib_info.ib_mc_address = mc_address_cmd; 3579 ib_info.size = i; 3580 ibs_request.ip_type = AMDGPU_HW_IP_GFX; 3581 ibs_request.ring = ring; 3582 ibs_request.resources = bo_list; 3583 ibs_request.number_of_ibs = 1; 3584 ibs_request.ibs = &ib_info; 3585 ibs_request.fence_info.handle = NULL; 3586 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 3587 CU_ASSERT_EQUAL(r, 0); 3588 3589 fence_status.ip_type 
= AMDGPU_HW_IP_GFX; 3590 fence_status.ip_instance = 0; 3591 fence_status.ring = ring; 3592 fence_status.context = context_handle; 3593 fence_status.fence = ibs_request.seq_no; 3594 3595 /* wait for IB accomplished */ 3596 r = amdgpu_cs_query_fence_status(&fence_status, 3597 AMDGPU_TIMEOUT_INFINITE, 3598 0, &expired); 3599 if (!hang) { 3600 CU_ASSERT_EQUAL(r, 0); 3601 CU_ASSERT_EQUAL(expired, true); 3602 3603 /* verify if memcpy test result meets with expected */ 3604 i = 0; 3605 while(i < bo_size) { 3606 CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]); 3607 i++; 3608 } 3609 } else { 3610 r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); 3611 CU_ASSERT_EQUAL(r, 0); 3612 CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); 3613 } 3614 3615 r = amdgpu_bo_list_destroy(bo_list); 3616 CU_ASSERT_EQUAL(r, 0); 3617 3618 r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size); 3619 CU_ASSERT_EQUAL(r, 0); 3620 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size); 3621 CU_ASSERT_EQUAL(r, 0); 3622 3623 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 3624 CU_ASSERT_EQUAL(r, 0); 3625 3626 r = amdgpu_cs_ctx_free(context_handle); 3627 CU_ASSERT_EQUAL(r, 0); 3628} 3629 3630void amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring, 3631 int hang) 3632{ 3633 amdgpu_bo_handle bo_shader_ps, bo_shader_vs; 3634 void *ptr_shader_ps; 3635 void *ptr_shader_vs; 3636 uint64_t mc_address_shader_ps, mc_address_shader_vs; 3637 amdgpu_va_handle va_shader_ps, va_shader_vs; 3638 int bo_shader_size = 4096; 3639 enum ps_type ps_type = hang ? 
PS_HANG : PS_TEX; 3640 int r; 3641 3642 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 3643 AMDGPU_GEM_DOMAIN_VRAM, 0, 3644 &bo_shader_ps, &ptr_shader_ps, 3645 &mc_address_shader_ps, &va_shader_ps); 3646 CU_ASSERT_EQUAL(r, 0); 3647 memset(ptr_shader_ps, 0, bo_shader_size); 3648 3649 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 3650 AMDGPU_GEM_DOMAIN_VRAM, 0, 3651 &bo_shader_vs, &ptr_shader_vs, 3652 &mc_address_shader_vs, &va_shader_vs); 3653 CU_ASSERT_EQUAL(r, 0); 3654 memset(ptr_shader_vs, 0, bo_shader_size); 3655 3656 r = amdgpu_draw_load_ps_shader(ptr_shader_ps, ps_type); 3657 CU_ASSERT_EQUAL(r, 0); 3658 3659 r = amdgpu_draw_load_vs_shader(ptr_shader_vs); 3660 CU_ASSERT_EQUAL(r, 0); 3661 3662 amdgpu_memcpy_draw(device_handle, bo_shader_ps, bo_shader_vs, 3663 mc_address_shader_ps, mc_address_shader_vs, ring, hang); 3664 3665 r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size); 3666 CU_ASSERT_EQUAL(r, 0); 3667 3668 r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size); 3669 CU_ASSERT_EQUAL(r, 0); 3670} 3671 3672static void amdgpu_draw_test(void) 3673{ 3674 int r; 3675 struct drm_amdgpu_info_hw_ip info; 3676 uint32_t ring_id; 3677 3678 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info); 3679 CU_ASSERT_EQUAL(r, 0); 3680 if (!info.available_rings) 3681 printf("SKIP ... 
as there's no graphics ring\n"); 3682 3683 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 3684 amdgpu_memset_draw_test(device_handle, ring_id); 3685 amdgpu_memcpy_draw_test(device_handle, ring_id, 0); 3686 } 3687} 3688 3689void amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint32_t ring) 3690{ 3691 amdgpu_context_handle context_handle; 3692 amdgpu_bo_handle bo_shader_ps, bo_shader_vs; 3693 amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5]; 3694 void *ptr_shader_ps; 3695 void *ptr_shader_vs; 3696 volatile unsigned char *ptr_dst; 3697 unsigned char *ptr_src; 3698 uint32_t *ptr_cmd; 3699 uint64_t mc_address_dst, mc_address_src, mc_address_cmd; 3700 uint64_t mc_address_shader_ps, mc_address_shader_vs; 3701 amdgpu_va_handle va_shader_ps, va_shader_vs; 3702 amdgpu_va_handle va_dst, va_src, va_cmd; 3703 struct amdgpu_gpu_info gpu_info = {0}; 3704 int i, r; 3705 int bo_size = 0x4000000; 3706 int bo_shader_ps_size = 0x400000; 3707 int bo_shader_vs_size = 4096; 3708 int bo_cmd_size = 4096; 3709 struct amdgpu_cs_request ibs_request = {0}; 3710 struct amdgpu_cs_ib_info ib_info= {0}; 3711 uint32_t hang_state, hangs, expired; 3712 amdgpu_bo_list_handle bo_list; 3713 struct amdgpu_cs_fence fence_status = {0}; 3714 3715 r = amdgpu_query_gpu_info(device_handle, &gpu_info); 3716 CU_ASSERT_EQUAL(r, 0); 3717 3718 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 3719 CU_ASSERT_EQUAL(r, 0); 3720 3721 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 3722 AMDGPU_GEM_DOMAIN_GTT, 0, 3723 &bo_cmd, (void **)&ptr_cmd, 3724 &mc_address_cmd, &va_cmd); 3725 CU_ASSERT_EQUAL(r, 0); 3726 memset(ptr_cmd, 0, bo_cmd_size); 3727 3728 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_ps_size, 4096, 3729 AMDGPU_GEM_DOMAIN_VRAM, 0, 3730 &bo_shader_ps, &ptr_shader_ps, 3731 &mc_address_shader_ps, &va_shader_ps); 3732 CU_ASSERT_EQUAL(r, 0); 3733 memset(ptr_shader_ps, 0, bo_shader_ps_size); 3734 3735 r = 
amdgpu_bo_alloc_and_map(device_handle, bo_shader_vs_size, 4096, 3736 AMDGPU_GEM_DOMAIN_VRAM, 0, 3737 &bo_shader_vs, &ptr_shader_vs, 3738 &mc_address_shader_vs, &va_shader_vs); 3739 CU_ASSERT_EQUAL(r, 0); 3740 memset(ptr_shader_vs, 0, bo_shader_vs_size); 3741 3742 r = amdgpu_draw_load_ps_shader_hang_slow(ptr_shader_ps, gpu_info.family_id); 3743 CU_ASSERT_EQUAL(r, 0); 3744 3745 r = amdgpu_draw_load_vs_shader(ptr_shader_vs); 3746 CU_ASSERT_EQUAL(r, 0); 3747 3748 r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096, 3749 AMDGPU_GEM_DOMAIN_VRAM, 0, 3750 &bo_src, (void **)&ptr_src, 3751 &mc_address_src, &va_src); 3752 CU_ASSERT_EQUAL(r, 0); 3753 3754 r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096, 3755 AMDGPU_GEM_DOMAIN_VRAM, 0, 3756 &bo_dst, (void **)&ptr_dst, 3757 &mc_address_dst, &va_dst); 3758 CU_ASSERT_EQUAL(r, 0); 3759 3760 memset(ptr_src, 0x55, bo_size); 3761 3762 i = 0; 3763 i += amdgpu_draw_init(ptr_cmd + i); 3764 3765 i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 1); 3766 3767 i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 1); 3768 3769 i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, 3770 mc_address_shader_vs, 1); 3771 3772 i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps); 3773 3774 ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8); 3775 ptr_cmd[i++] = 0xc; 3776 ptr_cmd[i++] = mc_address_src >> 8; 3777 ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000; 3778 ptr_cmd[i++] = 0x1ffc7ff; 3779 ptr_cmd[i++] = 0x90500fac; 3780 ptr_cmd[i++] = 0xffe000; 3781 i += 3; 3782 3783 ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4); 3784 ptr_cmd[i++] = 0x14; 3785 ptr_cmd[i++] = 0x92; 3786 i += 3; 3787 3788 ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3789 ptr_cmd[i++] = 0x191; 3790 ptr_cmd[i++] = 0; 3791 3792 i += amdgpu_draw_draw(ptr_cmd + i); 3793 3794 while (i & 7) 3795 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 3796 3797 resources[0] = bo_dst; 3798 resources[1] = bo_src; 3799 
resources[2] = bo_shader_ps; 3800 resources[3] = bo_shader_vs; 3801 resources[4] = bo_cmd; 3802 r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list); 3803 CU_ASSERT_EQUAL(r, 0); 3804 3805 ib_info.ib_mc_address = mc_address_cmd; 3806 ib_info.size = i; 3807 ibs_request.ip_type = AMDGPU_HW_IP_GFX; 3808 ibs_request.ring = ring; 3809 ibs_request.resources = bo_list; 3810 ibs_request.number_of_ibs = 1; 3811 ibs_request.ibs = &ib_info; 3812 ibs_request.fence_info.handle = NULL; 3813 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 3814 CU_ASSERT_EQUAL(r, 0); 3815 3816 fence_status.ip_type = AMDGPU_HW_IP_GFX; 3817 fence_status.ip_instance = 0; 3818 fence_status.ring = ring; 3819 fence_status.context = context_handle; 3820 fence_status.fence = ibs_request.seq_no; 3821 3822 /* wait for IB accomplished */ 3823 r = amdgpu_cs_query_fence_status(&fence_status, 3824 AMDGPU_TIMEOUT_INFINITE, 3825 0, &expired); 3826 3827 r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); 3828 CU_ASSERT_EQUAL(r, 0); 3829 CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); 3830 3831 r = amdgpu_bo_list_destroy(bo_list); 3832 CU_ASSERT_EQUAL(r, 0); 3833 3834 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size); 3835 CU_ASSERT_EQUAL(r, 0); 3836 r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size); 3837 CU_ASSERT_EQUAL(r, 0); 3838 3839 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 3840 CU_ASSERT_EQUAL(r, 0); 3841 3842 r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_ps_size); 3843 CU_ASSERT_EQUAL(r, 0); 3844 r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_vs_size); 3845 CU_ASSERT_EQUAL(r, 0); 3846 3847 r = amdgpu_cs_ctx_free(context_handle); 3848 CU_ASSERT_EQUAL(r, 0); 3849} 3850 3851static void amdgpu_gpu_reset_test(void) 3852{ 3853 int r; 3854 char debugfs_path[256], tmp[10]; 3855 int fd; 3856 struct stat sbuf; 
3857 amdgpu_context_handle context_handle; 3858 uint32_t hang_state, hangs; 3859 3860 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 3861 CU_ASSERT_EQUAL(r, 0); 3862 3863 r = fstat(drm_amdgpu[0], &sbuf); 3864 CU_ASSERT_EQUAL(r, 0); 3865 3866 sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/amdgpu_gpu_recover", minor(sbuf.st_rdev)); 3867 fd = open(debugfs_path, O_RDONLY); 3868 CU_ASSERT(fd >= 0); 3869 3870 r = read(fd, tmp, sizeof(tmp)/sizeof(char)); 3871 CU_ASSERT(r > 0); 3872 3873 r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); 3874 CU_ASSERT_EQUAL(r, 0); 3875 CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); 3876 3877 close(fd); 3878 r = amdgpu_cs_ctx_free(context_handle); 3879 CU_ASSERT_EQUAL(r, 0); 3880 3881 amdgpu_compute_dispatch_test(); 3882 amdgpu_gfx_dispatch_test(); 3883} 3884