basic_tests.c revision 41687f09
1/* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22*/ 23 24#include <stdio.h> 25#include <stdlib.h> 26#include <unistd.h> 27#include <sys/types.h> 28#ifdef MAJOR_IN_SYSMACROS 29#include <sys/sysmacros.h> 30#endif 31#include <sys/stat.h> 32#include <fcntl.h> 33#if HAVE_ALLOCA_H 34# include <alloca.h> 35#endif 36#include <sys/wait.h> 37 38#include "CUnit/Basic.h" 39 40#include "amdgpu_test.h" 41#include "amdgpu_drm.h" 42#include "amdgpu_internal.h" 43#include "util_math.h" 44 45static amdgpu_device_handle device_handle; 46static uint32_t major_version; 47static uint32_t minor_version; 48static uint32_t family_id; 49 50static void amdgpu_query_info_test(void); 51static void amdgpu_command_submission_gfx(void); 52static void amdgpu_command_submission_compute(void); 53static void amdgpu_command_submission_multi_fence(void); 54static void amdgpu_command_submission_sdma(void); 55static void amdgpu_userptr_test(void); 56static void amdgpu_semaphore_test(void); 57static void amdgpu_sync_dependency_test(void); 58static void amdgpu_bo_eviction_test(void); 59static void amdgpu_compute_dispatch_test(void); 60static void amdgpu_gfx_dispatch_test(void); 61static void amdgpu_draw_test(void); 62static void amdgpu_gpu_reset_test(void); 63 64static void amdgpu_command_submission_write_linear_helper(unsigned ip_type); 65static void amdgpu_command_submission_const_fill_helper(unsigned ip_type); 66static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type); 67static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle, 68 unsigned ip_type, 69 int instance, int pm4_dw, uint32_t *pm4_src, 70 int res_cnt, amdgpu_bo_handle *resources, 71 struct amdgpu_cs_ib_info *ib_info, 72 struct amdgpu_cs_request *ibs_request); 73 74CU_TestInfo basic_tests[] = { 75 { "Query Info Test", amdgpu_query_info_test }, 76 { "Userptr Test", amdgpu_userptr_test }, 77 { "bo eviction Test", amdgpu_bo_eviction_test }, 78 { "Command submission Test (GFX)", amdgpu_command_submission_gfx }, 79 { "Command submission Test (Compute)", amdgpu_command_submission_compute }, 80 { "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence }, 81 { "Command submission Test (SDMA)", amdgpu_command_submission_sdma }, 82 { "SW semaphore Test", amdgpu_semaphore_test }, 83 { "Sync dependency Test", amdgpu_sync_dependency_test }, 84 { "Dispatch Test (Compute)", amdgpu_compute_dispatch_test }, 85 { "Dispatch Test (GFX)", amdgpu_gfx_dispatch_test }, 86 { "Draw Test", amdgpu_draw_test }, 87 { "GPU reset Test", amdgpu_gpu_reset_test }, 88 CU_TEST_INFO_NULL, 89}; 90#define BUFFER_SIZE (MAX2(8 * 1024, getpagesize())) 91#define SDMA_PKT_HEADER_op_offset 0 92#define SDMA_PKT_HEADER_op_mask 0x000000FF 93#define SDMA_PKT_HEADER_op_shift 0 94#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift) 95#define SDMA_OPCODE_CONSTANT_FILL 11 96# define SDMA_CONSTANT_FILL_EXTRA_SIZE(x) ((x) << 14) 97 /* 0 = byte fill 98 * 2 = DW fill 99 */ 100#define SDMA_PACKET(op, sub_op, e) ((((e) & 0xFFFF) << 16) | \ 101 (((sub_op) & 0xFF) << 8) | \ 102 (((op) & 0xFF) << 0)) 103#define SDMA_OPCODE_WRITE 2 104# define SDMA_WRITE_SUB_OPCODE_LINEAR 0 105# define SDMA_WRTIE_SUB_OPCODE_TILED 1 106 107#define SDMA_OPCODE_COPY 1 108# define SDMA_COPY_SUB_OPCODE_LINEAR 0 109 110#define SDMA_OPCODE_ATOMIC 10 111# define SDMA_ATOMIC_LOOP(x) ((x) << 0) 112 /* 0 - single_pass_atomic. 113 * 1 - loop_until_compare_satisfied. 114 */ 115# define SDMA_ATOMIC_TMZ(x) ((x) << 2) 116 /* 0 - non-TMZ. 117 * 1 - TMZ. 118 */ 119# define SDMA_ATOMIC_OPCODE(x) ((x) << 9) 120 /* TC_OP_ATOMIC_CMPSWAP_RTN_32 0x00000008 121 * same as Packet 3 122 */ 123 124#define GFX_COMPUTE_NOP 0xffff1000 125#define SDMA_NOP 0x0 126 127/* PM4 */ 128#define PACKET_TYPE0 0 129#define PACKET_TYPE1 1 130#define PACKET_TYPE2 2 131#define PACKET_TYPE3 3 132 133#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3) 134#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF) 135#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF) 136#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF) 137#define PACKET0(reg, n) ((PACKET_TYPE0 << 30) | \ 138 ((reg) & 0xFFFF) | \ 139 ((n) & 0x3FFF) << 16) 140#define CP_PACKET2 0x80000000 141#define PACKET2_PAD_SHIFT 0 142#define PACKET2_PAD_MASK (0x3fffffff << 0) 143 144#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v))) 145 146#define PACKET3(op, n) ((PACKET_TYPE3 << 30) | \ 147 (((op) & 0xFF) << 8) | \ 148 ((n) & 0x3FFF) << 16) 149#define PACKET3_COMPUTE(op, n) PACKET3(op, n) | (1 << 1) 150 151/* Packet 3 types */ 152#define PACKET3_NOP 0x10 153 154#define PACKET3_WRITE_DATA 0x37 155#define WRITE_DATA_DST_SEL(x) ((x) << 8) 156 /* 0 - register 157 * 1 - memory (sync - via GRBM) 158 * 2 - gl2 159 * 3 - gds 160 * 4 - reserved 161 * 5 - memory (async - direct) 162 */ 163#define WR_ONE_ADDR (1 << 16) 164#define WR_CONFIRM (1 << 20) 165#define WRITE_DATA_CACHE_POLICY(x) ((x) << 25) 166 /* 0 - LRU 167 * 1 - Stream 168 */ 169#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30) 170 /* 0 - me 171 * 1 - pfp 172 * 2 - ce 173 */ 174 175#define PACKET3_ATOMIC_MEM 0x1E 176#define TC_OP_ATOMIC_CMPSWAP_RTN_32 0x00000008 177#define ATOMIC_MEM_COMMAND(x) ((x) << 8) 178 /* 0 - single_pass_atomic. 179 * 1 - loop_until_compare_satisfied. 180 */ 181#define ATOMIC_MEM_CACHEPOLICAY(x) ((x) << 25) 182 /* 0 - lru. 183 * 1 - stream. 184 */ 185#define ATOMIC_MEM_ENGINESEL(x) ((x) << 30) 186 /* 0 - micro_engine. 187 */ 188 189#define PACKET3_DMA_DATA 0x50 190/* 1. header 191 * 2. CONTROL 192 * 3. SRC_ADDR_LO or DATA [31:0] 193 * 4. SRC_ADDR_HI [31:0] 194 * 5. DST_ADDR_LO [31:0] 195 * 6. DST_ADDR_HI [7:0] 196 * 7. COMMAND [30:21] | BYTE_COUNT [20:0] 197 */ 198/* CONTROL */ 199# define PACKET3_DMA_DATA_ENGINE(x) ((x) << 0) 200 /* 0 - ME 201 * 1 - PFP 202 */ 203# define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13) 204 /* 0 - LRU 205 * 1 - Stream 206 * 2 - Bypass 207 */ 208# define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15) 209# define PACKET3_DMA_DATA_DST_SEL(x) ((x) << 20) 210 /* 0 - DST_ADDR using DAS 211 * 1 - GDS 212 * 3 - DST_ADDR using L2 213 */ 214# define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25) 215 /* 0 - LRU 216 * 1 - Stream 217 * 2 - Bypass 218 */ 219# define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27) 220# define PACKET3_DMA_DATA_SRC_SEL(x) ((x) << 29) 221 /* 0 - SRC_ADDR using SAS 222 * 1 - GDS 223 * 2 - DATA 224 * 3 - SRC_ADDR using L2 225 */ 226# define PACKET3_DMA_DATA_CP_SYNC (1 << 31) 227/* COMMAND */ 228# define PACKET3_DMA_DATA_DIS_WC (1 << 21) 229# define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22) 230 /* 0 - none 231 * 1 - 8 in 16 232 * 2 - 8 in 32 233 * 3 - 8 in 64 234 */ 235# define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24) 236 /* 0 - none 237 * 1 - 8 in 16 238 * 2 - 8 in 32 239 * 3 - 8 in 64 240 */ 241# define PACKET3_DMA_DATA_CMD_SAS (1 << 26) 242 /* 0 - memory 243 * 1 - register 244 */ 245# define PACKET3_DMA_DATA_CMD_DAS (1 << 27) 246 /* 0 - memory 247 * 1 - register 248 */ 249# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28) 250# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29) 251# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30) 252 253#define SDMA_PACKET_SI(op, b, t, s, cnt) ((((op) & 0xF) << 28) | \ 254 (((b) & 0x1) << 26) | \ 255 (((t) & 0x1) << 23) | \ 256 (((s) & 0x1) << 22) | \ 257 (((cnt) & 0xFFFFF) << 0)) 258#define SDMA_OPCODE_COPY_SI 3 259#define SDMA_OPCODE_CONSTANT_FILL_SI 13 260#define SDMA_NOP_SI 0xf 261#define GFX_COMPUTE_NOP_SI 0x80000000 262#define PACKET3_DMA_DATA_SI 0x41 263# define PACKET3_DMA_DATA_SI_ENGINE(x) ((x) << 27) 264 /* 0 - ME 265 * 1 - PFP 266 */ 267# define PACKET3_DMA_DATA_SI_DST_SEL(x) ((x) << 20) 268 /* 0 - DST_ADDR using DAS 269 * 1 - GDS 270 * 3 - DST_ADDR using L2 271 */ 272# define PACKET3_DMA_DATA_SI_SRC_SEL(x) ((x) << 29) 273 /* 0 - SRC_ADDR using SAS 274 * 1 - GDS 275 * 2 - DATA 276 * 3 - SRC_ADDR using L2 277 */ 278# define PACKET3_DMA_DATA_SI_CP_SYNC (1 << 31) 279 280 281#define PKT3_CONTEXT_CONTROL 0x28 282#define CONTEXT_CONTROL_LOAD_ENABLE(x) (((unsigned)(x) & 0x1) << 31) 283#define CONTEXT_CONTROL_LOAD_CE_RAM(x) (((unsigned)(x) & 0x1) << 28) 284#define CONTEXT_CONTROL_SHADOW_ENABLE(x) (((unsigned)(x) & 0x1) << 31) 285 286#define PKT3_CLEAR_STATE 0x12 287 288#define PKT3_SET_SH_REG 0x76 289#define PACKET3_SET_SH_REG_START 0x00002c00 290 291#define PACKET3_DISPATCH_DIRECT 0x15 292#define PACKET3_EVENT_WRITE 0x46 293#define PACKET3_ACQUIRE_MEM 0x58 294#define PACKET3_SET_CONTEXT_REG 0x69 295#define PACKET3_SET_UCONFIG_REG 0x79 296#define PACKET3_DRAW_INDEX_AUTO 0x2D 297/* gfx 8 */ 298#define mmCOMPUTE_PGM_LO 0x2e0c 299#define mmCOMPUTE_PGM_RSRC1 0x2e12 300#define mmCOMPUTE_TMPRING_SIZE 0x2e18 301#define mmCOMPUTE_USER_DATA_0 0x2e40 302#define mmCOMPUTE_USER_DATA_1 0x2e41 303#define mmCOMPUTE_RESOURCE_LIMITS 0x2e15 304#define mmCOMPUTE_NUM_THREAD_X 0x2e07 305 306 307 308#define SWAP_32(num) (((num & 0xff000000) >> 24) | \ 309 ((num & 0x0000ff00) << 8) | \ 310 ((num & 0x00ff0000) >> 8) | \ 311 ((num & 0x000000ff) << 24)) 312 313 314/* Shader code 315 * void main() 316{ 317 318 float x = some_input; 319 for (unsigned i = 0; i < 1000000; i++) 320 x = sin(x); 321 322 u[0] = 42u; 323} 324*/ 325 326static uint32_t shader_bin[] = { 327 SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf), 328 SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf), 329 SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e), 330 SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf) 331}; 332 333#define CODE_OFFSET 512 334#define DATA_OFFSET 1024 335 336enum cs_type { 337 CS_BUFFERCLEAR, 338 CS_BUFFERCOPY, 339 CS_HANG, 340 CS_HANG_SLOW 341}; 342 343static const uint32_t bufferclear_cs_shader_gfx9[] = { 344 0xD1FD0000, 0x04010C08, 0x7E020204, 0x7E040205, 345 0x7E060206, 0x7E080207, 0xE01C2000, 0x80000100, 346 0xBF810000 347}; 348 349static const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = { 350 {0x2e12, 0x000C0041}, //{ mmCOMPUTE_PGM_RSRC1, 0x000C0041 }, 351 {0x2e13, 0x00000090}, //{ mmCOMPUTE_PGM_RSRC2, 0x00000090 }, 352 {0x2e07, 0x00000040}, //{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 }, 353 {0x2e08, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 }, 354 {0x2e09, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 } 355}; 356 357static const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5; 358 359static const uint32_t buffercopy_cs_shader_gfx9[] = { 360 0xD1FD0000, 0x04010C08, 0xE00C2000, 0x80000100, 361 0xBF8C0F70, 0xE01C2000, 0x80010100, 0xBF810000 362}; 363 364static const uint32_t preamblecache_gfx9[] = { 365 0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0, 366 0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000, 367 0xc0026900, 0xb4, 0x0, 0x3f800000, 0xc0016900, 0x103, 0x0, 368 0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0, 369 0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0, 370 0xc0016900, 0x2d5, 0x10000, 0xc0016900, 0x2dc, 0x0, 371 0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0, 372 0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 373 0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20, 374 0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0, 375 0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0, 376 0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0, 377 0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 378 0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0, 379 0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff, 380 0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0, 381 0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1, 382 0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0, 383 0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0, 384 0xc0017900, 0x24b, 0x0 385}; 386 387enum ps_type { 388 PS_CONST, 389 PS_TEX, 390 PS_HANG, 391 PS_HANG_SLOW 392}; 393 394static const uint32_t ps_const_shader_gfx9[] = { 395 0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203, 396 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 397 0xC4001C0F, 0x00000100, 0xBF810000 398}; 399 400static const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6; 401 402static const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = { 403 {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 }, 404 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 }, 405 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 }, 406 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 }, 407 { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 }, 408 { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 }, 409 { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 }, 410 { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 }, 411 { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 }, 412 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 } 413 } 414}; 415 416static const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = { 417 0x00000004 418}; 419 420static const uint32_t ps_num_sh_registers_gfx9 = 2; 421 422static const uint32_t ps_const_sh_registers_gfx9[][2] = { 423 {0x2C0A, 0x000C0040},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 }, 424 {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 } 425}; 426 427static const uint32_t ps_num_context_registers_gfx9 = 7; 428 429static const uint32_t ps_const_context_reg_gfx9[][2] = { 430 {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR, 0x00000002 }, 431 {0xA1B6, 0x00000000}, //{ mmSPI_PS_IN_CONTROL, 0x00000000 }, 432 {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK, 0x0000000F }, 433 {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL, 0x00000010 }, 434 {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT, 0x00000000 }, 435 {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */}, 436 {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT, 0x00000004 } 437}; 438 439static const uint32_t ps_tex_shader_gfx9[] = { 440 0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000, 441 0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00, 442 0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000, 443 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 444 0x00000100, 0xBF810000 445}; 446 447static const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = { 448 0x0000000B 449}; 450 451static const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6; 452 453static const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = { 454 {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 }, 455 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 }, 456 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 }, 457 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 }, 458 { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 459 { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 460 { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 461 { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 462 { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 463 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 } 464 } 465}; 466 467static const uint32_t ps_tex_sh_registers_gfx9[][2] = { 468 {0x2C0A, 0x000C0081},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 }, 469 {0x2C0B, 0x00000018}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 } 470}; 471 472static const uint32_t ps_tex_context_reg_gfx9[][2] = { 473 {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR, 0x00000002 }, 474 {0xA1B6, 0x00000001}, //{ mmSPI_PS_IN_CONTROL, 0x00000001 }, 475 {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK, 0x0000000F }, 476 {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL, 0x00000010 }, 477 {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT, 0x00000000 }, 478 {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */}, 479 {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT, 0x00000004 } 480}; 481 482static const uint32_t vs_RectPosTexFast_shader_gfx9[] = { 483 0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100, 484 0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206, 485 0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080, 486 0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003, 487 0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101, 488 0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903, 489 0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100, 490 0xC400020F, 0x05060403, 0xBF810000 491}; 492 493static const uint32_t cached_cmd_gfx9[] = { 494 0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0, 495 0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020, 496 0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf, 497 0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x12, 498 0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0, 499 0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011, 500 0xc0026900, 0x292, 0x20, 0x60201b8, 501 0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0 502}; 503 504unsigned int memcpy_ps_hang[] = { 505 0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100, 506 0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001, 507 0xC80D0101, 0xBF8C007F, 0xF0800F00, 0x00010002, 508 0xBEFE040C, 0xBF8C0F70, 0xBF800000, 0xBF800000, 509 0xF800180F, 0x03020100, 0xBF810000 510}; 511 512struct amdgpu_test_shader { 513 uint32_t *shader; 514 uint32_t header_length; 515 uint32_t body_length; 516 uint32_t foot_length; 517}; 518 519unsigned int memcpy_cs_hang_slow_ai_codes[] = { 520 0xd1fd0000, 0x04010c08, 0xe00c2000, 0x80000100, 521 0xbf8c0f70, 0xe01c2000, 0x80010100, 0xbf810000 522}; 523 524struct amdgpu_test_shader memcpy_cs_hang_slow_ai = { 525 memcpy_cs_hang_slow_ai_codes, 526 4, 527 3, 528 1 529}; 530 531unsigned int memcpy_cs_hang_slow_rv_codes[] = { 532 0x8e00860c, 0x32000000, 0xe00c2000, 0x80010100, 533 0xbf8c0f70, 0xe01c2000, 0x80020100, 0xbf810000 534}; 535 536struct amdgpu_test_shader memcpy_cs_hang_slow_rv = { 537 memcpy_cs_hang_slow_rv_codes, 538 4, 539 3, 540 1 541}; 542 543unsigned int memcpy_ps_hang_slow_ai_codes[] = { 544 0xbefc000c, 0xbe8e017e, 0xbefe077e, 0xd4080000, 545 0xd4090001, 0xd40c0100, 0xd40d0101, 0xf0800f00, 546 0x00400002, 0xbefe010e, 0xbf8c0f70, 0xbf800000, 547 0xbf800000, 0xbf800000, 0xbf800000, 0xc400180f, 548 0x03020100, 0xbf810000 549}; 550 551struct amdgpu_test_shader memcpy_ps_hang_slow_ai = { 552 memcpy_ps_hang_slow_ai_codes, 553 7, 554 2, 555 9 556}; 557 558int amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size, 559 unsigned alignment, unsigned heap, uint64_t alloc_flags, 560 uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu, 561 uint64_t *mc_address, 562 amdgpu_va_handle *va_handle) 563{ 564 struct amdgpu_bo_alloc_request request = {}; 565 amdgpu_bo_handle buf_handle; 566 amdgpu_va_handle handle; 567 uint64_t vmc_addr; 568 int r; 569 570 request.alloc_size = size; 571 request.phys_alignment = alignment; 572 request.preferred_heap = heap; 573 request.flags = alloc_flags; 574 575 r = amdgpu_bo_alloc(dev, &request, &buf_handle); 576 if (r) 577 return r; 578 579 r = amdgpu_va_range_alloc(dev, 580 amdgpu_gpu_va_range_general, 581 size, alignment, 0, &vmc_addr, 582 &handle, 0); 583 if (r) 584 goto error_va_alloc; 585 586 r = amdgpu_bo_va_op_raw(dev, buf_handle, 0, ALIGN(size, getpagesize()), vmc_addr, 587 AMDGPU_VM_PAGE_READABLE | 588 AMDGPU_VM_PAGE_WRITEABLE | 589 AMDGPU_VM_PAGE_EXECUTABLE | 590 mapping_flags, 591 AMDGPU_VA_OP_MAP); 592 if (r) 593 goto error_va_map; 594 595 r = amdgpu_bo_cpu_map(buf_handle, cpu); 596 if (r) 597 goto error_cpu_map; 598 599 *bo = buf_handle; 600 *mc_address = vmc_addr; 601 *va_handle = handle; 602 603 return 0; 604 605 error_cpu_map: 606 amdgpu_bo_cpu_unmap(buf_handle); 607 608 error_va_map: 609 amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP); 610 611 error_va_alloc: 612 amdgpu_bo_free(buf_handle); 613 return r; 614} 615 616 617 618CU_BOOL suite_basic_tests_enable(void) 619{ 620 uint32_t asic_id; 621 622 if (amdgpu_device_initialize(drm_amdgpu[0], &major_version, 623 &minor_version, &device_handle)) 624 return CU_FALSE; 625 626 asic_id = device_handle->info.asic_id; 627 628 if (amdgpu_device_deinitialize(device_handle)) 629 return CU_FALSE; 630 631 /* disable gfx engine basic test cases for Arturus due to no CPG */ 632 if (asic_is_arcturus(asic_id)) { 633 if (amdgpu_set_test_active("Basic Tests", 634 "Command submission Test (GFX)", 635 CU_FALSE)) 636 fprintf(stderr, "test deactivation failed - %s\n", 637 CU_get_error_msg()); 638 639 if (amdgpu_set_test_active("Basic Tests", 640 "Command submission Test (Multi-Fence)", 641 CU_FALSE)) 642 fprintf(stderr, "test deactivation failed - %s\n", 643 CU_get_error_msg()); 644 645 if (amdgpu_set_test_active("Basic Tests", 646 "Sync dependency Test", 647 CU_FALSE)) 648 fprintf(stderr, "test deactivation failed - %s\n", 649 CU_get_error_msg()); 650 } 651 652 return CU_TRUE; 653} 654 655int suite_basic_tests_init(void) 656{ 657 struct amdgpu_gpu_info gpu_info = {0}; 658 int r; 659 660 r = amdgpu_device_initialize(drm_amdgpu[0], &major_version, 661 &minor_version, &device_handle); 662 663 if (r) { 664 if ((r == -EACCES) && (errno == EACCES)) 665 printf("\n\nError:%s. " 666 "Hint:Try to run this test program as root.", 667 strerror(errno)); 668 return CUE_SINIT_FAILED; 669 } 670 671 r = amdgpu_query_gpu_info(device_handle, &gpu_info); 672 if (r) 673 return CUE_SINIT_FAILED; 674 675 family_id = gpu_info.family_id; 676 677 return CUE_SUCCESS; 678} 679 680int suite_basic_tests_clean(void) 681{ 682 int r = amdgpu_device_deinitialize(device_handle); 683 684 if (r == 0) 685 return CUE_SUCCESS; 686 else 687 return CUE_SCLEAN_FAILED; 688} 689 690static void amdgpu_query_info_test(void) 691{ 692 struct amdgpu_gpu_info gpu_info = {0}; 693 uint32_t version, feature; 694 int r; 695 696 r = amdgpu_query_gpu_info(device_handle, &gpu_info); 697 CU_ASSERT_EQUAL(r, 0); 698 699 r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0, 700 0, &version, &feature); 701 CU_ASSERT_EQUAL(r, 0); 702} 703 704static void amdgpu_command_submission_gfx_separate_ibs(void) 705{ 706 amdgpu_context_handle context_handle; 707 amdgpu_bo_handle ib_result_handle, ib_result_ce_handle; 708 void *ib_result_cpu, *ib_result_ce_cpu; 709 uint64_t ib_result_mc_address, ib_result_ce_mc_address; 710 struct amdgpu_cs_request ibs_request = {0}; 711 struct amdgpu_cs_ib_info ib_info[2]; 712 struct amdgpu_cs_fence fence_status = {0}; 713 uint32_t *ptr; 714 uint32_t expired; 715 amdgpu_bo_list_handle bo_list; 716 amdgpu_va_handle va_handle, va_handle_ce; 717 int r, i = 0; 718 719 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 720 CU_ASSERT_EQUAL(r, 0); 721 722 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 723 AMDGPU_GEM_DOMAIN_GTT, 0, 724 &ib_result_handle, &ib_result_cpu, 725 &ib_result_mc_address, &va_handle); 726 CU_ASSERT_EQUAL(r, 0); 727 728 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 729 AMDGPU_GEM_DOMAIN_GTT, 0, 730 &ib_result_ce_handle, &ib_result_ce_cpu, 731 &ib_result_ce_mc_address, &va_handle_ce); 732 CU_ASSERT_EQUAL(r, 0); 733 734 r = amdgpu_get_bo_list(device_handle, ib_result_handle, 735 ib_result_ce_handle, &bo_list); 736 CU_ASSERT_EQUAL(r, 0); 737 738 memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info)); 739 740 /* IT_SET_CE_DE_COUNTERS */ 741 ptr = ib_result_ce_cpu; 742 if (family_id != AMDGPU_FAMILY_SI) { 743 ptr[i++] = 0xc0008900; 744 ptr[i++] = 0; 745 } 746 ptr[i++] = 0xc0008400; 747 ptr[i++] = 1; 748 ib_info[0].ib_mc_address = ib_result_ce_mc_address; 749 ib_info[0].size = i; 750 ib_info[0].flags = AMDGPU_IB_FLAG_CE; 751 752 /* IT_WAIT_ON_CE_COUNTER */ 753 ptr = ib_result_cpu; 754 ptr[0] = 0xc0008600; 755 ptr[1] = 0x00000001; 756 ib_info[1].ib_mc_address = ib_result_mc_address; 757 ib_info[1].size = 2; 758 759 ibs_request.ip_type = AMDGPU_HW_IP_GFX; 760 ibs_request.number_of_ibs = 2; 761 ibs_request.ibs = ib_info; 762 ibs_request.resources = bo_list; 763 ibs_request.fence_info.handle = NULL; 764 765 r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1); 766 767 CU_ASSERT_EQUAL(r, 0); 768 769 fence_status.context = context_handle; 770 fence_status.ip_type = AMDGPU_HW_IP_GFX; 771 fence_status.ip_instance = 0; 772 fence_status.fence = ibs_request.seq_no; 773 774 r = amdgpu_cs_query_fence_status(&fence_status, 775 AMDGPU_TIMEOUT_INFINITE, 776 0, &expired); 777 CU_ASSERT_EQUAL(r, 0); 778 779 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 780 ib_result_mc_address, 4096); 781 CU_ASSERT_EQUAL(r, 0); 782 783 r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce, 784 ib_result_ce_mc_address, 4096); 785 CU_ASSERT_EQUAL(r, 0); 786 787 r = amdgpu_bo_list_destroy(bo_list); 788 CU_ASSERT_EQUAL(r, 0); 789 790 r = amdgpu_cs_ctx_free(context_handle); 791 CU_ASSERT_EQUAL(r, 0); 792 793} 794 795static void amdgpu_command_submission_gfx_shared_ib(void) 796{ 797 amdgpu_context_handle context_handle; 798 amdgpu_bo_handle ib_result_handle; 799 void *ib_result_cpu; 800 uint64_t ib_result_mc_address; 801 struct amdgpu_cs_request ibs_request = {0}; 802 struct amdgpu_cs_ib_info ib_info[2]; 803 struct amdgpu_cs_fence fence_status = {0}; 804 uint32_t *ptr; 805 uint32_t expired; 806 amdgpu_bo_list_handle bo_list; 807 amdgpu_va_handle va_handle; 808 int r, i = 0; 809 810 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 811 CU_ASSERT_EQUAL(r, 0); 812 813 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 814 AMDGPU_GEM_DOMAIN_GTT, 0, 815 &ib_result_handle, &ib_result_cpu, 816 &ib_result_mc_address, &va_handle); 817 CU_ASSERT_EQUAL(r, 0); 818 819 r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, 820 &bo_list); 821 CU_ASSERT_EQUAL(r, 0); 822 823 memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info)); 824 825 /* IT_SET_CE_DE_COUNTERS */ 826 ptr = ib_result_cpu; 827 if (family_id != AMDGPU_FAMILY_SI) { 828 ptr[i++] = 0xc0008900; 829 ptr[i++] = 0; 830 } 831 ptr[i++] = 0xc0008400; 832 ptr[i++] = 1; 833 ib_info[0].ib_mc_address = ib_result_mc_address; 834 ib_info[0].size = i; 835 ib_info[0].flags = AMDGPU_IB_FLAG_CE; 836 837 ptr = (uint32_t *)ib_result_cpu + 4; 838 ptr[0] = 0xc0008600; 839 ptr[1] = 0x00000001; 840 ib_info[1].ib_mc_address = ib_result_mc_address + 16; 841 ib_info[1].size = 2; 842 843 ibs_request.ip_type = AMDGPU_HW_IP_GFX; 844 ibs_request.number_of_ibs = 2; 845 ibs_request.ibs = ib_info; 846 ibs_request.resources = bo_list; 847 ibs_request.fence_info.handle = NULL; 848 849 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 850 851 CU_ASSERT_EQUAL(r, 0); 852 853 fence_status.context = context_handle; 854 fence_status.ip_type = AMDGPU_HW_IP_GFX; 855 fence_status.ip_instance = 0; 856 fence_status.fence = ibs_request.seq_no; 857 858 r = amdgpu_cs_query_fence_status(&fence_status, 859 AMDGPU_TIMEOUT_INFINITE, 860 0, &expired); 861 CU_ASSERT_EQUAL(r, 0); 862 863 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 864 ib_result_mc_address, 4096); 865 CU_ASSERT_EQUAL(r, 0); 866 867 r = amdgpu_bo_list_destroy(bo_list); 868 CU_ASSERT_EQUAL(r, 0); 869 870 r = amdgpu_cs_ctx_free(context_handle); 871 CU_ASSERT_EQUAL(r, 0); 872} 873 874static void amdgpu_command_submission_gfx_cp_write_data(void) 875{ 876 amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX); 877} 878 879static void amdgpu_command_submission_gfx_cp_const_fill(void) 880{ 881 amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX); 882} 883 884static void amdgpu_command_submission_gfx_cp_copy_data(void) 885{ 886 amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX); 887} 888 889static void amdgpu_bo_eviction_test(void) 890{ 891 const int sdma_write_length = 1024; 892 const int pm4_dw = 256; 893 amdgpu_context_handle context_handle; 894 amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2]; 895 amdgpu_bo_handle *resources; 896 uint32_t *pm4; 897 struct amdgpu_cs_ib_info *ib_info; 898 struct amdgpu_cs_request *ibs_request; 899 uint64_t bo1_mc, bo2_mc; 900 volatile unsigned char *bo1_cpu, *bo2_cpu; 901 int i, j, r, loop1, loop2; 902 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 903 amdgpu_va_handle bo1_va_handle, bo2_va_handle; 904 struct amdgpu_heap_info vram_info, gtt_info; 905 906 pm4 = calloc(pm4_dw, sizeof(*pm4)); 907 CU_ASSERT_NOT_EQUAL(pm4, NULL); 908 909 ib_info = calloc(1, sizeof(*ib_info)); 910 CU_ASSERT_NOT_EQUAL(ib_info, NULL); 911 912 ibs_request = calloc(1, sizeof(*ibs_request)); 913 CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 914 915 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 916 CU_ASSERT_EQUAL(r, 0); 917 918 /* prepare resource */ 919 resources = calloc(4, sizeof(amdgpu_bo_handle)); 920 CU_ASSERT_NOT_EQUAL(resources, NULL); 921 922 r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM, 923 0, &vram_info); 924 CU_ASSERT_EQUAL(r, 0); 925 926 r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT, 927 0, >t_info); 928 CU_ASSERT_EQUAL(r, 0); 929 930 if (vram_info.max_allocation > gtt_info.heap_size/3) { 931 vram_info.max_allocation = gtt_info.heap_size/3; 932 gtt_info.max_allocation = vram_info.max_allocation; 933 } 934 935 r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096, 936 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]); 937 CU_ASSERT_EQUAL(r, 0); 938 r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096, 939 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]); 940 CU_ASSERT_EQUAL(r, 0); 941 942 r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096, 943 AMDGPU_GEM_DOMAIN_GTT, 0, >t_max[0]); 944 CU_ASSERT_EQUAL(r, 0); 945 r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096, 946 AMDGPU_GEM_DOMAIN_GTT, 0, >t_max[1]); 947 CU_ASSERT_EQUAL(r, 0); 948 949 950 951 loop1 = loop2 = 0; 952 /* run 9 circle to test all mapping combination */ 953 while(loop1 < 2) { 954 while(loop2 < 2) { 955 /* allocate UC bo1for sDMA use */ 956 r = amdgpu_bo_alloc_and_map(device_handle, 957 sdma_write_length, 4096, 958 AMDGPU_GEM_DOMAIN_GTT, 959 gtt_flags[loop1], &bo1, 960 (void**)&bo1_cpu, &bo1_mc, 961 &bo1_va_handle); 962 CU_ASSERT_EQUAL(r, 0); 963 964 /* set bo1 */ 965 memset((void*)bo1_cpu, 0xaa, sdma_write_length); 966 967 /* allocate UC bo2 for sDMA use */ 968 r = amdgpu_bo_alloc_and_map(device_handle, 969 sdma_write_length, 4096, 970 AMDGPU_GEM_DOMAIN_GTT, 971 gtt_flags[loop2], &bo2, 972 (void**)&bo2_cpu, &bo2_mc, 973 &bo2_va_handle); 974 CU_ASSERT_EQUAL(r, 0); 975 976 /* clear bo2 */ 977 memset((void*)bo2_cpu, 0, sdma_write_length); 978 979 resources[0] = bo1; 980 resources[1] = bo2; 981 resources[2] = vram_max[loop2]; 982 resources[3] = gtt_max[loop2]; 983 984 /* fulfill PM4: test DMA copy linear */ 985 i = j = 0; 986 if (family_id == AMDGPU_FAMILY_SI) { 987 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0, 988 sdma_write_length); 989 pm4[i++] = 0xffffffff & bo2_mc; 990 pm4[i++] = 0xffffffff & bo1_mc; 991 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 992 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 993 } else { 994 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0); 995 if (family_id >= AMDGPU_FAMILY_AI) 996 pm4[i++] = sdma_write_length - 1; 997 else 998 pm4[i++] = sdma_write_length; 999 pm4[i++] = 0; 1000 pm4[i++] = 0xffffffff & bo1_mc; 1001 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 1002 pm4[i++] = 0xffffffff & bo2_mc; 1003 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 1004 } 1005 1006 amdgpu_test_exec_cs_helper(context_handle, 1007 AMDGPU_HW_IP_DMA, 0, 1008 i, pm4, 1009 4, resources, 1010 ib_info, ibs_request); 1011 1012 /* verify if SDMA test result meets with expected */ 1013 i = 0; 1014 while(i < sdma_write_length) { 1015 CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa); 1016 } 1017 r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc, 1018 sdma_write_length); 1019 CU_ASSERT_EQUAL(r, 0); 1020 r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc, 1021 sdma_write_length); 1022 CU_ASSERT_EQUAL(r, 0); 1023 loop2++; 1024 } 1025 loop2 = 0; 1026 loop1++; 1027 } 1028 amdgpu_bo_free(vram_max[0]); 1029 amdgpu_bo_free(vram_max[1]); 1030 amdgpu_bo_free(gtt_max[0]); 1031 amdgpu_bo_free(gtt_max[1]); 1032 /* clean resources */ 1033 free(resources); 1034 free(ibs_request); 1035 free(ib_info); 1036 free(pm4); 1037 1038 /* end of test */ 1039 r = amdgpu_cs_ctx_free(context_handle); 1040 CU_ASSERT_EQUAL(r, 0); 1041} 1042 1043 1044static void amdgpu_command_submission_gfx(void) 1045{ 1046 /* write data using the CP */ 1047 amdgpu_command_submission_gfx_cp_write_data(); 1048 /* const fill using the CP */ 1049 amdgpu_command_submission_gfx_cp_const_fill(); 1050 /* copy data using the CP */ 1051 amdgpu_command_submission_gfx_cp_copy_data(); 1052 /* separate IB buffers for multi-IB submission */ 1053 amdgpu_command_submission_gfx_separate_ibs(); 1054 /* shared IB buffer for multi-IB submission */ 1055 amdgpu_command_submission_gfx_shared_ib(); 1056} 1057 1058static void amdgpu_semaphore_test(void) 1059{ 1060 amdgpu_context_handle context_handle[2]; 1061 amdgpu_semaphore_handle sem; 1062 amdgpu_bo_handle ib_result_handle[2]; 1063 void *ib_result_cpu[2]; 1064 uint64_t ib_result_mc_address[2]; 1065 struct amdgpu_cs_request ibs_request[2] = {0}; 1066 struct amdgpu_cs_ib_info ib_info[2] = {0}; 1067 struct amdgpu_cs_fence fence_status = {0}; 1068 uint32_t *ptr; 1069 uint32_t expired; 1070 uint32_t sdma_nop, gfx_nop; 1071 amdgpu_bo_list_handle bo_list[2]; 1072 amdgpu_va_handle va_handle[2]; 1073 int r, i; 1074 1075 if (family_id == AMDGPU_FAMILY_SI) { 1076 sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0); 1077 gfx_nop = GFX_COMPUTE_NOP_SI; 1078 } else { 1079 sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP); 1080 gfx_nop = GFX_COMPUTE_NOP; 1081 } 1082 1083 r = amdgpu_cs_create_semaphore(&sem); 1084 CU_ASSERT_EQUAL(r, 0); 1085 for (i = 0; i < 2; i++) { 1086 r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]); 1087 CU_ASSERT_EQUAL(r, 0); 1088 1089 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 1090 AMDGPU_GEM_DOMAIN_GTT, 0, 1091 &ib_result_handle[i], &ib_result_cpu[i], 1092 &ib_result_mc_address[i], &va_handle[i]); 1093 CU_ASSERT_EQUAL(r, 0); 1094 1095 r = amdgpu_get_bo_list(device_handle, ib_result_handle[i], 1096 NULL, &bo_list[i]); 1097 CU_ASSERT_EQUAL(r, 0); 1098 } 1099 1100 /* 1. same context different engine */ 1101 ptr = ib_result_cpu[0]; 1102 ptr[0] = sdma_nop; 1103 ib_info[0].ib_mc_address = ib_result_mc_address[0]; 1104 ib_info[0].size = 1; 1105 1106 ibs_request[0].ip_type = AMDGPU_HW_IP_DMA; 1107 ibs_request[0].number_of_ibs = 1; 1108 ibs_request[0].ibs = &ib_info[0]; 1109 ibs_request[0].resources = bo_list[0]; 1110 ibs_request[0].fence_info.handle = NULL; 1111 r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1); 1112 CU_ASSERT_EQUAL(r, 0); 1113 r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem); 1114 CU_ASSERT_EQUAL(r, 0); 1115 1116 r = amdgpu_cs_wait_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem); 1117 CU_ASSERT_EQUAL(r, 0); 1118 ptr = ib_result_cpu[1]; 1119 ptr[0] = gfx_nop; 1120 ib_info[1].ib_mc_address = ib_result_mc_address[1]; 1121 ib_info[1].size = 1; 1122 1123 ibs_request[1].ip_type = AMDGPU_HW_IP_GFX; 1124 ibs_request[1].number_of_ibs = 1; 1125 ibs_request[1].ibs = &ib_info[1]; 1126 ibs_request[1].resources = bo_list[1]; 1127 ibs_request[1].fence_info.handle = NULL; 1128 1129 r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[1], 1); 1130 CU_ASSERT_EQUAL(r, 0); 1131 1132 fence_status.context = context_handle[0]; 1133 fence_status.ip_type = AMDGPU_HW_IP_GFX; 1134 fence_status.ip_instance = 0; 1135 fence_status.fence = ibs_request[1].seq_no; 1136 r = amdgpu_cs_query_fence_status(&fence_status, 1137 500000000, 0, &expired); 1138 CU_ASSERT_EQUAL(r, 0); 1139 CU_ASSERT_EQUAL(expired, true); 1140 1141 /* 2. same engine different context */ 1142 ptr = ib_result_cpu[0]; 1143 ptr[0] = gfx_nop; 1144 ib_info[0].ib_mc_address = ib_result_mc_address[0]; 1145 ib_info[0].size = 1; 1146 1147 ibs_request[0].ip_type = AMDGPU_HW_IP_GFX; 1148 ibs_request[0].number_of_ibs = 1; 1149 ibs_request[0].ibs = &ib_info[0]; 1150 ibs_request[0].resources = bo_list[0]; 1151 ibs_request[0].fence_info.handle = NULL; 1152 r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1); 1153 CU_ASSERT_EQUAL(r, 0); 1154 r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem); 1155 CU_ASSERT_EQUAL(r, 0); 1156 1157 r = amdgpu_cs_wait_semaphore(context_handle[1], AMDGPU_HW_IP_GFX, 0, 0, sem); 1158 CU_ASSERT_EQUAL(r, 0); 1159 ptr = ib_result_cpu[1]; 1160 ptr[0] = gfx_nop; 1161 ib_info[1].ib_mc_address = ib_result_mc_address[1]; 1162 ib_info[1].size = 1; 1163 1164 ibs_request[1].ip_type = AMDGPU_HW_IP_GFX; 1165 ibs_request[1].number_of_ibs = 1; 1166 ibs_request[1].ibs = &ib_info[1]; 1167 ibs_request[1].resources = bo_list[1]; 1168 ibs_request[1].fence_info.handle = NULL; 1169 r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request[1], 1); 1170 1171 CU_ASSERT_EQUAL(r, 0); 1172 1173 fence_status.context = context_handle[1]; 1174 fence_status.ip_type = AMDGPU_HW_IP_GFX; 1175 fence_status.ip_instance = 0; 1176 fence_status.fence = ibs_request[1].seq_no; 1177 r = amdgpu_cs_query_fence_status(&fence_status, 1178 500000000, 0, &expired); 1179 CU_ASSERT_EQUAL(r, 0); 1180 CU_ASSERT_EQUAL(expired, true); 1181 1182 for (i = 0; i < 2; i++) { 1183 r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i], 1184 ib_result_mc_address[i], 4096); 1185 CU_ASSERT_EQUAL(r, 0); 1186 1187 r = amdgpu_bo_list_destroy(bo_list[i]); 1188 CU_ASSERT_EQUAL(r, 0); 1189 1190 r = amdgpu_cs_ctx_free(context_handle[i]); 1191 CU_ASSERT_EQUAL(r, 0); 1192 } 1193 1194 r = amdgpu_cs_destroy_semaphore(sem); 1195 CU_ASSERT_EQUAL(r, 0); 1196} 1197 1198static void amdgpu_command_submission_compute_nop(void) 1199{ 1200 amdgpu_context_handle context_handle; 1201 amdgpu_bo_handle ib_result_handle; 1202 void *ib_result_cpu; 1203 uint64_t ib_result_mc_address; 1204 struct amdgpu_cs_request ibs_request; 1205 struct amdgpu_cs_ib_info ib_info; 1206 struct amdgpu_cs_fence fence_status; 1207 uint32_t *ptr; 1208 uint32_t expired; 1209 int r, instance; 1210 amdgpu_bo_list_handle bo_list; 1211 amdgpu_va_handle va_handle; 1212 struct drm_amdgpu_info_hw_ip info; 1213 1214 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info); 1215 CU_ASSERT_EQUAL(r, 0); 1216 1217 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 1218 CU_ASSERT_EQUAL(r, 0); 1219 1220 for (instance = 0; (1 << instance) & info.available_rings; instance++) { 1221 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 1222 AMDGPU_GEM_DOMAIN_GTT, 0, 1223 &ib_result_handle, &ib_result_cpu, 1224 &ib_result_mc_address, &va_handle); 1225 CU_ASSERT_EQUAL(r, 0); 1226 1227 r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, 1228 &bo_list); 1229 CU_ASSERT_EQUAL(r, 0); 1230 1231 ptr = ib_result_cpu; 1232 memset(ptr, 0, 16); 1233 ptr[0]=PACKET3(PACKET3_NOP, 14); 1234 1235 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); 1236 ib_info.ib_mc_address = ib_result_mc_address; 1237 ib_info.size = 16; 1238 1239 memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); 1240 ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE; 1241 ibs_request.ring = instance; 1242 ibs_request.number_of_ibs = 1; 1243 ibs_request.ibs = &ib_info; 1244 ibs_request.resources = bo_list; 1245 ibs_request.fence_info.handle = NULL; 1246 1247 memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence)); 1248 r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1); 1249 CU_ASSERT_EQUAL(r, 0); 1250 1251 fence_status.context = context_handle; 1252 fence_status.ip_type = AMDGPU_HW_IP_COMPUTE; 1253 fence_status.ip_instance = 0; 1254 fence_status.ring = instance; 1255 fence_status.fence = ibs_request.seq_no; 1256 1257 r = amdgpu_cs_query_fence_status(&fence_status, 1258 AMDGPU_TIMEOUT_INFINITE, 1259 0, &expired); 1260 CU_ASSERT_EQUAL(r, 0); 1261 1262 r = amdgpu_bo_list_destroy(bo_list); 1263 CU_ASSERT_EQUAL(r, 0); 1264 1265 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 1266 ib_result_mc_address, 4096); 1267 CU_ASSERT_EQUAL(r, 0); 1268 } 1269 1270 r = amdgpu_cs_ctx_free(context_handle); 1271 CU_ASSERT_EQUAL(r, 0); 1272} 1273 1274static void amdgpu_command_submission_compute_cp_write_data(void) 1275{ 1276 amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE); 1277} 1278 1279static void amdgpu_command_submission_compute_cp_const_fill(void) 1280{ 1281 amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE); 1282} 1283 1284static void amdgpu_command_submission_compute_cp_copy_data(void) 1285{ 1286 amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE); 1287} 1288 1289static void amdgpu_command_submission_compute(void) 1290{ 1291 /* write data using the CP */ 1292 amdgpu_command_submission_compute_cp_write_data(); 1293 /* const fill using the CP */ 1294 amdgpu_command_submission_compute_cp_const_fill(); 1295 /* copy data using the CP */ 1296 amdgpu_command_submission_compute_cp_copy_data(); 1297 /* nop test */ 1298 amdgpu_command_submission_compute_nop(); 1299} 1300 1301/* 1302 * caller need create/release: 1303 * pm4_src, resources, ib_info, and ibs_request 1304 * submit command stream described in ibs_request and wait for this IB accomplished 1305 */ 1306void 1307amdgpu_test_exec_cs_helper_raw(amdgpu_device_handle device_handle, 1308 amdgpu_context_handle context_handle, 1309 unsigned ip_type, int instance, int pm4_dw, 1310 uint32_t *pm4_src, int res_cnt, 1311 amdgpu_bo_handle *resources, 1312 struct amdgpu_cs_ib_info *ib_info, 1313 struct amdgpu_cs_request *ibs_request, 1314 bool secure) 1315{ 1316 int r; 1317 uint32_t expired; 1318 uint32_t *ring_ptr; 1319 amdgpu_bo_handle ib_result_handle; 1320 void *ib_result_cpu; 1321 uint64_t ib_result_mc_address; 1322 struct amdgpu_cs_fence fence_status = {0}; 1323 amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1)); 1324 amdgpu_va_handle va_handle; 1325 1326 /* prepare CS */ 1327 CU_ASSERT_NOT_EQUAL(pm4_src, NULL); 1328 CU_ASSERT_NOT_EQUAL(resources, NULL); 1329 CU_ASSERT_NOT_EQUAL(ib_info, NULL); 1330 CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 1331 CU_ASSERT_TRUE(pm4_dw <= 1024); 1332 1333 /* allocate IB */ 1334 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 1335 AMDGPU_GEM_DOMAIN_GTT, 0, 1336 &ib_result_handle, &ib_result_cpu, 1337 &ib_result_mc_address, &va_handle); 1338 CU_ASSERT_EQUAL(r, 0); 1339 1340 /* copy PM4 packet to ring from caller */ 1341 ring_ptr = ib_result_cpu; 1342 memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src)); 1343 1344 ib_info->ib_mc_address = ib_result_mc_address; 1345 ib_info->size = pm4_dw; 1346 if (secure) 1347 ib_info->flags |= AMDGPU_IB_FLAGS_SECURE; 1348 1349 ibs_request->ip_type = ip_type; 1350 ibs_request->ring = instance; 1351 ibs_request->number_of_ibs = 1; 1352 ibs_request->ibs = ib_info; 1353 ibs_request->fence_info.handle = NULL; 1354 1355 memcpy(all_res, resources, sizeof(resources[0]) * res_cnt); 1356 all_res[res_cnt] = ib_result_handle; 1357 1358 r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res, 1359 NULL, &ibs_request->resources); 1360 CU_ASSERT_EQUAL(r, 0); 1361 1362 CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 1363 1364 /* submit CS */ 1365 r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1); 1366 CU_ASSERT_EQUAL(r, 0); 1367 1368 r = amdgpu_bo_list_destroy(ibs_request->resources); 1369 CU_ASSERT_EQUAL(r, 0); 1370 1371 fence_status.ip_type = ip_type; 1372 fence_status.ip_instance = 0; 1373 fence_status.ring = ibs_request->ring; 1374 fence_status.context = context_handle; 1375 fence_status.fence = ibs_request->seq_no; 1376 1377 /* wait for IB accomplished */ 1378 r = amdgpu_cs_query_fence_status(&fence_status, 1379 AMDGPU_TIMEOUT_INFINITE, 1380 0, &expired); 1381 CU_ASSERT_EQUAL(r, 0); 1382 CU_ASSERT_EQUAL(expired, true); 1383 1384 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 1385 ib_result_mc_address, 4096); 1386 CU_ASSERT_EQUAL(r, 0); 1387} 1388 1389static void 1390amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle, 1391 unsigned ip_type, int instance, int pm4_dw, 1392 uint32_t *pm4_src, int res_cnt, 1393 amdgpu_bo_handle *resources, 1394 struct amdgpu_cs_ib_info *ib_info, 1395 struct amdgpu_cs_request *ibs_request) 1396{ 1397 amdgpu_test_exec_cs_helper_raw(device_handle, context_handle, 1398 ip_type, instance, pm4_dw, pm4_src, 1399 res_cnt, resources, ib_info, 1400 ibs_request, false); 1401} 1402 1403void 1404amdgpu_command_submission_write_linear_helper_with_secure(amdgpu_device_handle 1405 device, unsigned 1406 ip_type, bool secure) 1407{ 1408 const int sdma_write_length = 128; 1409 const int pm4_dw = 256; 1410 amdgpu_context_handle context_handle; 1411 amdgpu_bo_handle bo; 1412 amdgpu_bo_handle *resources; 1413 uint32_t *pm4; 1414 struct amdgpu_cs_ib_info *ib_info; 1415 struct amdgpu_cs_request *ibs_request; 1416 uint64_t bo_mc; 1417 volatile uint32_t *bo_cpu; 1418 uint32_t bo_cpu_origin; 1419 int i, j, r, loop, ring_id; 1420 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 1421 amdgpu_va_handle va_handle; 1422 struct drm_amdgpu_info_hw_ip hw_ip_info; 1423 1424 pm4 = calloc(pm4_dw, sizeof(*pm4)); 1425 CU_ASSERT_NOT_EQUAL(pm4, NULL); 1426 1427 ib_info = calloc(1, sizeof(*ib_info)); 1428 CU_ASSERT_NOT_EQUAL(ib_info, NULL); 1429 1430 ibs_request = calloc(1, sizeof(*ibs_request)); 1431 CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 1432 1433 r = amdgpu_query_hw_ip_info(device, ip_type, 0, &hw_ip_info); 1434 CU_ASSERT_EQUAL(r, 0); 1435 1436 for (i = 0; secure && (i < 2); i++) 1437 gtt_flags[i] |= AMDGPU_GEM_CREATE_ENCRYPTED; 1438 1439 r = amdgpu_cs_ctx_create(device, &context_handle); 1440 1441 CU_ASSERT_EQUAL(r, 0); 1442 1443 /* prepare resource */ 1444 resources = calloc(1, sizeof(amdgpu_bo_handle)); 1445 CU_ASSERT_NOT_EQUAL(resources, NULL); 1446 1447 for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) { 1448 loop = 0; 1449 while(loop < 2) { 1450 /* allocate UC bo for sDMA use */ 1451 r = amdgpu_bo_alloc_and_map(device, 1452 sdma_write_length * sizeof(uint32_t), 1453 4096, AMDGPU_GEM_DOMAIN_GTT, 1454 gtt_flags[loop], &bo, (void**)&bo_cpu, 1455 &bo_mc, &va_handle); 1456 CU_ASSERT_EQUAL(r, 0); 1457 1458 /* clear bo */ 1459 memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t)); 1460 1461 resources[0] = bo; 1462 1463 /* fulfill PM4: test DMA write-linear */ 1464 i = j = 0; 1465 if (ip_type == AMDGPU_HW_IP_DMA) { 1466 if (family_id == AMDGPU_FAMILY_SI) 1467 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0, 1468 sdma_write_length); 1469 else 1470 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE, 1471 SDMA_WRITE_SUB_OPCODE_LINEAR, 1472 secure ? SDMA_ATOMIC_TMZ(1) : 0); 1473 pm4[i++] = 0xfffffffc & bo_mc; 1474 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1475 if (family_id >= AMDGPU_FAMILY_AI) 1476 pm4[i++] = sdma_write_length - 1; 1477 else if (family_id != AMDGPU_FAMILY_SI) 1478 pm4[i++] = sdma_write_length; 1479 while(j++ < sdma_write_length) 1480 pm4[i++] = 0xdeadbeaf; 1481 } else if ((ip_type == AMDGPU_HW_IP_GFX) || 1482 (ip_type == AMDGPU_HW_IP_COMPUTE)) { 1483 pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length); 1484 pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 1485 pm4[i++] = 0xfffffffc & bo_mc; 1486 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1487 while(j++ < sdma_write_length) 1488 pm4[i++] = 0xdeadbeaf; 1489 } 1490 1491 amdgpu_test_exec_cs_helper_raw(device, context_handle, 1492 ip_type, ring_id, i, pm4, 1493 1, resources, ib_info, 1494 ibs_request, secure); 1495 1496 /* verify if SDMA test result meets with expected */ 1497 i = 0; 1498 if (!secure) { 1499 while(i < sdma_write_length) { 1500 CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf); 1501 } 1502 } else if (ip_type == AMDGPU_HW_IP_GFX) { 1503 memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t)); 1504 pm4[i++] = PACKET3(PACKET3_ATOMIC_MEM, 7); 1505 /* atomic opcode for 32b w/ RTN and ATOMIC_SWAPCMP_RTN 1506 * command, 1-loop_until_compare_satisfied. 1507 * single_pass_atomic, 0-lru 1508 * engine_sel, 0-micro_engine 1509 */ 1510 pm4[i++] = (TC_OP_ATOMIC_CMPSWAP_RTN_32 | 1511 ATOMIC_MEM_COMMAND(1) | 1512 ATOMIC_MEM_CACHEPOLICAY(0) | 1513 ATOMIC_MEM_ENGINESEL(0)); 1514 pm4[i++] = 0xfffffffc & bo_mc; 1515 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1516 pm4[i++] = 0x12345678; 1517 pm4[i++] = 0x0; 1518 pm4[i++] = 0xdeadbeaf; 1519 pm4[i++] = 0x0; 1520 pm4[i++] = 0x100; 1521 amdgpu_test_exec_cs_helper_raw(device, context_handle, 1522 ip_type, ring_id, i, pm4, 1523 1, resources, ib_info, 1524 ibs_request, true); 1525 } else if (ip_type == AMDGPU_HW_IP_DMA) { 1526 /* restore the bo_cpu to compare */ 1527 bo_cpu_origin = bo_cpu[0]; 1528 memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t)); 1529 /* atomic opcode for 32b w/ RTN and ATOMIC_SWAPCMP_RTN 1530 * loop, 1-loop_until_compare_satisfied. 1531 * single_pass_atomic, 0-lru 1532 */ 1533 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC, 1534 0, 1535 SDMA_ATOMIC_LOOP(1) | 1536 SDMA_ATOMIC_TMZ(1) | 1537 SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32)); 1538 pm4[i++] = 0xfffffffc & bo_mc; 1539 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1540 pm4[i++] = 0x12345678; 1541 pm4[i++] = 0x0; 1542 pm4[i++] = 0xdeadbeaf; 1543 pm4[i++] = 0x0; 1544 pm4[i++] = 0x100; 1545 amdgpu_test_exec_cs_helper_raw(device, context_handle, 1546 ip_type, ring_id, i, pm4, 1547 1, resources, ib_info, 1548 ibs_request, true); 1549 /* DMA's atomic behavir is unlike GFX 1550 * If the comparing data is not equal to destination data, 1551 * For GFX, loop again till gfx timeout(system hang). 1552 * For DMA, loop again till timer expired and then send interrupt. 1553 * So testcase can't use interrupt mechanism. 1554 * We take another way to verify. When the comparing data is not 1555 * equal to destination data, overwrite the source data to the destination 1556 * buffer. Otherwise, original destination data unchanged. 1557 * So if the bo_cpu data is overwritten, the result is passed. 1558 */ 1559 CU_ASSERT_NOT_EQUAL(bo_cpu[0], bo_cpu_origin); 1560 1561 /* compare again for the case of dest_data != cmp_data */ 1562 i = 0; 1563 /* restore again, here dest_data should be */ 1564 bo_cpu_origin = bo_cpu[0]; 1565 memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t)); 1566 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC, 1567 0, 1568 SDMA_ATOMIC_LOOP(1) | 1569 SDMA_ATOMIC_TMZ(1) | 1570 SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32)); 1571 pm4[i++] = 0xfffffffc & bo_mc; 1572 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1573 pm4[i++] = 0x87654321; 1574 pm4[i++] = 0x0; 1575 pm4[i++] = 0xdeadbeaf; 1576 pm4[i++] = 0x0; 1577 pm4[i++] = 0x100; 1578 amdgpu_test_exec_cs_helper_raw(device, context_handle, 1579 ip_type, ring_id, i, pm4, 1580 1, resources, ib_info, 1581 ibs_request, true); 1582 /* here bo_cpu[0] should be unchanged, still is 0x12345678, otherwise failed*/ 1583 CU_ASSERT_EQUAL(bo_cpu[0], bo_cpu_origin); 1584 } 1585 1586 r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc, 1587 sdma_write_length * sizeof(uint32_t)); 1588 CU_ASSERT_EQUAL(r, 0); 1589 loop++; 1590 } 1591 } 1592 /* clean resources */ 1593 free(resources); 1594 free(ibs_request); 1595 free(ib_info); 1596 free(pm4); 1597 1598 /* end of test */ 1599 r = amdgpu_cs_ctx_free(context_handle); 1600 CU_ASSERT_EQUAL(r, 0); 1601} 1602 1603static void amdgpu_command_submission_write_linear_helper(unsigned ip_type) 1604{ 1605 amdgpu_command_submission_write_linear_helper_with_secure(device_handle, 1606 ip_type, 1607 false); 1608} 1609 1610static void amdgpu_command_submission_sdma_write_linear(void) 1611{ 1612 amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA); 1613} 1614 1615static void amdgpu_command_submission_const_fill_helper(unsigned ip_type) 1616{ 1617 const int sdma_write_length = 1024 * 1024; 1618 const int pm4_dw = 256; 1619 amdgpu_context_handle context_handle; 1620 amdgpu_bo_handle bo; 1621 amdgpu_bo_handle *resources; 1622 uint32_t *pm4; 1623 struct amdgpu_cs_ib_info *ib_info; 1624 struct amdgpu_cs_request *ibs_request; 1625 uint64_t bo_mc; 1626 volatile uint32_t *bo_cpu; 1627 int i, j, r, loop, ring_id; 1628 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 1629 amdgpu_va_handle va_handle; 1630 struct drm_amdgpu_info_hw_ip hw_ip_info; 1631 1632 pm4 = calloc(pm4_dw, sizeof(*pm4)); 1633 CU_ASSERT_NOT_EQUAL(pm4, NULL); 1634 1635 ib_info = calloc(1, sizeof(*ib_info)); 1636 CU_ASSERT_NOT_EQUAL(ib_info, NULL); 1637 1638 ibs_request = calloc(1, sizeof(*ibs_request)); 1639 CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 1640 1641 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info); 1642 CU_ASSERT_EQUAL(r, 0); 1643 1644 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 1645 CU_ASSERT_EQUAL(r, 0); 1646 1647 /* prepare resource */ 1648 resources = calloc(1, sizeof(amdgpu_bo_handle)); 1649 CU_ASSERT_NOT_EQUAL(resources, NULL); 1650 1651 for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) { 1652 loop = 0; 1653 while(loop < 2) { 1654 /* allocate UC bo for sDMA use */ 1655 r = amdgpu_bo_alloc_and_map(device_handle, 1656 sdma_write_length, 4096, 1657 AMDGPU_GEM_DOMAIN_GTT, 1658 gtt_flags[loop], &bo, (void**)&bo_cpu, 1659 &bo_mc, &va_handle); 1660 CU_ASSERT_EQUAL(r, 0); 1661 1662 /* clear bo */ 1663 memset((void*)bo_cpu, 0, sdma_write_length); 1664 1665 resources[0] = bo; 1666 1667 /* fulfill PM4: test DMA const fill */ 1668 i = j = 0; 1669 if (ip_type == AMDGPU_HW_IP_DMA) { 1670 if (family_id == AMDGPU_FAMILY_SI) { 1671 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI, 1672 0, 0, 0, 1673 sdma_write_length / 4); 1674 pm4[i++] = 0xfffffffc & bo_mc; 1675 pm4[i++] = 0xdeadbeaf; 1676 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16; 1677 } else { 1678 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0, 1679 SDMA_CONSTANT_FILL_EXTRA_SIZE(2)); 1680 pm4[i++] = 0xffffffff & bo_mc; 1681 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1682 pm4[i++] = 0xdeadbeaf; 1683 if (family_id >= AMDGPU_FAMILY_AI) 1684 pm4[i++] = sdma_write_length - 1; 1685 else 1686 pm4[i++] = sdma_write_length; 1687 } 1688 } else if ((ip_type == AMDGPU_HW_IP_GFX) || 1689 (ip_type == AMDGPU_HW_IP_COMPUTE)) { 1690 if (family_id == AMDGPU_FAMILY_SI) { 1691 pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4); 1692 pm4[i++] = 0xdeadbeaf; 1693 pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) | 1694 PACKET3_DMA_DATA_SI_DST_SEL(0) | 1695 PACKET3_DMA_DATA_SI_SRC_SEL(2) | 1696 PACKET3_DMA_DATA_SI_CP_SYNC; 1697 pm4[i++] = 0xffffffff & bo_mc; 1698 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1699 pm4[i++] = sdma_write_length; 1700 } else { 1701 pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5); 1702 pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) | 1703 PACKET3_DMA_DATA_DST_SEL(0) | 1704 PACKET3_DMA_DATA_SRC_SEL(2) | 1705 PACKET3_DMA_DATA_CP_SYNC; 1706 pm4[i++] = 0xdeadbeaf; 1707 pm4[i++] = 0; 1708 pm4[i++] = 0xfffffffc & bo_mc; 1709 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1710 pm4[i++] = sdma_write_length; 1711 } 1712 } 1713 1714 amdgpu_test_exec_cs_helper(context_handle, 1715 ip_type, ring_id, 1716 i, pm4, 1717 1, resources, 1718 ib_info, ibs_request); 1719 1720 /* verify if SDMA test result meets with expected */ 1721 i = 0; 1722 while(i < (sdma_write_length / 4)) { 1723 CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf); 1724 } 1725 1726 r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc, 1727 sdma_write_length); 1728 CU_ASSERT_EQUAL(r, 0); 1729 loop++; 1730 } 1731 } 1732 /* clean resources */ 1733 free(resources); 1734 free(ibs_request); 1735 free(ib_info); 1736 free(pm4); 1737 1738 /* end of test */ 1739 r = amdgpu_cs_ctx_free(context_handle); 1740 CU_ASSERT_EQUAL(r, 0); 1741} 1742 1743static void amdgpu_command_submission_sdma_const_fill(void) 1744{ 1745 amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA); 1746} 1747 1748static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type) 1749{ 1750 const int sdma_write_length = 1024; 1751 const int pm4_dw = 256; 1752 amdgpu_context_handle context_handle; 1753 amdgpu_bo_handle bo1, bo2; 1754 amdgpu_bo_handle *resources; 1755 uint32_t *pm4; 1756 struct amdgpu_cs_ib_info *ib_info; 1757 struct amdgpu_cs_request *ibs_request; 1758 uint64_t bo1_mc, bo2_mc; 1759 volatile unsigned char *bo1_cpu, *bo2_cpu; 1760 int i, j, r, loop1, loop2, ring_id; 1761 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 1762 amdgpu_va_handle bo1_va_handle, bo2_va_handle; 1763 struct drm_amdgpu_info_hw_ip hw_ip_info; 1764 1765 pm4 = calloc(pm4_dw, sizeof(*pm4)); 1766 CU_ASSERT_NOT_EQUAL(pm4, NULL); 1767 1768 ib_info = calloc(1, sizeof(*ib_info)); 1769 CU_ASSERT_NOT_EQUAL(ib_info, NULL); 1770 1771 ibs_request = calloc(1, sizeof(*ibs_request)); 1772 CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 1773 1774 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info); 1775 CU_ASSERT_EQUAL(r, 0); 1776 1777 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 1778 CU_ASSERT_EQUAL(r, 0); 1779 1780 /* prepare resource */ 1781 resources = calloc(2, sizeof(amdgpu_bo_handle)); 1782 CU_ASSERT_NOT_EQUAL(resources, NULL); 1783 1784 for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) { 1785 loop1 = loop2 = 0; 1786 /* run 9 circle to test all mapping combination */ 1787 while(loop1 < 2) { 1788 while(loop2 < 2) { 1789 /* allocate UC bo1for sDMA use */ 1790 r = amdgpu_bo_alloc_and_map(device_handle, 1791 sdma_write_length, 4096, 1792 AMDGPU_GEM_DOMAIN_GTT, 1793 gtt_flags[loop1], &bo1, 1794 (void**)&bo1_cpu, &bo1_mc, 1795 &bo1_va_handle); 1796 CU_ASSERT_EQUAL(r, 0); 1797 1798 /* set bo1 */ 1799 memset((void*)bo1_cpu, 0xaa, sdma_write_length); 1800 1801 /* allocate UC bo2 for sDMA use */ 1802 r = amdgpu_bo_alloc_and_map(device_handle, 1803 sdma_write_length, 4096, 1804 AMDGPU_GEM_DOMAIN_GTT, 1805 gtt_flags[loop2], &bo2, 1806 (void**)&bo2_cpu, &bo2_mc, 1807 &bo2_va_handle); 1808 CU_ASSERT_EQUAL(r, 0); 1809 1810 /* clear bo2 */ 1811 memset((void*)bo2_cpu, 0, sdma_write_length); 1812 1813 resources[0] = bo1; 1814 resources[1] = bo2; 1815 1816 /* fulfill PM4: test DMA copy linear */ 1817 i = j = 0; 1818 if (ip_type == AMDGPU_HW_IP_DMA) { 1819 if (family_id == AMDGPU_FAMILY_SI) { 1820 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 1821 0, 0, 0, 1822 sdma_write_length); 1823 pm4[i++] = 0xffffffff & bo2_mc; 1824 pm4[i++] = 0xffffffff & bo1_mc; 1825 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 1826 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 1827 } else { 1828 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, 1829 SDMA_COPY_SUB_OPCODE_LINEAR, 1830 0); 1831 if (family_id >= AMDGPU_FAMILY_AI) 1832 pm4[i++] = sdma_write_length - 1; 1833 else 1834 pm4[i++] = sdma_write_length; 1835 pm4[i++] = 0; 1836 pm4[i++] = 0xffffffff & bo1_mc; 1837 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 1838 pm4[i++] = 0xffffffff & bo2_mc; 1839 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 1840 } 1841 } else if ((ip_type == AMDGPU_HW_IP_GFX) || 1842 (ip_type == AMDGPU_HW_IP_COMPUTE)) { 1843 if (family_id == AMDGPU_FAMILY_SI) { 1844 pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4); 1845 pm4[i++] = 0xfffffffc & bo1_mc; 1846 pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) | 1847 PACKET3_DMA_DATA_SI_DST_SEL(0) | 1848 PACKET3_DMA_DATA_SI_SRC_SEL(0) | 1849 PACKET3_DMA_DATA_SI_CP_SYNC | 1850 (0xffff00000000 & bo1_mc) >> 32; 1851 pm4[i++] = 0xfffffffc & bo2_mc; 1852 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 1853 pm4[i++] = sdma_write_length; 1854 } else { 1855 pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5); 1856 pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) | 1857 PACKET3_DMA_DATA_DST_SEL(0) | 1858 PACKET3_DMA_DATA_SRC_SEL(0) | 1859 PACKET3_DMA_DATA_CP_SYNC; 1860 pm4[i++] = 0xfffffffc & bo1_mc; 1861 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 1862 pm4[i++] = 0xfffffffc & bo2_mc; 1863 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 1864 pm4[i++] = sdma_write_length; 1865 } 1866 } 1867 1868 amdgpu_test_exec_cs_helper(context_handle, 1869 ip_type, ring_id, 1870 i, pm4, 1871 2, resources, 1872 ib_info, ibs_request); 1873 1874 /* verify if SDMA test result meets with expected */ 1875 i = 0; 1876 while(i < sdma_write_length) { 1877 CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa); 1878 } 1879 r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc, 1880 sdma_write_length); 1881 CU_ASSERT_EQUAL(r, 0); 1882 r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc, 1883 sdma_write_length); 1884 CU_ASSERT_EQUAL(r, 0); 1885 loop2++; 1886 } 1887 loop1++; 1888 } 1889 } 1890 /* clean resources */ 1891 free(resources); 1892 free(ibs_request); 1893 free(ib_info); 1894 free(pm4); 1895 1896 /* end of test */ 1897 r = amdgpu_cs_ctx_free(context_handle); 1898 CU_ASSERT_EQUAL(r, 0); 1899} 1900 1901static void amdgpu_command_submission_sdma_copy_linear(void) 1902{ 1903 amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA); 1904} 1905 1906static void amdgpu_command_submission_sdma(void) 1907{ 1908 amdgpu_command_submission_sdma_write_linear(); 1909 amdgpu_command_submission_sdma_const_fill(); 1910 amdgpu_command_submission_sdma_copy_linear(); 1911} 1912 1913static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all) 1914{ 1915 amdgpu_context_handle context_handle; 1916 amdgpu_bo_handle ib_result_handle, ib_result_ce_handle; 1917 void *ib_result_cpu, *ib_result_ce_cpu; 1918 uint64_t ib_result_mc_address, ib_result_ce_mc_address; 1919 struct amdgpu_cs_request ibs_request[2] = {0}; 1920 struct amdgpu_cs_ib_info ib_info[2]; 1921 struct amdgpu_cs_fence fence_status[2] = {0}; 1922 uint32_t *ptr; 1923 uint32_t expired; 1924 amdgpu_bo_list_handle bo_list; 1925 amdgpu_va_handle va_handle, va_handle_ce; 1926 int r; 1927 int i = 0, ib_cs_num = 2; 1928 1929 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 1930 CU_ASSERT_EQUAL(r, 0); 1931 1932 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 1933 AMDGPU_GEM_DOMAIN_GTT, 0, 1934 &ib_result_handle, &ib_result_cpu, 1935 &ib_result_mc_address, &va_handle); 1936 CU_ASSERT_EQUAL(r, 0); 1937 1938 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 1939 AMDGPU_GEM_DOMAIN_GTT, 0, 1940 &ib_result_ce_handle, &ib_result_ce_cpu, 1941 &ib_result_ce_mc_address, &va_handle_ce); 1942 CU_ASSERT_EQUAL(r, 0); 1943 1944 r = amdgpu_get_bo_list(device_handle, ib_result_handle, 1945 ib_result_ce_handle, &bo_list); 1946 CU_ASSERT_EQUAL(r, 0); 1947 1948 memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info)); 1949 1950 /* IT_SET_CE_DE_COUNTERS */ 1951 ptr = ib_result_ce_cpu; 1952 if (family_id != AMDGPU_FAMILY_SI) { 1953 ptr[i++] = 0xc0008900; 1954 ptr[i++] = 0; 1955 } 1956 ptr[i++] = 0xc0008400; 1957 ptr[i++] = 1; 1958 ib_info[0].ib_mc_address = ib_result_ce_mc_address; 1959 ib_info[0].size = i; 1960 ib_info[0].flags = AMDGPU_IB_FLAG_CE; 1961 1962 /* IT_WAIT_ON_CE_COUNTER */ 1963 ptr = ib_result_cpu; 1964 ptr[0] = 0xc0008600; 1965 ptr[1] = 0x00000001; 1966 ib_info[1].ib_mc_address = ib_result_mc_address; 1967 ib_info[1].size = 2; 1968 1969 for (i = 0; i < ib_cs_num; i++) { 1970 ibs_request[i].ip_type = AMDGPU_HW_IP_GFX; 1971 ibs_request[i].number_of_ibs = 2; 1972 ibs_request[i].ibs = ib_info; 1973 ibs_request[i].resources = bo_list; 1974 ibs_request[i].fence_info.handle = NULL; 1975 } 1976 1977 r = amdgpu_cs_submit(context_handle, 0,ibs_request, ib_cs_num); 1978 1979 CU_ASSERT_EQUAL(r, 0); 1980 1981 for (i = 0; i < ib_cs_num; i++) { 1982 fence_status[i].context = context_handle; 1983 fence_status[i].ip_type = AMDGPU_HW_IP_GFX; 1984 fence_status[i].fence = ibs_request[i].seq_no; 1985 } 1986 1987 r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all, 1988 AMDGPU_TIMEOUT_INFINITE, 1989 &expired, NULL); 1990 CU_ASSERT_EQUAL(r, 0); 1991 1992 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 1993 ib_result_mc_address, 4096); 1994 CU_ASSERT_EQUAL(r, 0); 1995 1996 r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce, 1997 ib_result_ce_mc_address, 4096); 1998 CU_ASSERT_EQUAL(r, 0); 1999 2000 r = amdgpu_bo_list_destroy(bo_list); 2001 CU_ASSERT_EQUAL(r, 0); 2002 2003 r = amdgpu_cs_ctx_free(context_handle); 2004 CU_ASSERT_EQUAL(r, 0); 2005} 2006 2007static void amdgpu_command_submission_multi_fence(void) 2008{ 2009 amdgpu_command_submission_multi_fence_wait_all(true); 2010 amdgpu_command_submission_multi_fence_wait_all(false); 2011} 2012 2013static void amdgpu_userptr_test(void) 2014{ 2015 int i, r, j; 2016 uint32_t *pm4 = NULL; 2017 uint64_t bo_mc; 2018 void *ptr = NULL; 2019 int pm4_dw = 256; 2020 int sdma_write_length = 4; 2021 amdgpu_bo_handle handle; 2022 amdgpu_context_handle context_handle; 2023 struct amdgpu_cs_ib_info *ib_info; 2024 struct amdgpu_cs_request *ibs_request; 2025 amdgpu_bo_handle buf_handle; 2026 amdgpu_va_handle va_handle; 2027 2028 pm4 = calloc(pm4_dw, sizeof(*pm4)); 2029 CU_ASSERT_NOT_EQUAL(pm4, NULL); 2030 2031 ib_info = calloc(1, sizeof(*ib_info)); 2032 CU_ASSERT_NOT_EQUAL(ib_info, NULL); 2033 2034 ibs_request = calloc(1, sizeof(*ibs_request)); 2035 CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 2036 2037 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 2038 CU_ASSERT_EQUAL(r, 0); 2039 2040 posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE); 2041 CU_ASSERT_NOT_EQUAL(ptr, NULL); 2042 memset(ptr, 0, BUFFER_SIZE); 2043 2044 r = amdgpu_create_bo_from_user_mem(device_handle, 2045 ptr, BUFFER_SIZE, &buf_handle); 2046 CU_ASSERT_EQUAL(r, 0); 2047 2048 r = amdgpu_va_range_alloc(device_handle, 2049 amdgpu_gpu_va_range_general, 2050 BUFFER_SIZE, 1, 0, &bo_mc, 2051 &va_handle, 0); 2052 CU_ASSERT_EQUAL(r, 0); 2053 2054 r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP); 2055 CU_ASSERT_EQUAL(r, 0); 2056 2057 handle = buf_handle; 2058 2059 j = i = 0; 2060 2061 if (family_id == AMDGPU_FAMILY_SI) 2062 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0, 2063 sdma_write_length); 2064 else 2065 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE, 2066 SDMA_WRITE_SUB_OPCODE_LINEAR, 0); 2067 pm4[i++] = 0xffffffff & bo_mc; 2068 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 2069 if (family_id >= AMDGPU_FAMILY_AI) 2070 pm4[i++] = sdma_write_length - 1; 2071 else if (family_id != AMDGPU_FAMILY_SI) 2072 pm4[i++] = sdma_write_length; 2073 2074 while (j++ < sdma_write_length) 2075 pm4[i++] = 0xdeadbeaf; 2076 2077 if (!fork()) { 2078 pm4[0] = 0x0; 2079 exit(0); 2080 } 2081 2082 amdgpu_test_exec_cs_helper(context_handle, 2083 AMDGPU_HW_IP_DMA, 0, 2084 i, pm4, 2085 1, &handle, 2086 ib_info, ibs_request); 2087 i = 0; 2088 while (i < sdma_write_length) { 2089 CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf); 2090 } 2091 free(ibs_request); 2092 free(ib_info); 2093 free(pm4); 2094 2095 r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP); 2096 CU_ASSERT_EQUAL(r, 0); 2097 r = amdgpu_va_range_free(va_handle); 2098 CU_ASSERT_EQUAL(r, 0); 2099 r = amdgpu_bo_free(buf_handle); 2100 CU_ASSERT_EQUAL(r, 0); 2101 free(ptr); 2102 2103 r = amdgpu_cs_ctx_free(context_handle); 2104 CU_ASSERT_EQUAL(r, 0); 2105 2106 wait(NULL); 2107} 2108 2109static void amdgpu_sync_dependency_test(void) 2110{ 2111 amdgpu_context_handle context_handle[2]; 2112 amdgpu_bo_handle ib_result_handle; 2113 void *ib_result_cpu; 2114 uint64_t ib_result_mc_address; 2115 struct amdgpu_cs_request ibs_request; 2116 struct amdgpu_cs_ib_info ib_info; 2117 struct amdgpu_cs_fence fence_status; 2118 uint32_t expired; 2119 int i, j, r; 2120 amdgpu_bo_list_handle bo_list; 2121 amdgpu_va_handle va_handle; 2122 static uint32_t *ptr; 2123 uint64_t seq_no; 2124 2125 r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]); 2126 CU_ASSERT_EQUAL(r, 0); 2127 r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]); 2128 CU_ASSERT_EQUAL(r, 0); 2129 2130 r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096, 2131 AMDGPU_GEM_DOMAIN_GTT, 0, 2132 &ib_result_handle, &ib_result_cpu, 2133 &ib_result_mc_address, &va_handle); 2134 CU_ASSERT_EQUAL(r, 0); 2135 2136 r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, 2137 &bo_list); 2138 CU_ASSERT_EQUAL(r, 0); 2139 2140 ptr = ib_result_cpu; 2141 i = 0; 2142 2143 memcpy(ptr + CODE_OFFSET , shader_bin, sizeof(shader_bin)); 2144 2145 /* Dispatch minimal init config and verify it's executed */ 2146 ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); 2147 ptr[i++] = 0x80000000; 2148 ptr[i++] = 0x80000000; 2149 2150 ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0); 2151 ptr[i++] = 0x80000000; 2152 2153 2154 /* Program compute regs */ 2155 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 2156 ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 2157 ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8; 2158 ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40; 2159 2160 2161 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 2162 ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START; 2163 /* 2164 * 002c0040 COMPUTE_PGM_RSRC1 <- VGPRS = 0 2165 SGPRS = 1 2166 PRIORITY = 0 2167 FLOAT_MODE = 192 (0xc0) 2168 PRIV = 0 2169 DX10_CLAMP = 1 2170 DEBUG_MODE = 0 2171 IEEE_MODE = 0 2172 BULKY = 0 2173 CDBG_USER = 0 2174 * 2175 */ 2176 ptr[i++] = 0x002c0040; 2177 2178 2179 /* 2180 * 00000010 COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0 2181 USER_SGPR = 8 2182 TRAP_PRESENT = 0 2183 TGID_X_EN = 0 2184 TGID_Y_EN = 0 2185 TGID_Z_EN = 0 2186 TG_SIZE_EN = 0 2187 TIDIG_COMP_CNT = 0 2188 EXCP_EN_MSB = 0 2189 LDS_SIZE = 0 2190 EXCP_EN = 0 2191 * 2192 */ 2193 ptr[i++] = 0x00000010; 2194 2195 2196/* 2197 * 00000100 COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100) 2198 WAVESIZE = 0 2199 * 2200 */ 2201 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 2202 ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START; 2203 ptr[i++] = 0x00000100; 2204 2205 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 2206 ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START; 2207 ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4); 2208 ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32; 2209 2210 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 2211 ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START; 2212 ptr[i++] = 0; 2213 2214 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3); 2215 ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START; 2216 ptr[i++] = 1; 2217 ptr[i++] = 1; 2218 ptr[i++] = 1; 2219 2220 2221 /* Dispatch */ 2222 ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 2223 ptr[i++] = 1; 2224 ptr[i++] = 1; 2225 ptr[i++] = 1; 2226 ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */ 2227 2228 2229 while (i & 7) 2230 ptr[i++] = 0xffff1000; /* type3 nop packet */ 2231 2232 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); 2233 ib_info.ib_mc_address = ib_result_mc_address; 2234 ib_info.size = i; 2235 2236 memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); 2237 ibs_request.ip_type = AMDGPU_HW_IP_GFX; 2238 ibs_request.ring = 0; 2239 ibs_request.number_of_ibs = 1; 2240 ibs_request.ibs = &ib_info; 2241 ibs_request.resources = bo_list; 2242 ibs_request.fence_info.handle = NULL; 2243 2244 r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request, 1); 2245 CU_ASSERT_EQUAL(r, 0); 2246 seq_no = ibs_request.seq_no; 2247 2248 2249 2250 /* Prepare second command with dependency on the first */ 2251 j = i; 2252 ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3); 2253 ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 2254 ptr[i++] = 0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4); 2255 ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32; 2256 ptr[i++] = 99; 2257 2258 while (i & 7) 2259 ptr[i++] = 0xffff1000; /* type3 nop packet */ 2260 2261 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); 2262 ib_info.ib_mc_address = ib_result_mc_address + j * 4; 2263 ib_info.size = i - j; 2264 2265 memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); 2266 ibs_request.ip_type = AMDGPU_HW_IP_GFX; 2267 ibs_request.ring = 0; 2268 ibs_request.number_of_ibs = 1; 2269 ibs_request.ibs = &ib_info; 2270 ibs_request.resources = bo_list; 2271 ibs_request.fence_info.handle = NULL; 2272 2273 ibs_request.number_of_dependencies = 1; 2274 2275 ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies)); 2276 ibs_request.dependencies[0].context = context_handle[1]; 2277 ibs_request.dependencies[0].ip_instance = 0; 2278 ibs_request.dependencies[0].ring = 0; 2279 ibs_request.dependencies[0].fence = seq_no; 2280 2281 2282 r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request, 1); 2283 CU_ASSERT_EQUAL(r, 0); 2284 2285 2286 memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence)); 2287 fence_status.context = context_handle[0]; 2288 fence_status.ip_type = AMDGPU_HW_IP_GFX; 2289 fence_status.ip_instance = 0; 2290 fence_status.ring = 0; 2291 fence_status.fence = ibs_request.seq_no; 2292 2293 r = amdgpu_cs_query_fence_status(&fence_status, 2294 AMDGPU_TIMEOUT_INFINITE,0, &expired); 2295 CU_ASSERT_EQUAL(r, 0); 2296 2297 /* Expect the second command to wait for shader to complete */ 2298 CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99); 2299 2300 r = amdgpu_bo_list_destroy(bo_list); 2301 CU_ASSERT_EQUAL(r, 0); 2302 2303 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 2304 ib_result_mc_address, 4096); 2305 CU_ASSERT_EQUAL(r, 0); 2306 2307 r = amdgpu_cs_ctx_free(context_handle[0]); 2308 CU_ASSERT_EQUAL(r, 0); 2309 r = amdgpu_cs_ctx_free(context_handle[1]); 2310 CU_ASSERT_EQUAL(r, 0); 2311 2312 free(ibs_request.dependencies); 2313} 2314 2315static int amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t *ptr, int family) 2316{ 2317 struct amdgpu_test_shader *shader; 2318 int i, loop = 0x10000; 2319 2320 switch (family) { 2321 case AMDGPU_FAMILY_AI: 2322 shader = &memcpy_cs_hang_slow_ai; 2323 break; 2324 case AMDGPU_FAMILY_RV: 2325 shader = &memcpy_cs_hang_slow_rv; 2326 break; 2327 default: 2328 return -1; 2329 break; 2330 } 2331 2332 memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t)); 2333 2334 for (i = 0; i < loop; i++) 2335 memcpy(ptr + shader->header_length + shader->body_length * i, 2336 shader->shader + shader->header_length, 2337 shader->body_length * sizeof(uint32_t)); 2338 2339 memcpy(ptr + shader->header_length + shader->body_length * loop, 2340 shader->shader + shader->header_length + shader->body_length, 2341 shader->foot_length * sizeof(uint32_t)); 2342 2343 return 0; 2344} 2345 2346static int amdgpu_dispatch_load_cs_shader(uint8_t *ptr, 2347 int cs_type) 2348{ 2349 uint32_t shader_size; 2350 const uint32_t *shader; 2351 2352 switch (cs_type) { 2353 case CS_BUFFERCLEAR: 2354 shader = bufferclear_cs_shader_gfx9; 2355 shader_size = sizeof(bufferclear_cs_shader_gfx9); 2356 break; 2357 case CS_BUFFERCOPY: 2358 shader = buffercopy_cs_shader_gfx9; 2359 shader_size = sizeof(buffercopy_cs_shader_gfx9); 2360 break; 2361 case CS_HANG: 2362 shader = memcpy_ps_hang; 2363 shader_size = sizeof(memcpy_ps_hang); 2364 break; 2365 default: 2366 return -1; 2367 break; 2368 } 2369 2370 memcpy(ptr, shader, shader_size); 2371 return 0; 2372} 2373 2374static int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type) 2375{ 2376 int i = 0; 2377 2378 /* Write context control and load shadowing register if necessary */ 2379 if (ip_type == AMDGPU_HW_IP_GFX) { 2380 ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); 2381 ptr[i++] = 0x80000000; 2382 ptr[i++] = 0x80000000; 2383 } 2384 2385 /* Issue commands to set default compute state. */ 2386 /* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */ 2387 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 3); 2388 ptr[i++] = 0x204; 2389 i += 3; 2390 2391 /* clear mmCOMPUTE_TMPRING_SIZE */ 2392 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 2393 ptr[i++] = 0x218; 2394 ptr[i++] = 0; 2395 2396 return i; 2397} 2398 2399static int amdgpu_dispatch_write_cumask(uint32_t *ptr) 2400{ 2401 int i = 0; 2402 2403 /* Issue commands to set cu mask used in current dispatch */ 2404 /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */ 2405 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); 2406 ptr[i++] = 0x216; 2407 ptr[i++] = 0xffffffff; 2408 ptr[i++] = 0xffffffff; 2409 /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */ 2410 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); 2411 ptr[i++] = 0x219; 2412 ptr[i++] = 0xffffffff; 2413 ptr[i++] = 0xffffffff; 2414 2415 return i; 2416} 2417 2418static int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr) 2419{ 2420 int i, j; 2421 2422 i = 0; 2423 2424 /* Writes shader state to HW */ 2425 /* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */ 2426 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); 2427 ptr[i++] = 0x20c; 2428 ptr[i++] = (shader_addr >> 8); 2429 ptr[i++] = (shader_addr >> 40); 2430 /* write sh regs*/ 2431 for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) { 2432 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 2433 /* - Gfx9ShRegBase */ 2434 ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00; 2435 ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1]; 2436 } 2437 2438 return i; 2439} 2440 2441static void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle, 2442 uint32_t ip_type, 2443 uint32_t ring) 2444{ 2445 amdgpu_context_handle context_handle; 2446 amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3]; 2447 volatile unsigned char *ptr_dst; 2448 void *ptr_shader; 2449 uint32_t *ptr_cmd; 2450 uint64_t mc_address_dst, mc_address_shader, mc_address_cmd; 2451 amdgpu_va_handle va_dst, va_shader, va_cmd; 2452 int i, r; 2453 int bo_dst_size = 16384; 2454 int bo_shader_size = 4096; 2455 int bo_cmd_size = 4096; 2456 struct amdgpu_cs_request ibs_request = {0}; 2457 struct amdgpu_cs_ib_info ib_info= {0}; 2458 amdgpu_bo_list_handle bo_list; 2459 struct amdgpu_cs_fence fence_status = {0}; 2460 uint32_t expired; 2461 2462 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 2463 CU_ASSERT_EQUAL(r, 0); 2464 2465 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 2466 AMDGPU_GEM_DOMAIN_GTT, 0, 2467 &bo_cmd, (void **)&ptr_cmd, 2468 &mc_address_cmd, &va_cmd); 2469 CU_ASSERT_EQUAL(r, 0); 2470 memset(ptr_cmd, 0, bo_cmd_size); 2471 2472 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 2473 AMDGPU_GEM_DOMAIN_VRAM, 0, 2474 &bo_shader, &ptr_shader, 2475 &mc_address_shader, &va_shader); 2476 CU_ASSERT_EQUAL(r, 0); 2477 memset(ptr_shader, 0, bo_shader_size); 2478 2479 r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR); 2480 CU_ASSERT_EQUAL(r, 0); 2481 2482 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 2483 AMDGPU_GEM_DOMAIN_VRAM, 0, 2484 &bo_dst, (void **)&ptr_dst, 2485 &mc_address_dst, &va_dst); 2486 CU_ASSERT_EQUAL(r, 0); 2487 2488 i = 0; 2489 i += amdgpu_dispatch_init(ptr_cmd + i, ip_type); 2490 2491 /* Issue commands to set cu mask used in current dispatch */ 2492 i += amdgpu_dispatch_write_cumask(ptr_cmd + i); 2493 2494 /* Writes shader state to HW */ 2495 i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader); 2496 2497 /* Write constant data */ 2498 /* Writes the UAV constant data to the SGPRs. */ 2499 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 2500 ptr_cmd[i++] = 0x240; 2501 ptr_cmd[i++] = mc_address_dst; 2502 ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000; 2503 ptr_cmd[i++] = 0x400; 2504 ptr_cmd[i++] = 0x74fac; 2505 2506 /* Sets a range of pixel shader constants */ 2507 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 2508 ptr_cmd[i++] = 0x244; 2509 ptr_cmd[i++] = 0x22222222; 2510 ptr_cmd[i++] = 0x22222222; 2511 ptr_cmd[i++] = 0x22222222; 2512 ptr_cmd[i++] = 0x22222222; 2513 2514 /* clear mmCOMPUTE_RESOURCE_LIMITS */ 2515 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 2516 ptr_cmd[i++] = 0x215; 2517 ptr_cmd[i++] = 0; 2518 2519 /* dispatch direct command */ 2520 ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3); 2521 ptr_cmd[i++] = 0x10; 2522 ptr_cmd[i++] = 1; 2523 ptr_cmd[i++] = 1; 2524 ptr_cmd[i++] = 1; 2525 2526 while (i & 7) 2527 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 2528 2529 resources[0] = bo_dst; 2530 resources[1] = bo_shader; 2531 resources[2] = bo_cmd; 2532 r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list); 2533 CU_ASSERT_EQUAL(r, 0); 2534 2535 ib_info.ib_mc_address = mc_address_cmd; 2536 ib_info.size = i; 2537 ibs_request.ip_type = ip_type; 2538 ibs_request.ring = ring; 2539 ibs_request.resources = bo_list; 2540 ibs_request.number_of_ibs = 1; 2541 ibs_request.ibs = &ib_info; 2542 ibs_request.fence_info.handle = NULL; 2543 2544 /* submit CS */ 2545 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 2546 CU_ASSERT_EQUAL(r, 0); 2547 2548 r = amdgpu_bo_list_destroy(bo_list); 2549 CU_ASSERT_EQUAL(r, 0); 2550 2551 fence_status.ip_type = ip_type; 2552 fence_status.ip_instance = 0; 2553 fence_status.ring = ring; 2554 fence_status.context = context_handle; 2555 fence_status.fence = ibs_request.seq_no; 2556 2557 /* wait for IB accomplished */ 2558 r = amdgpu_cs_query_fence_status(&fence_status, 2559 AMDGPU_TIMEOUT_INFINITE, 2560 0, &expired); 2561 CU_ASSERT_EQUAL(r, 0); 2562 CU_ASSERT_EQUAL(expired, true); 2563 2564 /* verify if memset test result meets with expected */ 2565 i = 0; 2566 while(i < bo_dst_size) { 2567 CU_ASSERT_EQUAL(ptr_dst[i++], 0x22); 2568 } 2569 2570 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 2571 CU_ASSERT_EQUAL(r, 0); 2572 2573 r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size); 2574 CU_ASSERT_EQUAL(r, 0); 2575 2576 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 2577 CU_ASSERT_EQUAL(r, 0); 2578 2579 r = amdgpu_cs_ctx_free(context_handle); 2580 CU_ASSERT_EQUAL(r, 0); 2581} 2582 2583static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle, 2584 uint32_t ip_type, 2585 uint32_t ring, 2586 int hang) 2587{ 2588 amdgpu_context_handle context_handle; 2589 amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4]; 2590 volatile unsigned char *ptr_dst; 2591 void *ptr_shader; 2592 unsigned char *ptr_src; 2593 uint32_t *ptr_cmd; 2594 uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd; 2595 amdgpu_va_handle va_src, va_dst, va_shader, va_cmd; 2596 int i, r; 2597 int bo_dst_size = 16384; 2598 int bo_shader_size = 4096; 2599 int bo_cmd_size = 4096; 2600 struct amdgpu_cs_request ibs_request = {0}; 2601 struct amdgpu_cs_ib_info ib_info= {0}; 2602 uint32_t expired, hang_state, hangs; 2603 enum cs_type cs_type; 2604 amdgpu_bo_list_handle bo_list; 2605 struct amdgpu_cs_fence fence_status = {0}; 2606 2607 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 2608 CU_ASSERT_EQUAL(r, 0); 2609 2610 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 2611 AMDGPU_GEM_DOMAIN_GTT, 0, 2612 &bo_cmd, (void **)&ptr_cmd, 2613 &mc_address_cmd, &va_cmd); 2614 CU_ASSERT_EQUAL(r, 0); 2615 memset(ptr_cmd, 0, bo_cmd_size); 2616 2617 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 2618 AMDGPU_GEM_DOMAIN_VRAM, 0, 2619 &bo_shader, &ptr_shader, 2620 &mc_address_shader, &va_shader); 2621 CU_ASSERT_EQUAL(r, 0); 2622 memset(ptr_shader, 0, bo_shader_size); 2623 2624 cs_type = hang ? CS_HANG : CS_BUFFERCOPY; 2625 r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type); 2626 CU_ASSERT_EQUAL(r, 0); 2627 2628 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 2629 AMDGPU_GEM_DOMAIN_VRAM, 0, 2630 &bo_src, (void **)&ptr_src, 2631 &mc_address_src, &va_src); 2632 CU_ASSERT_EQUAL(r, 0); 2633 2634 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 2635 AMDGPU_GEM_DOMAIN_VRAM, 0, 2636 &bo_dst, (void **)&ptr_dst, 2637 &mc_address_dst, &va_dst); 2638 CU_ASSERT_EQUAL(r, 0); 2639 2640 memset(ptr_src, 0x55, bo_dst_size); 2641 2642 i = 0; 2643 i += amdgpu_dispatch_init(ptr_cmd + i, ip_type); 2644 2645 /* Issue commands to set cu mask used in current dispatch */ 2646 i += amdgpu_dispatch_write_cumask(ptr_cmd + i); 2647 2648 /* Writes shader state to HW */ 2649 i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader); 2650 2651 /* Write constant data */ 2652 /* Writes the texture resource constants data to the SGPRs */ 2653 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 2654 ptr_cmd[i++] = 0x240; 2655 ptr_cmd[i++] = mc_address_src; 2656 ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000; 2657 ptr_cmd[i++] = 0x400; 2658 ptr_cmd[i++] = 0x74fac; 2659 2660 /* Writes the UAV constant data to the SGPRs. */ 2661 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 2662 ptr_cmd[i++] = 0x244; 2663 ptr_cmd[i++] = mc_address_dst; 2664 ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000; 2665 ptr_cmd[i++] = 0x400; 2666 ptr_cmd[i++] = 0x74fac; 2667 2668 /* clear mmCOMPUTE_RESOURCE_LIMITS */ 2669 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 2670 ptr_cmd[i++] = 0x215; 2671 ptr_cmd[i++] = 0; 2672 2673 /* dispatch direct command */ 2674 ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3); 2675 ptr_cmd[i++] = 0x10; 2676 ptr_cmd[i++] = 1; 2677 ptr_cmd[i++] = 1; 2678 ptr_cmd[i++] = 1; 2679 2680 while (i & 7) 2681 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 2682 2683 resources[0] = bo_shader; 2684 resources[1] = bo_src; 2685 resources[2] = bo_dst; 2686 resources[3] = bo_cmd; 2687 r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list); 2688 CU_ASSERT_EQUAL(r, 0); 2689 2690 ib_info.ib_mc_address = mc_address_cmd; 2691 ib_info.size = i; 2692 ibs_request.ip_type = ip_type; 2693 ibs_request.ring = ring; 2694 ibs_request.resources = bo_list; 2695 ibs_request.number_of_ibs = 1; 2696 ibs_request.ibs = &ib_info; 2697 ibs_request.fence_info.handle = NULL; 2698 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 2699 CU_ASSERT_EQUAL(r, 0); 2700 2701 fence_status.ip_type = ip_type; 2702 fence_status.ip_instance = 0; 2703 fence_status.ring = ring; 2704 fence_status.context = context_handle; 2705 fence_status.fence = ibs_request.seq_no; 2706 2707 /* wait for IB accomplished */ 2708 r = amdgpu_cs_query_fence_status(&fence_status, 2709 AMDGPU_TIMEOUT_INFINITE, 2710 0, &expired); 2711 2712 if (!hang) { 2713 CU_ASSERT_EQUAL(r, 0); 2714 CU_ASSERT_EQUAL(expired, true); 2715 2716 /* verify if memcpy test result meets with expected */ 2717 i = 0; 2718 while(i < bo_dst_size) { 2719 CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]); 2720 i++; 2721 } 2722 } else { 2723 r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); 2724 CU_ASSERT_EQUAL(r, 0); 2725 CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); 2726 } 2727 2728 r = amdgpu_bo_list_destroy(bo_list); 2729 CU_ASSERT_EQUAL(r, 0); 2730 2731 r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size); 2732 CU_ASSERT_EQUAL(r, 0); 2733 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 2734 CU_ASSERT_EQUAL(r, 0); 2735 2736 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 2737 CU_ASSERT_EQUAL(r, 0); 2738 2739 r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size); 2740 CU_ASSERT_EQUAL(r, 0); 2741 2742 r = amdgpu_cs_ctx_free(context_handle); 2743 CU_ASSERT_EQUAL(r, 0); 2744} 2745 2746static void amdgpu_compute_dispatch_test(void) 2747{ 2748 int r; 2749 struct drm_amdgpu_info_hw_ip info; 2750 uint32_t ring_id; 2751 2752 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info); 2753 CU_ASSERT_EQUAL(r, 0); 2754 if (!info.available_rings) 2755 printf("SKIP ... as there's no compute ring\n"); 2756 2757 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 2758 amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id); 2759 amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, 0); 2760 } 2761} 2762 2763static void amdgpu_gfx_dispatch_test(void) 2764{ 2765 int r; 2766 struct drm_amdgpu_info_hw_ip info; 2767 uint32_t ring_id; 2768 2769 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info); 2770 CU_ASSERT_EQUAL(r, 0); 2771 if (!info.available_rings) 2772 printf("SKIP ... as there's no graphics ring\n"); 2773 2774 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 2775 amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id); 2776 amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, 0); 2777 } 2778} 2779 2780void amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip_type) 2781{ 2782 int r; 2783 struct drm_amdgpu_info_hw_ip info; 2784 uint32_t ring_id; 2785 2786 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info); 2787 CU_ASSERT_EQUAL(r, 0); 2788 if (!info.available_rings) 2789 printf("SKIP ... as there's no ring for ip %d\n", ip_type); 2790 2791 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 2792 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0); 2793 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 1); 2794 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0); 2795 } 2796} 2797 2798static void amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_handle, 2799 uint32_t ip_type, uint32_t ring) 2800{ 2801 amdgpu_context_handle context_handle; 2802 amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4]; 2803 volatile unsigned char *ptr_dst; 2804 void *ptr_shader; 2805 unsigned char *ptr_src; 2806 uint32_t *ptr_cmd; 2807 uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd; 2808 amdgpu_va_handle va_src, va_dst, va_shader, va_cmd; 2809 int i, r; 2810 int bo_dst_size = 0x4000000; 2811 int bo_shader_size = 0x400000; 2812 int bo_cmd_size = 4096; 2813 struct amdgpu_cs_request ibs_request = {0}; 2814 struct amdgpu_cs_ib_info ib_info= {0}; 2815 uint32_t hang_state, hangs, expired; 2816 struct amdgpu_gpu_info gpu_info = {0}; 2817 amdgpu_bo_list_handle bo_list; 2818 struct amdgpu_cs_fence fence_status = {0}; 2819 2820 r = amdgpu_query_gpu_info(device_handle, &gpu_info); 2821 CU_ASSERT_EQUAL(r, 0); 2822 2823 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 2824 CU_ASSERT_EQUAL(r, 0); 2825 2826 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 2827 AMDGPU_GEM_DOMAIN_GTT, 0, 2828 &bo_cmd, (void **)&ptr_cmd, 2829 &mc_address_cmd, &va_cmd); 2830 CU_ASSERT_EQUAL(r, 0); 2831 memset(ptr_cmd, 0, bo_cmd_size); 2832 2833 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 2834 AMDGPU_GEM_DOMAIN_VRAM, 0, 2835 &bo_shader, &ptr_shader, 2836 &mc_address_shader, &va_shader); 2837 CU_ASSERT_EQUAL(r, 0); 2838 memset(ptr_shader, 0, bo_shader_size); 2839 2840 r = amdgpu_dispatch_load_cs_shader_hang_slow(ptr_shader, gpu_info.family_id); 2841 CU_ASSERT_EQUAL(r, 0); 2842 2843 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 2844 AMDGPU_GEM_DOMAIN_VRAM, 0, 2845 &bo_src, (void **)&ptr_src, 2846 &mc_address_src, &va_src); 2847 CU_ASSERT_EQUAL(r, 0); 2848 2849 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 2850 AMDGPU_GEM_DOMAIN_VRAM, 0, 2851 &bo_dst, (void **)&ptr_dst, 2852 &mc_address_dst, &va_dst); 2853 CU_ASSERT_EQUAL(r, 0); 2854 2855 memset(ptr_src, 0x55, bo_dst_size); 2856 2857 i = 0; 2858 i += amdgpu_dispatch_init(ptr_cmd + i, ip_type); 2859 2860 /* Issue commands to set cu mask used in current dispatch */ 2861 i += amdgpu_dispatch_write_cumask(ptr_cmd + i); 2862 2863 /* Writes shader state to HW */ 2864 i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader); 2865 2866 /* Write constant data */ 2867 /* Writes the texture resource constants data to the SGPRs */ 2868 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 2869 ptr_cmd[i++] = 0x240; 2870 ptr_cmd[i++] = mc_address_src; 2871 ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000; 2872 ptr_cmd[i++] = 0x400000; 2873 ptr_cmd[i++] = 0x74fac; 2874 2875 /* Writes the UAV constant data to the SGPRs. */ 2876 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 2877 ptr_cmd[i++] = 0x244; 2878 ptr_cmd[i++] = mc_address_dst; 2879 ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000; 2880 ptr_cmd[i++] = 0x400000; 2881 ptr_cmd[i++] = 0x74fac; 2882 2883 /* clear mmCOMPUTE_RESOURCE_LIMITS */ 2884 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 2885 ptr_cmd[i++] = 0x215; 2886 ptr_cmd[i++] = 0; 2887 2888 /* dispatch direct command */ 2889 ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3); 2890 ptr_cmd[i++] = 0x10000; 2891 ptr_cmd[i++] = 1; 2892 ptr_cmd[i++] = 1; 2893 ptr_cmd[i++] = 1; 2894 2895 while (i & 7) 2896 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 2897 2898 resources[0] = bo_shader; 2899 resources[1] = bo_src; 2900 resources[2] = bo_dst; 2901 resources[3] = bo_cmd; 2902 r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list); 2903 CU_ASSERT_EQUAL(r, 0); 2904 2905 ib_info.ib_mc_address = mc_address_cmd; 2906 ib_info.size = i; 2907 ibs_request.ip_type = ip_type; 2908 ibs_request.ring = ring; 2909 ibs_request.resources = bo_list; 2910 ibs_request.number_of_ibs = 1; 2911 ibs_request.ibs = &ib_info; 2912 ibs_request.fence_info.handle = NULL; 2913 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 2914 CU_ASSERT_EQUAL(r, 0); 2915 2916 fence_status.ip_type = ip_type; 2917 fence_status.ip_instance = 0; 2918 fence_status.ring = ring; 2919 fence_status.context = context_handle; 2920 fence_status.fence = ibs_request.seq_no; 2921 2922 /* wait for IB accomplished */ 2923 r = amdgpu_cs_query_fence_status(&fence_status, 2924 AMDGPU_TIMEOUT_INFINITE, 2925 0, &expired); 2926 2927 r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); 2928 CU_ASSERT_EQUAL(r, 0); 2929 CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); 2930 2931 r = amdgpu_bo_list_destroy(bo_list); 2932 CU_ASSERT_EQUAL(r, 0); 2933 2934 r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size); 2935 CU_ASSERT_EQUAL(r, 0); 2936 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 2937 CU_ASSERT_EQUAL(r, 0); 2938 2939 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 2940 CU_ASSERT_EQUAL(r, 0); 2941 2942 r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size); 2943 CU_ASSERT_EQUAL(r, 0); 2944 2945 r = amdgpu_cs_ctx_free(context_handle); 2946 CU_ASSERT_EQUAL(r, 0); 2947} 2948 2949void amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle, uint32_t ip_type) 2950{ 2951 int r; 2952 struct drm_amdgpu_info_hw_ip info; 2953 uint32_t ring_id; 2954 2955 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info); 2956 CU_ASSERT_EQUAL(r, 0); 2957 if (!info.available_rings) 2958 printf("SKIP ... as there's no ring for ip %d\n", ip_type); 2959 2960 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 2961 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0); 2962 amdgpu_memcpy_dispatch_hang_slow_test(device_handle, ip_type, ring_id); 2963 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0); 2964 } 2965} 2966 2967static int amdgpu_draw_load_ps_shader_hang_slow(uint32_t *ptr, int family) 2968{ 2969 struct amdgpu_test_shader *shader; 2970 int i, loop = 0x40000; 2971 2972 switch (family) { 2973 case AMDGPU_FAMILY_AI: 2974 case AMDGPU_FAMILY_RV: 2975 shader = &memcpy_ps_hang_slow_ai; 2976 break; 2977 default: 2978 return -1; 2979 break; 2980 } 2981 2982 memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t)); 2983 2984 for (i = 0; i < loop; i++) 2985 memcpy(ptr + shader->header_length + shader->body_length * i, 2986 shader->shader + shader->header_length, 2987 shader->body_length * sizeof(uint32_t)); 2988 2989 memcpy(ptr + shader->header_length + shader->body_length * loop, 2990 shader->shader + shader->header_length + shader->body_length, 2991 shader->foot_length * sizeof(uint32_t)); 2992 2993 return 0; 2994} 2995 2996static int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type) 2997{ 2998 int i; 2999 uint32_t shader_offset= 256; 3000 uint32_t mem_offset, patch_code_offset; 3001 uint32_t shader_size, patchinfo_code_size; 3002 const uint32_t *shader; 3003 const uint32_t *patchinfo_code; 3004 const uint32_t *patchcode_offset; 3005 3006 switch (ps_type) { 3007 case PS_CONST: 3008 shader = ps_const_shader_gfx9; 3009 shader_size = sizeof(ps_const_shader_gfx9); 3010 patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9; 3011 patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9; 3012 patchcode_offset = ps_const_shader_patchinfo_offset_gfx9; 3013 break; 3014 case PS_TEX: 3015 shader = ps_tex_shader_gfx9; 3016 shader_size = sizeof(ps_tex_shader_gfx9); 3017 patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx9; 3018 patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9; 3019 patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9; 3020 break; 3021 case PS_HANG: 3022 shader = memcpy_ps_hang; 3023 shader_size = sizeof(memcpy_ps_hang); 3024 3025 memcpy(ptr, shader, shader_size); 3026 return 0; 3027 default: 3028 return -1; 3029 break; 3030 } 3031 3032 /* write main shader program */ 3033 for (i = 0 ; i < 10; i++) { 3034 mem_offset = i * shader_offset; 3035 memcpy(ptr + mem_offset, shader, shader_size); 3036 } 3037 3038 /* overwrite patch codes */ 3039 for (i = 0 ; i < 10; i++) { 3040 mem_offset = i * shader_offset + patchcode_offset[0] * sizeof(uint32_t); 3041 patch_code_offset = i * patchinfo_code_size; 3042 memcpy(ptr + mem_offset, 3043 patchinfo_code + patch_code_offset, 3044 patchinfo_code_size * sizeof(uint32_t)); 3045 } 3046 3047 return 0; 3048} 3049 3050/* load RectPosTexFast_VS */ 3051static int amdgpu_draw_load_vs_shader(uint8_t *ptr) 3052{ 3053 const uint32_t *shader; 3054 uint32_t shader_size; 3055 3056 shader = vs_RectPosTexFast_shader_gfx9; 3057 shader_size = sizeof(vs_RectPosTexFast_shader_gfx9); 3058 3059 memcpy(ptr, shader, shader_size); 3060 3061 return 0; 3062} 3063 3064static int amdgpu_draw_init(uint32_t *ptr) 3065{ 3066 int i = 0; 3067 const uint32_t *preamblecache_ptr; 3068 uint32_t preamblecache_size; 3069 3070 /* Write context control and load shadowing register if necessary */ 3071 ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); 3072 ptr[i++] = 0x80000000; 3073 ptr[i++] = 0x80000000; 3074 3075 preamblecache_ptr = preamblecache_gfx9; 3076 preamblecache_size = sizeof(preamblecache_gfx9); 3077 3078 memcpy(ptr + i, preamblecache_ptr, preamblecache_size); 3079 return i + preamblecache_size/sizeof(uint32_t); 3080} 3081 3082static int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr, 3083 uint64_t dst_addr, 3084 int hang_slow) 3085{ 3086 int i = 0; 3087 3088 /* setup color buffer */ 3089 /* offset reg 3090 0xA318 CB_COLOR0_BASE 3091 0xA319 CB_COLOR0_BASE_EXT 3092 0xA31A CB_COLOR0_ATTRIB2 3093 0xA31B CB_COLOR0_VIEW 3094 0xA31C CB_COLOR0_INFO 3095 0xA31D CB_COLOR0_ATTRIB 3096 0xA31E CB_COLOR0_DCC_CONTROL 3097 0xA31F CB_COLOR0_CMASK 3098 0xA320 CB_COLOR0_CMASK_BASE_EXT 3099 0xA321 CB_COLOR0_FMASK 3100 0xA322 CB_COLOR0_FMASK_BASE_EXT 3101 0xA323 CB_COLOR0_CLEAR_WORD0 3102 0xA324 CB_COLOR0_CLEAR_WORD1 3103 0xA325 CB_COLOR0_DCC_BASE 3104 0xA326 CB_COLOR0_DCC_BASE_EXT */ 3105 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15); 3106 ptr[i++] = 0x318; 3107 ptr[i++] = dst_addr >> 8; 3108 ptr[i++] = dst_addr >> 40; 3109 ptr[i++] = hang_slow ? 0x1ffc7ff : 0x7c01f; 3110 ptr[i++] = 0; 3111 ptr[i++] = 0x50438; 3112 ptr[i++] = 0x10140000; 3113 i += 9; 3114 3115 /* mmCB_MRT0_EPITCH */ 3116 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3117 ptr[i++] = 0x1e8; 3118 ptr[i++] = hang_slow ? 0x7ff : 0x1f; 3119 3120 /* 0xA32B CB_COLOR1_BASE */ 3121 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3122 ptr[i++] = 0x32b; 3123 ptr[i++] = 0; 3124 3125 /* 0xA33A CB_COLOR1_BASE */ 3126 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3127 ptr[i++] = 0x33a; 3128 ptr[i++] = 0; 3129 3130 /* SPI_SHADER_COL_FORMAT */ 3131 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3132 ptr[i++] = 0x1c5; 3133 ptr[i++] = 9; 3134 3135 /* Setup depth buffer */ 3136 /* mmDB_Z_INFO */ 3137 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); 3138 ptr[i++] = 0xe; 3139 i += 2; 3140 3141 return i; 3142} 3143 3144static int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr, int hang_slow) 3145{ 3146 int i = 0; 3147 const uint32_t *cached_cmd_ptr; 3148 uint32_t cached_cmd_size; 3149 3150 /* mmPA_SC_TILE_STEERING_OVERRIDE */ 3151 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3152 ptr[i++] = 0xd7; 3153 ptr[i++] = 0; 3154 3155 ptr[i++] = 0xffff1000; 3156 ptr[i++] = 0xc0021000; 3157 3158 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3159 ptr[i++] = 0xd7; 3160 ptr[i++] = 1; 3161 3162 /* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */ 3163 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16); 3164 ptr[i++] = 0x2fe; 3165 i += 16; 3166 3167 /* mmPA_SC_CENTROID_PRIORITY_0 */ 3168 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); 3169 ptr[i++] = 0x2f5; 3170 i += 2; 3171 3172 cached_cmd_ptr = cached_cmd_gfx9; 3173 cached_cmd_size = sizeof(cached_cmd_gfx9); 3174 3175 memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size); 3176 if (hang_slow) 3177 *(ptr + i + 12) = 0x8000800; 3178 i += cached_cmd_size/sizeof(uint32_t); 3179 3180 return i; 3181} 3182 3183static int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr, 3184 int ps_type, 3185 uint64_t shader_addr, 3186 int hang_slow) 3187{ 3188 int i = 0; 3189 3190 /* mmPA_CL_VS_OUT_CNTL */ 3191 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3192 ptr[i++] = 0x207; 3193 ptr[i++] = 0; 3194 3195 /* mmSPI_SHADER_PGM_RSRC3_VS */ 3196 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 3197 ptr[i++] = 0x46; 3198 ptr[i++] = 0xffff; 3199 3200 /* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */ 3201 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 3202 ptr[i++] = 0x48; 3203 ptr[i++] = shader_addr >> 8; 3204 ptr[i++] = shader_addr >> 40; 3205 3206 /* mmSPI_SHADER_PGM_RSRC1_VS */ 3207 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 3208 ptr[i++] = 0x4a; 3209 ptr[i++] = 0xc0081; 3210 /* mmSPI_SHADER_PGM_RSRC2_VS */ 3211 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 3212 ptr[i++] = 0x4b; 3213 ptr[i++] = 0x18; 3214 3215 /* mmSPI_VS_OUT_CONFIG */ 3216 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3217 ptr[i++] = 0x1b1; 3218 ptr[i++] = 2; 3219 3220 /* mmSPI_SHADER_POS_FORMAT */ 3221 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3222 ptr[i++] = 0x1c3; 3223 ptr[i++] = 4; 3224 3225 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4); 3226 ptr[i++] = 0x4c; 3227 i += 2; 3228 ptr[i++] = hang_slow ? 0x45000000 : 0x42000000; 3229 ptr[i++] = hang_slow ? 0x45000000 : 0x42000000; 3230 3231 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4); 3232 ptr[i++] = 0x50; 3233 i += 2; 3234 if (ps_type == PS_CONST) { 3235 i += 2; 3236 } else if (ps_type == PS_TEX) { 3237 ptr[i++] = 0x3f800000; 3238 ptr[i++] = 0x3f800000; 3239 } 3240 3241 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4); 3242 ptr[i++] = 0x54; 3243 i += 4; 3244 3245 return i; 3246} 3247 3248static int amdgpu_draw_ps_write2hw(uint32_t *ptr, 3249 int ps_type, 3250 uint64_t shader_addr) 3251{ 3252 int i, j; 3253 const uint32_t *sh_registers; 3254 const uint32_t *context_registers; 3255 uint32_t num_sh_reg, num_context_reg; 3256 3257 if (ps_type == PS_CONST) { 3258 sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9; 3259 context_registers = (const uint32_t *)ps_const_context_reg_gfx9; 3260 num_sh_reg = ps_num_sh_registers_gfx9; 3261 num_context_reg = ps_num_context_registers_gfx9; 3262 } else if (ps_type == PS_TEX) { 3263 sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9; 3264 context_registers = (const uint32_t *)ps_tex_context_reg_gfx9; 3265 num_sh_reg = ps_num_sh_registers_gfx9; 3266 num_context_reg = ps_num_context_registers_gfx9; 3267 } 3268 3269 i = 0; 3270 3271 /* 0x2c07 SPI_SHADER_PGM_RSRC3_PS 3272 0x2c08 SPI_SHADER_PGM_LO_PS 3273 0x2c09 SPI_SHADER_PGM_HI_PS */ 3274 shader_addr += 256 * 9; 3275 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3); 3276 ptr[i++] = 0x7; 3277 ptr[i++] = 0xffff; 3278 ptr[i++] = shader_addr >> 8; 3279 ptr[i++] = shader_addr >> 40; 3280 3281 for (j = 0; j < num_sh_reg; j++) { 3282 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 3283 ptr[i++] = sh_registers[j * 2] - 0x2c00; 3284 ptr[i++] = sh_registers[j * 2 + 1]; 3285 } 3286 3287 for (j = 0; j < num_context_reg; j++) { 3288 if (context_registers[j * 2] != 0xA1C5) { 3289 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3290 ptr[i++] = context_registers[j * 2] - 0xa000; 3291 ptr[i++] = context_registers[j * 2 + 1]; 3292 } 3293 3294 if (context_registers[j * 2] == 0xA1B4) { 3295 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3296 ptr[i++] = 0x1b3; 3297 ptr[i++] = 2; 3298 } 3299 } 3300 3301 return i; 3302} 3303 3304static int amdgpu_draw_draw(uint32_t *ptr) 3305{ 3306 int i = 0; 3307 3308 /* mmIA_MULTI_VGT_PARAM */ 3309 ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 3310 ptr[i++] = 0x40000258; 3311 ptr[i++] = 0xd00ff; 3312 3313 /* mmVGT_PRIMITIVE_TYPE */ 3314 ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 3315 ptr[i++] = 0x10000242; 3316 ptr[i++] = 0x11; 3317 3318 ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1); 3319 ptr[i++] = 3; 3320 ptr[i++] = 2; 3321 3322 return i; 3323} 3324 3325void amdgpu_memset_draw(amdgpu_device_handle device_handle, 3326 amdgpu_bo_handle bo_shader_ps, 3327 amdgpu_bo_handle bo_shader_vs, 3328 uint64_t mc_address_shader_ps, 3329 uint64_t mc_address_shader_vs, 3330 uint32_t ring_id) 3331{ 3332 amdgpu_context_handle context_handle; 3333 amdgpu_bo_handle bo_dst, bo_cmd, resources[4]; 3334 volatile unsigned char *ptr_dst; 3335 uint32_t *ptr_cmd; 3336 uint64_t mc_address_dst, mc_address_cmd; 3337 amdgpu_va_handle va_dst, va_cmd; 3338 int i, r; 3339 int bo_dst_size = 16384; 3340 int bo_cmd_size = 4096; 3341 struct amdgpu_cs_request ibs_request = {0}; 3342 struct amdgpu_cs_ib_info ib_info = {0}; 3343 struct amdgpu_cs_fence fence_status = {0}; 3344 uint32_t expired; 3345 amdgpu_bo_list_handle bo_list; 3346 3347 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 3348 CU_ASSERT_EQUAL(r, 0); 3349 3350 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 3351 AMDGPU_GEM_DOMAIN_GTT, 0, 3352 &bo_cmd, (void **)&ptr_cmd, 3353 &mc_address_cmd, &va_cmd); 3354 CU_ASSERT_EQUAL(r, 0); 3355 memset(ptr_cmd, 0, bo_cmd_size); 3356 3357 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 3358 AMDGPU_GEM_DOMAIN_VRAM, 0, 3359 &bo_dst, (void **)&ptr_dst, 3360 &mc_address_dst, &va_dst); 3361 CU_ASSERT_EQUAL(r, 0); 3362 3363 i = 0; 3364 i += amdgpu_draw_init(ptr_cmd + i); 3365 3366 i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0); 3367 3368 i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0); 3369 3370 i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs, 0); 3371 3372 i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps); 3373 3374 ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4); 3375 ptr_cmd[i++] = 0xc; 3376 ptr_cmd[i++] = 0x33333333; 3377 ptr_cmd[i++] = 0x33333333; 3378 ptr_cmd[i++] = 0x33333333; 3379 ptr_cmd[i++] = 0x33333333; 3380 3381 i += amdgpu_draw_draw(ptr_cmd + i); 3382 3383 while (i & 7) 3384 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 3385 3386 resources[0] = bo_dst; 3387 resources[1] = bo_shader_ps; 3388 resources[2] = bo_shader_vs; 3389 resources[3] = bo_cmd; 3390 r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list); 3391 CU_ASSERT_EQUAL(r, 0); 3392 3393 ib_info.ib_mc_address = mc_address_cmd; 3394 ib_info.size = i; 3395 ibs_request.ip_type = AMDGPU_HW_IP_GFX; 3396 ibs_request.ring = ring_id; 3397 ibs_request.resources = bo_list; 3398 ibs_request.number_of_ibs = 1; 3399 ibs_request.ibs = &ib_info; 3400 ibs_request.fence_info.handle = NULL; 3401 3402 /* submit CS */ 3403 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 3404 CU_ASSERT_EQUAL(r, 0); 3405 3406 r = amdgpu_bo_list_destroy(bo_list); 3407 CU_ASSERT_EQUAL(r, 0); 3408 3409 fence_status.ip_type = AMDGPU_HW_IP_GFX; 3410 fence_status.ip_instance = 0; 3411 fence_status.ring = ring_id; 3412 fence_status.context = context_handle; 3413 fence_status.fence = ibs_request.seq_no; 3414 3415 /* wait for IB accomplished */ 3416 r = amdgpu_cs_query_fence_status(&fence_status, 3417 AMDGPU_TIMEOUT_INFINITE, 3418 0, &expired); 3419 CU_ASSERT_EQUAL(r, 0); 3420 CU_ASSERT_EQUAL(expired, true); 3421 3422 /* verify if memset test result meets with expected */ 3423 i = 0; 3424 while(i < bo_dst_size) { 3425 CU_ASSERT_EQUAL(ptr_dst[i++], 0x33); 3426 } 3427 3428 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 3429 CU_ASSERT_EQUAL(r, 0); 3430 3431 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 3432 CU_ASSERT_EQUAL(r, 0); 3433 3434 r = amdgpu_cs_ctx_free(context_handle); 3435 CU_ASSERT_EQUAL(r, 0); 3436} 3437 3438static void amdgpu_memset_draw_test(amdgpu_device_handle device_handle, 3439 uint32_t ring) 3440{ 3441 amdgpu_bo_handle bo_shader_ps, bo_shader_vs; 3442 void *ptr_shader_ps; 3443 void *ptr_shader_vs; 3444 uint64_t mc_address_shader_ps, mc_address_shader_vs; 3445 amdgpu_va_handle va_shader_ps, va_shader_vs; 3446 int r; 3447 int bo_shader_size = 4096; 3448 3449 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 3450 AMDGPU_GEM_DOMAIN_VRAM, 0, 3451 &bo_shader_ps, &ptr_shader_ps, 3452 &mc_address_shader_ps, &va_shader_ps); 3453 CU_ASSERT_EQUAL(r, 0); 3454 memset(ptr_shader_ps, 0, bo_shader_size); 3455 3456 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 3457 AMDGPU_GEM_DOMAIN_VRAM, 0, 3458 &bo_shader_vs, &ptr_shader_vs, 3459 &mc_address_shader_vs, &va_shader_vs); 3460 CU_ASSERT_EQUAL(r, 0); 3461 memset(ptr_shader_vs, 0, bo_shader_size); 3462 3463 r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_CONST); 3464 CU_ASSERT_EQUAL(r, 0); 3465 3466 r = amdgpu_draw_load_vs_shader(ptr_shader_vs); 3467 CU_ASSERT_EQUAL(r, 0); 3468 3469 amdgpu_memset_draw(device_handle, bo_shader_ps, bo_shader_vs, 3470 mc_address_shader_ps, mc_address_shader_vs, ring); 3471 3472 r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size); 3473 CU_ASSERT_EQUAL(r, 0); 3474 3475 r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size); 3476 CU_ASSERT_EQUAL(r, 0); 3477} 3478 3479static void amdgpu_memcpy_draw(amdgpu_device_handle device_handle, 3480 amdgpu_bo_handle bo_shader_ps, 3481 amdgpu_bo_handle bo_shader_vs, 3482 uint64_t mc_address_shader_ps, 3483 uint64_t mc_address_shader_vs, 3484 uint32_t ring, int hang) 3485{ 3486 amdgpu_context_handle context_handle; 3487 amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5]; 3488 volatile unsigned char *ptr_dst; 3489 unsigned char *ptr_src; 3490 uint32_t *ptr_cmd; 3491 uint64_t mc_address_dst, mc_address_src, mc_address_cmd; 3492 amdgpu_va_handle va_dst, va_src, va_cmd; 3493 int i, r; 3494 int bo_size = 16384; 3495 int bo_cmd_size = 4096; 3496 struct amdgpu_cs_request ibs_request = {0}; 3497 struct amdgpu_cs_ib_info ib_info= {0}; 3498 uint32_t hang_state, hangs; 3499 uint32_t expired; 3500 amdgpu_bo_list_handle bo_list; 3501 struct amdgpu_cs_fence fence_status = {0}; 3502 3503 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 3504 CU_ASSERT_EQUAL(r, 0); 3505 3506 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 3507 AMDGPU_GEM_DOMAIN_GTT, 0, 3508 &bo_cmd, (void **)&ptr_cmd, 3509 &mc_address_cmd, &va_cmd); 3510 CU_ASSERT_EQUAL(r, 0); 3511 memset(ptr_cmd, 0, bo_cmd_size); 3512 3513 r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096, 3514 AMDGPU_GEM_DOMAIN_VRAM, 0, 3515 &bo_src, (void **)&ptr_src, 3516 &mc_address_src, &va_src); 3517 CU_ASSERT_EQUAL(r, 0); 3518 3519 r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096, 3520 AMDGPU_GEM_DOMAIN_VRAM, 0, 3521 &bo_dst, (void **)&ptr_dst, 3522 &mc_address_dst, &va_dst); 3523 CU_ASSERT_EQUAL(r, 0); 3524 3525 memset(ptr_src, 0x55, bo_size); 3526 3527 i = 0; 3528 i += amdgpu_draw_init(ptr_cmd + i); 3529 3530 i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0); 3531 3532 i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0); 3533 3534 i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs, 0); 3535 3536 i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps); 3537 3538 ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8); 3539 ptr_cmd[i++] = 0xc; 3540 ptr_cmd[i++] = mc_address_src >> 8; 3541 ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000; 3542 ptr_cmd[i++] = 0x7c01f; 3543 ptr_cmd[i++] = 0x90500fac; 3544 ptr_cmd[i++] = 0x3e000; 3545 i += 3; 3546 3547 ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4); 3548 ptr_cmd[i++] = 0x14; 3549 ptr_cmd[i++] = 0x92; 3550 i += 3; 3551 3552 ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3553 ptr_cmd[i++] = 0x191; 3554 ptr_cmd[i++] = 0; 3555 3556 i += amdgpu_draw_draw(ptr_cmd + i); 3557 3558 while (i & 7) 3559 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 3560 3561 resources[0] = bo_dst; 3562 resources[1] = bo_src; 3563 resources[2] = bo_shader_ps; 3564 resources[3] = bo_shader_vs; 3565 resources[4] = bo_cmd; 3566 r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list); 3567 CU_ASSERT_EQUAL(r, 0); 3568 3569 ib_info.ib_mc_address = mc_address_cmd; 3570 ib_info.size = i; 3571 ibs_request.ip_type = AMDGPU_HW_IP_GFX; 3572 ibs_request.ring = ring; 3573 ibs_request.resources = bo_list; 3574 ibs_request.number_of_ibs = 1; 3575 ibs_request.ibs = &ib_info; 3576 ibs_request.fence_info.handle = NULL; 3577 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 3578 CU_ASSERT_EQUAL(r, 0); 3579 3580 fence_status.ip_type = AMDGPU_HW_IP_GFX; 3581 fence_status.ip_instance = 0; 3582 fence_status.ring = ring; 3583 fence_status.context = context_handle; 3584 fence_status.fence = ibs_request.seq_no; 3585 3586 /* wait for IB accomplished */ 3587 r = amdgpu_cs_query_fence_status(&fence_status, 3588 AMDGPU_TIMEOUT_INFINITE, 3589 0, &expired); 3590 if (!hang) { 3591 CU_ASSERT_EQUAL(r, 0); 3592 CU_ASSERT_EQUAL(expired, true); 3593 3594 /* verify if memcpy test result meets with expected */ 3595 i = 0; 3596 while(i < bo_size) { 3597 CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]); 3598 i++; 3599 } 3600 } else { 3601 r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); 3602 CU_ASSERT_EQUAL(r, 0); 3603 CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); 3604 } 3605 3606 r = amdgpu_bo_list_destroy(bo_list); 3607 CU_ASSERT_EQUAL(r, 0); 3608 3609 r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size); 3610 CU_ASSERT_EQUAL(r, 0); 3611 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size); 3612 CU_ASSERT_EQUAL(r, 0); 3613 3614 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 3615 CU_ASSERT_EQUAL(r, 0); 3616 3617 r = amdgpu_cs_ctx_free(context_handle); 3618 CU_ASSERT_EQUAL(r, 0); 3619} 3620 3621void amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring, 3622 int hang) 3623{ 3624 amdgpu_bo_handle bo_shader_ps, bo_shader_vs; 3625 void *ptr_shader_ps; 3626 void *ptr_shader_vs; 3627 uint64_t mc_address_shader_ps, mc_address_shader_vs; 3628 amdgpu_va_handle va_shader_ps, va_shader_vs; 3629 int bo_shader_size = 4096; 3630 enum ps_type ps_type = hang ? PS_HANG : PS_TEX; 3631 int r; 3632 3633 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 3634 AMDGPU_GEM_DOMAIN_VRAM, 0, 3635 &bo_shader_ps, &ptr_shader_ps, 3636 &mc_address_shader_ps, &va_shader_ps); 3637 CU_ASSERT_EQUAL(r, 0); 3638 memset(ptr_shader_ps, 0, bo_shader_size); 3639 3640 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 3641 AMDGPU_GEM_DOMAIN_VRAM, 0, 3642 &bo_shader_vs, &ptr_shader_vs, 3643 &mc_address_shader_vs, &va_shader_vs); 3644 CU_ASSERT_EQUAL(r, 0); 3645 memset(ptr_shader_vs, 0, bo_shader_size); 3646 3647 r = amdgpu_draw_load_ps_shader(ptr_shader_ps, ps_type); 3648 CU_ASSERT_EQUAL(r, 0); 3649 3650 r = amdgpu_draw_load_vs_shader(ptr_shader_vs); 3651 CU_ASSERT_EQUAL(r, 0); 3652 3653 amdgpu_memcpy_draw(device_handle, bo_shader_ps, bo_shader_vs, 3654 mc_address_shader_ps, mc_address_shader_vs, ring, hang); 3655 3656 r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size); 3657 CU_ASSERT_EQUAL(r, 0); 3658 3659 r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size); 3660 CU_ASSERT_EQUAL(r, 0); 3661} 3662 3663static void amdgpu_draw_test(void) 3664{ 3665 int r; 3666 struct drm_amdgpu_info_hw_ip info; 3667 uint32_t ring_id; 3668 3669 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info); 3670 CU_ASSERT_EQUAL(r, 0); 3671 if (!info.available_rings) 3672 printf("SKIP ... as there's no graphics ring\n"); 3673 3674 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 3675 amdgpu_memset_draw_test(device_handle, ring_id); 3676 amdgpu_memcpy_draw_test(device_handle, ring_id, 0); 3677 } 3678} 3679 3680void amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint32_t ring) 3681{ 3682 amdgpu_context_handle context_handle; 3683 amdgpu_bo_handle bo_shader_ps, bo_shader_vs; 3684 amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5]; 3685 void *ptr_shader_ps; 3686 void *ptr_shader_vs; 3687 volatile unsigned char *ptr_dst; 3688 unsigned char *ptr_src; 3689 uint32_t *ptr_cmd; 3690 uint64_t mc_address_dst, mc_address_src, mc_address_cmd; 3691 uint64_t mc_address_shader_ps, mc_address_shader_vs; 3692 amdgpu_va_handle va_shader_ps, va_shader_vs; 3693 amdgpu_va_handle va_dst, va_src, va_cmd; 3694 struct amdgpu_gpu_info gpu_info = {0}; 3695 int i, r; 3696 int bo_size = 0x4000000; 3697 int bo_shader_ps_size = 0x400000; 3698 int bo_shader_vs_size = 4096; 3699 int bo_cmd_size = 4096; 3700 struct amdgpu_cs_request ibs_request = {0}; 3701 struct amdgpu_cs_ib_info ib_info= {0}; 3702 uint32_t hang_state, hangs, expired; 3703 amdgpu_bo_list_handle bo_list; 3704 struct amdgpu_cs_fence fence_status = {0}; 3705 3706 r = amdgpu_query_gpu_info(device_handle, &gpu_info); 3707 CU_ASSERT_EQUAL(r, 0); 3708 3709 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 3710 CU_ASSERT_EQUAL(r, 0); 3711 3712 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 3713 AMDGPU_GEM_DOMAIN_GTT, 0, 3714 &bo_cmd, (void **)&ptr_cmd, 3715 &mc_address_cmd, &va_cmd); 3716 CU_ASSERT_EQUAL(r, 0); 3717 memset(ptr_cmd, 0, bo_cmd_size); 3718 3719 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_ps_size, 4096, 3720 AMDGPU_GEM_DOMAIN_VRAM, 0, 3721 &bo_shader_ps, &ptr_shader_ps, 3722 &mc_address_shader_ps, &va_shader_ps); 3723 CU_ASSERT_EQUAL(r, 0); 3724 memset(ptr_shader_ps, 0, bo_shader_ps_size); 3725 3726 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_vs_size, 4096, 3727 AMDGPU_GEM_DOMAIN_VRAM, 0, 3728 &bo_shader_vs, &ptr_shader_vs, 3729 &mc_address_shader_vs, &va_shader_vs); 3730 CU_ASSERT_EQUAL(r, 0); 3731 memset(ptr_shader_vs, 0, bo_shader_vs_size); 3732 3733 r = amdgpu_draw_load_ps_shader_hang_slow(ptr_shader_ps, gpu_info.family_id); 3734 CU_ASSERT_EQUAL(r, 0); 3735 3736 r = amdgpu_draw_load_vs_shader(ptr_shader_vs); 3737 CU_ASSERT_EQUAL(r, 0); 3738 3739 r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096, 3740 AMDGPU_GEM_DOMAIN_VRAM, 0, 3741 &bo_src, (void **)&ptr_src, 3742 &mc_address_src, &va_src); 3743 CU_ASSERT_EQUAL(r, 0); 3744 3745 r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096, 3746 AMDGPU_GEM_DOMAIN_VRAM, 0, 3747 &bo_dst, (void **)&ptr_dst, 3748 &mc_address_dst, &va_dst); 3749 CU_ASSERT_EQUAL(r, 0); 3750 3751 memset(ptr_src, 0x55, bo_size); 3752 3753 i = 0; 3754 i += amdgpu_draw_init(ptr_cmd + i); 3755 3756 i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 1); 3757 3758 i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 1); 3759 3760 i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, 3761 mc_address_shader_vs, 1); 3762 3763 i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps); 3764 3765 ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8); 3766 ptr_cmd[i++] = 0xc; 3767 ptr_cmd[i++] = mc_address_src >> 8; 3768 ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000; 3769 ptr_cmd[i++] = 0x1ffc7ff; 3770 ptr_cmd[i++] = 0x90500fac; 3771 ptr_cmd[i++] = 0xffe000; 3772 i += 3; 3773 3774 ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4); 3775 ptr_cmd[i++] = 0x14; 3776 ptr_cmd[i++] = 0x92; 3777 i += 3; 3778 3779 ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3780 ptr_cmd[i++] = 0x191; 3781 ptr_cmd[i++] = 0; 3782 3783 i += amdgpu_draw_draw(ptr_cmd + i); 3784 3785 while (i & 7) 3786 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 3787 3788 resources[0] = bo_dst; 3789 resources[1] = bo_src; 3790 resources[2] = bo_shader_ps; 3791 resources[3] = bo_shader_vs; 3792 resources[4] = bo_cmd; 3793 r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list); 3794 CU_ASSERT_EQUAL(r, 0); 3795 3796 ib_info.ib_mc_address = mc_address_cmd; 3797 ib_info.size = i; 3798 ibs_request.ip_type = AMDGPU_HW_IP_GFX; 3799 ibs_request.ring = ring; 3800 ibs_request.resources = bo_list; 3801 ibs_request.number_of_ibs = 1; 3802 ibs_request.ibs = &ib_info; 3803 ibs_request.fence_info.handle = NULL; 3804 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 3805 CU_ASSERT_EQUAL(r, 0); 3806 3807 fence_status.ip_type = AMDGPU_HW_IP_GFX; 3808 fence_status.ip_instance = 0; 3809 fence_status.ring = ring; 3810 fence_status.context = context_handle; 3811 fence_status.fence = ibs_request.seq_no; 3812 3813 /* wait for IB accomplished */ 3814 r = amdgpu_cs_query_fence_status(&fence_status, 3815 AMDGPU_TIMEOUT_INFINITE, 3816 0, &expired); 3817 3818 r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); 3819 CU_ASSERT_EQUAL(r, 0); 3820 CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); 3821 3822 r = amdgpu_bo_list_destroy(bo_list); 3823 CU_ASSERT_EQUAL(r, 0); 3824 3825 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size); 3826 CU_ASSERT_EQUAL(r, 0); 3827 r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size); 3828 CU_ASSERT_EQUAL(r, 0); 3829 3830 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 3831 CU_ASSERT_EQUAL(r, 0); 3832 3833 r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_ps_size); 3834 CU_ASSERT_EQUAL(r, 0); 3835 r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_vs_size); 3836 CU_ASSERT_EQUAL(r, 0); 3837 3838 r = amdgpu_cs_ctx_free(context_handle); 3839 CU_ASSERT_EQUAL(r, 0); 3840} 3841 3842static void amdgpu_gpu_reset_test(void) 3843{ 3844 int r; 3845 char debugfs_path[256], tmp[10]; 3846 int fd; 3847 struct stat sbuf; 3848 amdgpu_context_handle context_handle; 3849 uint32_t hang_state, hangs; 3850 3851 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 3852 CU_ASSERT_EQUAL(r, 0); 3853 3854 r = fstat(drm_amdgpu[0], &sbuf); 3855 CU_ASSERT_EQUAL(r, 0); 3856 3857 sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/amdgpu_gpu_recover", minor(sbuf.st_rdev)); 3858 fd = open(debugfs_path, O_RDONLY); 3859 CU_ASSERT(fd >= 0); 3860 3861 r = read(fd, tmp, sizeof(tmp)/sizeof(char)); 3862 CU_ASSERT(r > 0); 3863 3864 r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); 3865 CU_ASSERT_EQUAL(r, 0); 3866 CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); 3867 3868 close(fd); 3869 r = amdgpu_cs_ctx_free(context_handle); 3870 CU_ASSERT_EQUAL(r, 0); 3871 3872 amdgpu_compute_dispatch_test(); 3873 amdgpu_gfx_dispatch_test(); 3874} 3875