basic_tests.c revision 9bd392ad
1/* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 
21 * 22*/ 23 24#include <stdio.h> 25#include <stdlib.h> 26#include <unistd.h> 27#include <sys/types.h> 28#ifdef MAJOR_IN_SYSMACROS 29#include <sys/sysmacros.h> 30#endif 31#include <sys/stat.h> 32#include <fcntl.h> 33#if HAVE_ALLOCA_H 34# include <alloca.h> 35#endif 36#include <sys/wait.h> 37 38#include "CUnit/Basic.h" 39 40#include "amdgpu_test.h" 41#include "amdgpu_drm.h" 42#include "util_math.h" 43 44static amdgpu_device_handle device_handle; 45static uint32_t major_version; 46static uint32_t minor_version; 47static uint32_t family_id; 48 49static void amdgpu_query_info_test(void); 50static void amdgpu_command_submission_gfx(void); 51static void amdgpu_command_submission_compute(void); 52static void amdgpu_command_submission_multi_fence(void); 53static void amdgpu_command_submission_sdma(void); 54static void amdgpu_userptr_test(void); 55static void amdgpu_semaphore_test(void); 56static void amdgpu_sync_dependency_test(void); 57static void amdgpu_bo_eviction_test(void); 58static void amdgpu_compute_dispatch_test(void); 59static void amdgpu_gfx_dispatch_test(void); 60static void amdgpu_draw_test(void); 61static void amdgpu_gpu_reset_test(void); 62 63static void amdgpu_command_submission_write_linear_helper(unsigned ip_type); 64static void amdgpu_command_submission_const_fill_helper(unsigned ip_type); 65static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type); 66static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle, 67 unsigned ip_type, 68 int instance, int pm4_dw, uint32_t *pm4_src, 69 int res_cnt, amdgpu_bo_handle *resources, 70 struct amdgpu_cs_ib_info *ib_info, 71 struct amdgpu_cs_request *ibs_request); 72 73CU_TestInfo basic_tests[] = { 74 { "Query Info Test", amdgpu_query_info_test }, 75 { "Userptr Test", amdgpu_userptr_test }, 76 { "bo eviction Test", amdgpu_bo_eviction_test }, 77 { "Command submission Test (GFX)", amdgpu_command_submission_gfx }, 78 { "Command submission Test (Compute)", 
amdgpu_command_submission_compute }, 79 { "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence }, 80 { "Command submission Test (SDMA)", amdgpu_command_submission_sdma }, 81 { "SW semaphore Test", amdgpu_semaphore_test }, 82 { "Sync dependency Test", amdgpu_sync_dependency_test }, 83 { "Dispatch Test (Compute)", amdgpu_compute_dispatch_test }, 84 { "Dispatch Test (GFX)", amdgpu_gfx_dispatch_test }, 85 { "Draw Test", amdgpu_draw_test }, 86 { "GPU reset Test", amdgpu_gpu_reset_test }, 87 CU_TEST_INFO_NULL, 88}; 89#define BUFFER_SIZE (MAX2(8 * 1024, getpagesize())) 90#define SDMA_PKT_HEADER_op_offset 0 91#define SDMA_PKT_HEADER_op_mask 0x000000FF 92#define SDMA_PKT_HEADER_op_shift 0 93#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift) 94#define SDMA_OPCODE_CONSTANT_FILL 11 95# define SDMA_CONSTANT_FILL_EXTRA_SIZE(x) ((x) << 14) 96 /* 0 = byte fill 97 * 2 = DW fill 98 */ 99#define SDMA_PACKET(op, sub_op, e) ((((e) & 0xFFFF) << 16) | \ 100 (((sub_op) & 0xFF) << 8) | \ 101 (((op) & 0xFF) << 0)) 102#define SDMA_OPCODE_WRITE 2 103# define SDMA_WRITE_SUB_OPCODE_LINEAR 0 104# define SDMA_WRTIE_SUB_OPCODE_TILED 1 105 106#define SDMA_OPCODE_COPY 1 107# define SDMA_COPY_SUB_OPCODE_LINEAR 0 108 109#define GFX_COMPUTE_NOP 0xffff1000 110#define SDMA_NOP 0x0 111 112/* PM4 */ 113#define PACKET_TYPE0 0 114#define PACKET_TYPE1 1 115#define PACKET_TYPE2 2 116#define PACKET_TYPE3 3 117 118#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3) 119#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF) 120#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF) 121#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF) 122#define PACKET0(reg, n) ((PACKET_TYPE0 << 30) | \ 123 ((reg) & 0xFFFF) | \ 124 ((n) & 0x3FFF) << 16) 125#define CP_PACKET2 0x80000000 126#define PACKET2_PAD_SHIFT 0 127#define PACKET2_PAD_MASK (0x3fffffff << 0) 128 129#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v))) 130 131#define PACKET3(op, n) ((PACKET_TYPE3 << 30) 
| \ 132 (((op) & 0xFF) << 8) | \ 133 ((n) & 0x3FFF) << 16) 134#define PACKET3_COMPUTE(op, n) PACKET3(op, n) | (1 << 1) 135 136/* Packet 3 types */ 137#define PACKET3_NOP 0x10 138 139#define PACKET3_WRITE_DATA 0x37 140#define WRITE_DATA_DST_SEL(x) ((x) << 8) 141 /* 0 - register 142 * 1 - memory (sync - via GRBM) 143 * 2 - gl2 144 * 3 - gds 145 * 4 - reserved 146 * 5 - memory (async - direct) 147 */ 148#define WR_ONE_ADDR (1 << 16) 149#define WR_CONFIRM (1 << 20) 150#define WRITE_DATA_CACHE_POLICY(x) ((x) << 25) 151 /* 0 - LRU 152 * 1 - Stream 153 */ 154#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30) 155 /* 0 - me 156 * 1 - pfp 157 * 2 - ce 158 */ 159 160#define PACKET3_DMA_DATA 0x50 161/* 1. header 162 * 2. CONTROL 163 * 3. SRC_ADDR_LO or DATA [31:0] 164 * 4. SRC_ADDR_HI [31:0] 165 * 5. DST_ADDR_LO [31:0] 166 * 6. DST_ADDR_HI [7:0] 167 * 7. COMMAND [30:21] | BYTE_COUNT [20:0] 168 */ 169/* CONTROL */ 170# define PACKET3_DMA_DATA_ENGINE(x) ((x) << 0) 171 /* 0 - ME 172 * 1 - PFP 173 */ 174# define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13) 175 /* 0 - LRU 176 * 1 - Stream 177 * 2 - Bypass 178 */ 179# define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15) 180# define PACKET3_DMA_DATA_DST_SEL(x) ((x) << 20) 181 /* 0 - DST_ADDR using DAS 182 * 1 - GDS 183 * 3 - DST_ADDR using L2 184 */ 185# define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25) 186 /* 0 - LRU 187 * 1 - Stream 188 * 2 - Bypass 189 */ 190# define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27) 191# define PACKET3_DMA_DATA_SRC_SEL(x) ((x) << 29) 192 /* 0 - SRC_ADDR using SAS 193 * 1 - GDS 194 * 2 - DATA 195 * 3 - SRC_ADDR using L2 196 */ 197# define PACKET3_DMA_DATA_CP_SYNC (1 << 31) 198/* COMMAND */ 199# define PACKET3_DMA_DATA_DIS_WC (1 << 21) 200# define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22) 201 /* 0 - none 202 * 1 - 8 in 16 203 * 2 - 8 in 32 204 * 3 - 8 in 64 205 */ 206# define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24) 207 /* 0 - none 208 * 1 - 8 in 16 209 * 2 - 8 in 32 210 * 3 - 8 in 64 211 */ 212# define 
PACKET3_DMA_DATA_CMD_SAS (1 << 26) 213 /* 0 - memory 214 * 1 - register 215 */ 216# define PACKET3_DMA_DATA_CMD_DAS (1 << 27) 217 /* 0 - memory 218 * 1 - register 219 */ 220# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28) 221# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29) 222# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30) 223 224#define SDMA_PACKET_SI(op, b, t, s, cnt) ((((op) & 0xF) << 28) | \ 225 (((b) & 0x1) << 26) | \ 226 (((t) & 0x1) << 23) | \ 227 (((s) & 0x1) << 22) | \ 228 (((cnt) & 0xFFFFF) << 0)) 229#define SDMA_OPCODE_COPY_SI 3 230#define SDMA_OPCODE_CONSTANT_FILL_SI 13 231#define SDMA_NOP_SI 0xf 232#define GFX_COMPUTE_NOP_SI 0x80000000 233#define PACKET3_DMA_DATA_SI 0x41 234# define PACKET3_DMA_DATA_SI_ENGINE(x) ((x) << 27) 235 /* 0 - ME 236 * 1 - PFP 237 */ 238# define PACKET3_DMA_DATA_SI_DST_SEL(x) ((x) << 20) 239 /* 0 - DST_ADDR using DAS 240 * 1 - GDS 241 * 3 - DST_ADDR using L2 242 */ 243# define PACKET3_DMA_DATA_SI_SRC_SEL(x) ((x) << 29) 244 /* 0 - SRC_ADDR using SAS 245 * 1 - GDS 246 * 2 - DATA 247 * 3 - SRC_ADDR using L2 248 */ 249# define PACKET3_DMA_DATA_SI_CP_SYNC (1 << 31) 250 251 252#define PKT3_CONTEXT_CONTROL 0x28 253#define CONTEXT_CONTROL_LOAD_ENABLE(x) (((unsigned)(x) & 0x1) << 31) 254#define CONTEXT_CONTROL_LOAD_CE_RAM(x) (((unsigned)(x) & 0x1) << 28) 255#define CONTEXT_CONTROL_SHADOW_ENABLE(x) (((unsigned)(x) & 0x1) << 31) 256 257#define PKT3_CLEAR_STATE 0x12 258 259#define PKT3_SET_SH_REG 0x76 260#define PACKET3_SET_SH_REG_START 0x00002c00 261 262#define PACKET3_DISPATCH_DIRECT 0x15 263#define PACKET3_EVENT_WRITE 0x46 264#define PACKET3_ACQUIRE_MEM 0x58 265#define PACKET3_SET_CONTEXT_REG 0x69 266#define PACKET3_SET_UCONFIG_REG 0x79 267#define PACKET3_DRAW_INDEX_AUTO 0x2D 268/* gfx 8 */ 269#define mmCOMPUTE_PGM_LO 0x2e0c 270#define mmCOMPUTE_PGM_RSRC1 0x2e12 271#define mmCOMPUTE_TMPRING_SIZE 0x2e18 272#define mmCOMPUTE_USER_DATA_0 0x2e40 273#define mmCOMPUTE_USER_DATA_1 0x2e41 274#define mmCOMPUTE_RESOURCE_LIMITS 0x2e15 275#define 
mmCOMPUTE_NUM_THREAD_X 0x2e07 276 277 278 279#define SWAP_32(num) (((num & 0xff000000) >> 24) | \ 280 ((num & 0x0000ff00) << 8) | \ 281 ((num & 0x00ff0000) >> 8) | \ 282 ((num & 0x000000ff) << 24)) 283 284 285/* Shader code 286 * void main() 287{ 288 289 float x = some_input; 290 for (unsigned i = 0; i < 1000000; i++) 291 x = sin(x); 292 293 u[0] = 42u; 294} 295*/ 296 297static uint32_t shader_bin[] = { 298 SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf), 299 SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf), 300 SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e), 301 SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf) 302}; 303 304#define CODE_OFFSET 512 305#define DATA_OFFSET 1024 306 307enum cs_type { 308 CS_BUFFERCLEAR, 309 CS_BUFFERCOPY, 310 CS_HANG, 311 CS_HANG_SLOW 312}; 313 314static const uint32_t bufferclear_cs_shader_gfx9[] = { 315 0xD1FD0000, 0x04010C08, 0x7E020204, 0x7E040205, 316 0x7E060206, 0x7E080207, 0xE01C2000, 0x80000100, 317 0xBF810000 318}; 319 320static const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = { 321 {0x2e12, 0x000C0041}, //{ mmCOMPUTE_PGM_RSRC1, 0x000C0041 }, 322 {0x2e13, 0x00000090}, //{ mmCOMPUTE_PGM_RSRC2, 0x00000090 }, 323 {0x2e07, 0x00000040}, //{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 }, 324 {0x2e08, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 }, 325 {0x2e09, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 } 326}; 327 328static const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5; 329 330static const uint32_t buffercopy_cs_shader_gfx9[] = { 331 0xD1FD0000, 0x04010C08, 0xE00C2000, 0x80000100, 332 0xBF8C0F70, 0xE01C2000, 0x80010100, 0xBF810000 333}; 334 335static const uint32_t preamblecache_gfx9[] = { 336 0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0, 337 0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000, 338 0xc0026900, 0xb4, 0x0, 
0x3f800000, 0xc0016900, 0x103, 0x0, 339 0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0, 340 0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0, 341 0xc0016900, 0x2d5, 0x10000, 0xc0016900, 0x2dc, 0x0, 342 0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0, 343 0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 344 0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20, 345 0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0, 346 0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0, 347 0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0, 348 0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 349 0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0, 350 0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff, 351 0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0, 352 0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1, 353 0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0, 354 0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0, 355 0xc0017900, 0x24b, 0x0 356}; 357 358enum ps_type { 359 PS_CONST, 360 PS_TEX, 361 PS_HANG, 362 PS_HANG_SLOW 363}; 364 365static const uint32_t ps_const_shader_gfx9[] = { 366 0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203, 367 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 368 0xC4001C0F, 0x00000100, 0xBF810000 369}; 370 371static const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6; 372 373static const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = { 374 {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 }, 375 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 }, 376 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 }, 377 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 }, 378 { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 }, 379 { 
0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 }, 380 { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 }, 381 { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 }, 382 { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 }, 383 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 } 384 } 385}; 386 387static const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = { 388 0x00000004 389}; 390 391static const uint32_t ps_num_sh_registers_gfx9 = 2; 392 393static const uint32_t ps_const_sh_registers_gfx9[][2] = { 394 {0x2C0A, 0x000C0040},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 }, 395 {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 } 396}; 397 398static const uint32_t ps_num_context_registers_gfx9 = 7; 399 400static const uint32_t ps_const_context_reg_gfx9[][2] = { 401 {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR, 0x00000002 }, 402 {0xA1B6, 0x00000000}, //{ mmSPI_PS_IN_CONTROL, 0x00000000 }, 403 {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK, 0x0000000F }, 404 {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL, 0x00000010 }, 405 {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT, 0x00000000 }, 406 {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */}, 407 {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT, 0x00000004 } 408}; 409 410static const uint32_t ps_tex_shader_gfx9[] = { 411 0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000, 412 0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00, 413 0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000, 414 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 415 0x00000100, 0xBF810000 416}; 417 418static const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = { 419 0x0000000B 420}; 421 422static const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6; 423 424static const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = { 425 {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 }, 426 
{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 }, 427 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 }, 428 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 }, 429 { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 430 { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 431 { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 432 { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 433 { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 434 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 } 435 } 436}; 437 438static const uint32_t ps_tex_sh_registers_gfx9[][2] = { 439 {0x2C0A, 0x000C0081},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 }, 440 {0x2C0B, 0x00000018}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 } 441}; 442 443static const uint32_t ps_tex_context_reg_gfx9[][2] = { 444 {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR, 0x00000002 }, 445 {0xA1B6, 0x00000001}, //{ mmSPI_PS_IN_CONTROL, 0x00000001 }, 446 {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK, 0x0000000F }, 447 {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL, 0x00000010 }, 448 {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT, 0x00000000 }, 449 {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */}, 450 {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT, 0x00000004 } 451}; 452 453static const uint32_t vs_RectPosTexFast_shader_gfx9[] = { 454 0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100, 455 0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206, 456 0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080, 457 0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003, 458 0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101, 459 0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903, 460 0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100, 461 0xC400020F, 0x05060403, 0xBF810000 462}; 463 464static const uint32_t 
cached_cmd_gfx9[] = { 465 0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0, 466 0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020, 467 0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf, 468 0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x12, 469 0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0, 470 0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011, 471 0xc0026900, 0x292, 0x20, 0x60201b8, 472 0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0 473}; 474 475unsigned int memcpy_ps_hang[] = { 476 0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100, 477 0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001, 478 0xC80D0101, 0xBF8C007F, 0xF0800F00, 0x00010002, 479 0xBEFE040C, 0xBF8C0F70, 0xBF800000, 0xBF800000, 480 0xF800180F, 0x03020100, 0xBF810000 481}; 482 483struct amdgpu_test_shader { 484 uint32_t *shader; 485 uint32_t header_length; 486 uint32_t body_length; 487 uint32_t foot_length; 488}; 489 490unsigned int memcpy_cs_hang_slow_ai_codes[] = { 491 0xd1fd0000, 0x04010c08, 0xe00c2000, 0x80000100, 492 0xbf8c0f70, 0xe01c2000, 0x80010100, 0xbf810000 493}; 494 495struct amdgpu_test_shader memcpy_cs_hang_slow_ai = { 496 memcpy_cs_hang_slow_ai_codes, 497 4, 498 3, 499 1 500}; 501 502unsigned int memcpy_cs_hang_slow_rv_codes[] = { 503 0x8e00860c, 0x32000000, 0xe00c2000, 0x80010100, 504 0xbf8c0f70, 0xe01c2000, 0x80020100, 0xbf810000 505}; 506 507struct amdgpu_test_shader memcpy_cs_hang_slow_rv = { 508 memcpy_cs_hang_slow_rv_codes, 509 4, 510 3, 511 1 512}; 513 514unsigned int memcpy_ps_hang_slow_ai_codes[] = { 515 0xbefc000c, 0xbe8e017e, 0xbefe077e, 0xd4080000, 516 0xd4090001, 0xd40c0100, 0xd40d0101, 0xf0800f00, 517 0x00400002, 0xbefe010e, 0xbf8c0f70, 0xbf800000, 518 0xbf800000, 0xbf800000, 0xbf800000, 0xc400180f, 519 0x03020100, 0xbf810000 520}; 521 522struct amdgpu_test_shader memcpy_ps_hang_slow_ai = { 523 memcpy_ps_hang_slow_ai_codes, 524 7, 525 2, 526 9 527}; 528 529int amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size, 530 unsigned alignment, unsigned heap, uint64_t alloc_flags, 
531 uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu, 532 uint64_t *mc_address, 533 amdgpu_va_handle *va_handle) 534{ 535 struct amdgpu_bo_alloc_request request = {}; 536 amdgpu_bo_handle buf_handle; 537 amdgpu_va_handle handle; 538 uint64_t vmc_addr; 539 int r; 540 541 request.alloc_size = size; 542 request.phys_alignment = alignment; 543 request.preferred_heap = heap; 544 request.flags = alloc_flags; 545 546 r = amdgpu_bo_alloc(dev, &request, &buf_handle); 547 if (r) 548 return r; 549 550 r = amdgpu_va_range_alloc(dev, 551 amdgpu_gpu_va_range_general, 552 size, alignment, 0, &vmc_addr, 553 &handle, 0); 554 if (r) 555 goto error_va_alloc; 556 557 r = amdgpu_bo_va_op_raw(dev, buf_handle, 0, ALIGN(size, getpagesize()), vmc_addr, 558 AMDGPU_VM_PAGE_READABLE | 559 AMDGPU_VM_PAGE_WRITEABLE | 560 AMDGPU_VM_PAGE_EXECUTABLE | 561 mapping_flags, 562 AMDGPU_VA_OP_MAP); 563 if (r) 564 goto error_va_map; 565 566 r = amdgpu_bo_cpu_map(buf_handle, cpu); 567 if (r) 568 goto error_cpu_map; 569 570 *bo = buf_handle; 571 *mc_address = vmc_addr; 572 *va_handle = handle; 573 574 return 0; 575 576 error_cpu_map: 577 amdgpu_bo_cpu_unmap(buf_handle); 578 579 error_va_map: 580 amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP); 581 582 error_va_alloc: 583 amdgpu_bo_free(buf_handle); 584 return r; 585} 586 587 588 589int suite_basic_tests_init(void) 590{ 591 struct amdgpu_gpu_info gpu_info = {0}; 592 int r; 593 594 r = amdgpu_device_initialize(drm_amdgpu[0], &major_version, 595 &minor_version, &device_handle); 596 597 if (r) { 598 if ((r == -EACCES) && (errno == EACCES)) 599 printf("\n\nError:%s. 
" 600 "Hint:Try to run this test program as root.", 601 strerror(errno)); 602 return CUE_SINIT_FAILED; 603 } 604 605 r = amdgpu_query_gpu_info(device_handle, &gpu_info); 606 if (r) 607 return CUE_SINIT_FAILED; 608 609 family_id = gpu_info.family_id; 610 611 return CUE_SUCCESS; 612} 613 614int suite_basic_tests_clean(void) 615{ 616 int r = amdgpu_device_deinitialize(device_handle); 617 618 if (r == 0) 619 return CUE_SUCCESS; 620 else 621 return CUE_SCLEAN_FAILED; 622} 623 624static void amdgpu_query_info_test(void) 625{ 626 struct amdgpu_gpu_info gpu_info = {0}; 627 uint32_t version, feature; 628 int r; 629 630 r = amdgpu_query_gpu_info(device_handle, &gpu_info); 631 CU_ASSERT_EQUAL(r, 0); 632 633 r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0, 634 0, &version, &feature); 635 CU_ASSERT_EQUAL(r, 0); 636} 637 638static void amdgpu_command_submission_gfx_separate_ibs(void) 639{ 640 amdgpu_context_handle context_handle; 641 amdgpu_bo_handle ib_result_handle, ib_result_ce_handle; 642 void *ib_result_cpu, *ib_result_ce_cpu; 643 uint64_t ib_result_mc_address, ib_result_ce_mc_address; 644 struct amdgpu_cs_request ibs_request = {0}; 645 struct amdgpu_cs_ib_info ib_info[2]; 646 struct amdgpu_cs_fence fence_status = {0}; 647 uint32_t *ptr; 648 uint32_t expired; 649 amdgpu_bo_list_handle bo_list; 650 amdgpu_va_handle va_handle, va_handle_ce; 651 int r, i = 0; 652 653 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 654 CU_ASSERT_EQUAL(r, 0); 655 656 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 657 AMDGPU_GEM_DOMAIN_GTT, 0, 658 &ib_result_handle, &ib_result_cpu, 659 &ib_result_mc_address, &va_handle); 660 CU_ASSERT_EQUAL(r, 0); 661 662 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 663 AMDGPU_GEM_DOMAIN_GTT, 0, 664 &ib_result_ce_handle, &ib_result_ce_cpu, 665 &ib_result_ce_mc_address, &va_handle_ce); 666 CU_ASSERT_EQUAL(r, 0); 667 668 r = amdgpu_get_bo_list(device_handle, ib_result_handle, 669 ib_result_ce_handle, &bo_list); 670 
CU_ASSERT_EQUAL(r, 0); 671 672 memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info)); 673 674 /* IT_SET_CE_DE_COUNTERS */ 675 ptr = ib_result_ce_cpu; 676 if (family_id != AMDGPU_FAMILY_SI) { 677 ptr[i++] = 0xc0008900; 678 ptr[i++] = 0; 679 } 680 ptr[i++] = 0xc0008400; 681 ptr[i++] = 1; 682 ib_info[0].ib_mc_address = ib_result_ce_mc_address; 683 ib_info[0].size = i; 684 ib_info[0].flags = AMDGPU_IB_FLAG_CE; 685 686 /* IT_WAIT_ON_CE_COUNTER */ 687 ptr = ib_result_cpu; 688 ptr[0] = 0xc0008600; 689 ptr[1] = 0x00000001; 690 ib_info[1].ib_mc_address = ib_result_mc_address; 691 ib_info[1].size = 2; 692 693 ibs_request.ip_type = AMDGPU_HW_IP_GFX; 694 ibs_request.number_of_ibs = 2; 695 ibs_request.ibs = ib_info; 696 ibs_request.resources = bo_list; 697 ibs_request.fence_info.handle = NULL; 698 699 r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1); 700 701 CU_ASSERT_EQUAL(r, 0); 702 703 fence_status.context = context_handle; 704 fence_status.ip_type = AMDGPU_HW_IP_GFX; 705 fence_status.ip_instance = 0; 706 fence_status.fence = ibs_request.seq_no; 707 708 r = amdgpu_cs_query_fence_status(&fence_status, 709 AMDGPU_TIMEOUT_INFINITE, 710 0, &expired); 711 CU_ASSERT_EQUAL(r, 0); 712 713 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 714 ib_result_mc_address, 4096); 715 CU_ASSERT_EQUAL(r, 0); 716 717 r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce, 718 ib_result_ce_mc_address, 4096); 719 CU_ASSERT_EQUAL(r, 0); 720 721 r = amdgpu_bo_list_destroy(bo_list); 722 CU_ASSERT_EQUAL(r, 0); 723 724 r = amdgpu_cs_ctx_free(context_handle); 725 CU_ASSERT_EQUAL(r, 0); 726 727} 728 729static void amdgpu_command_submission_gfx_shared_ib(void) 730{ 731 amdgpu_context_handle context_handle; 732 amdgpu_bo_handle ib_result_handle; 733 void *ib_result_cpu; 734 uint64_t ib_result_mc_address; 735 struct amdgpu_cs_request ibs_request = {0}; 736 struct amdgpu_cs_ib_info ib_info[2]; 737 struct amdgpu_cs_fence fence_status = {0}; 738 uint32_t *ptr; 739 uint32_t expired; 
740 amdgpu_bo_list_handle bo_list; 741 amdgpu_va_handle va_handle; 742 int r, i = 0; 743 744 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 745 CU_ASSERT_EQUAL(r, 0); 746 747 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 748 AMDGPU_GEM_DOMAIN_GTT, 0, 749 &ib_result_handle, &ib_result_cpu, 750 &ib_result_mc_address, &va_handle); 751 CU_ASSERT_EQUAL(r, 0); 752 753 r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, 754 &bo_list); 755 CU_ASSERT_EQUAL(r, 0); 756 757 memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info)); 758 759 /* IT_SET_CE_DE_COUNTERS */ 760 ptr = ib_result_cpu; 761 if (family_id != AMDGPU_FAMILY_SI) { 762 ptr[i++] = 0xc0008900; 763 ptr[i++] = 0; 764 } 765 ptr[i++] = 0xc0008400; 766 ptr[i++] = 1; 767 ib_info[0].ib_mc_address = ib_result_mc_address; 768 ib_info[0].size = i; 769 ib_info[0].flags = AMDGPU_IB_FLAG_CE; 770 771 ptr = (uint32_t *)ib_result_cpu + 4; 772 ptr[0] = 0xc0008600; 773 ptr[1] = 0x00000001; 774 ib_info[1].ib_mc_address = ib_result_mc_address + 16; 775 ib_info[1].size = 2; 776 777 ibs_request.ip_type = AMDGPU_HW_IP_GFX; 778 ibs_request.number_of_ibs = 2; 779 ibs_request.ibs = ib_info; 780 ibs_request.resources = bo_list; 781 ibs_request.fence_info.handle = NULL; 782 783 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 784 785 CU_ASSERT_EQUAL(r, 0); 786 787 fence_status.context = context_handle; 788 fence_status.ip_type = AMDGPU_HW_IP_GFX; 789 fence_status.ip_instance = 0; 790 fence_status.fence = ibs_request.seq_no; 791 792 r = amdgpu_cs_query_fence_status(&fence_status, 793 AMDGPU_TIMEOUT_INFINITE, 794 0, &expired); 795 CU_ASSERT_EQUAL(r, 0); 796 797 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 798 ib_result_mc_address, 4096); 799 CU_ASSERT_EQUAL(r, 0); 800 801 r = amdgpu_bo_list_destroy(bo_list); 802 CU_ASSERT_EQUAL(r, 0); 803 804 r = amdgpu_cs_ctx_free(context_handle); 805 CU_ASSERT_EQUAL(r, 0); 806} 807 808static void amdgpu_command_submission_gfx_cp_write_data(void) 809{ 810 
amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX); 811} 812 813static void amdgpu_command_submission_gfx_cp_const_fill(void) 814{ 815 amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX); 816} 817 818static void amdgpu_command_submission_gfx_cp_copy_data(void) 819{ 820 amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX); 821} 822 823static void amdgpu_bo_eviction_test(void) 824{ 825 const int sdma_write_length = 1024; 826 const int pm4_dw = 256; 827 amdgpu_context_handle context_handle; 828 amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2]; 829 amdgpu_bo_handle *resources; 830 uint32_t *pm4; 831 struct amdgpu_cs_ib_info *ib_info; 832 struct amdgpu_cs_request *ibs_request; 833 uint64_t bo1_mc, bo2_mc; 834 volatile unsigned char *bo1_cpu, *bo2_cpu; 835 int i, j, r, loop1, loop2; 836 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 837 amdgpu_va_handle bo1_va_handle, bo2_va_handle; 838 struct amdgpu_heap_info vram_info, gtt_info; 839 840 pm4 = calloc(pm4_dw, sizeof(*pm4)); 841 CU_ASSERT_NOT_EQUAL(pm4, NULL); 842 843 ib_info = calloc(1, sizeof(*ib_info)); 844 CU_ASSERT_NOT_EQUAL(ib_info, NULL); 845 846 ibs_request = calloc(1, sizeof(*ibs_request)); 847 CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 848 849 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 850 CU_ASSERT_EQUAL(r, 0); 851 852 /* prepare resource */ 853 resources = calloc(4, sizeof(amdgpu_bo_handle)); 854 CU_ASSERT_NOT_EQUAL(resources, NULL); 855 856 r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM, 857 0, &vram_info); 858 CU_ASSERT_EQUAL(r, 0); 859 860 r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096, 861 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]); 862 CU_ASSERT_EQUAL(r, 0); 863 r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096, 864 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]); 865 CU_ASSERT_EQUAL(r, 0); 866 867 r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT, 868 0, >t_info); 869 
CU_ASSERT_EQUAL(r, 0); 870 871 r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096, 872 AMDGPU_GEM_DOMAIN_GTT, 0, >t_max[0]); 873 CU_ASSERT_EQUAL(r, 0); 874 r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096, 875 AMDGPU_GEM_DOMAIN_GTT, 0, >t_max[1]); 876 CU_ASSERT_EQUAL(r, 0); 877 878 879 880 loop1 = loop2 = 0; 881 /* run 9 circle to test all mapping combination */ 882 while(loop1 < 2) { 883 while(loop2 < 2) { 884 /* allocate UC bo1for sDMA use */ 885 r = amdgpu_bo_alloc_and_map(device_handle, 886 sdma_write_length, 4096, 887 AMDGPU_GEM_DOMAIN_GTT, 888 gtt_flags[loop1], &bo1, 889 (void**)&bo1_cpu, &bo1_mc, 890 &bo1_va_handle); 891 CU_ASSERT_EQUAL(r, 0); 892 893 /* set bo1 */ 894 memset((void*)bo1_cpu, 0xaa, sdma_write_length); 895 896 /* allocate UC bo2 for sDMA use */ 897 r = amdgpu_bo_alloc_and_map(device_handle, 898 sdma_write_length, 4096, 899 AMDGPU_GEM_DOMAIN_GTT, 900 gtt_flags[loop2], &bo2, 901 (void**)&bo2_cpu, &bo2_mc, 902 &bo2_va_handle); 903 CU_ASSERT_EQUAL(r, 0); 904 905 /* clear bo2 */ 906 memset((void*)bo2_cpu, 0, sdma_write_length); 907 908 resources[0] = bo1; 909 resources[1] = bo2; 910 resources[2] = vram_max[loop2]; 911 resources[3] = gtt_max[loop2]; 912 913 /* fulfill PM4: test DMA copy linear */ 914 i = j = 0; 915 if (family_id == AMDGPU_FAMILY_SI) { 916 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0, 917 sdma_write_length); 918 pm4[i++] = 0xffffffff & bo2_mc; 919 pm4[i++] = 0xffffffff & bo1_mc; 920 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 921 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 922 } else { 923 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0); 924 if (family_id >= AMDGPU_FAMILY_AI) 925 pm4[i++] = sdma_write_length - 1; 926 else 927 pm4[i++] = sdma_write_length; 928 pm4[i++] = 0; 929 pm4[i++] = 0xffffffff & bo1_mc; 930 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 931 pm4[i++] = 0xffffffff & bo2_mc; 932 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 933 } 
934 935 amdgpu_test_exec_cs_helper(context_handle, 936 AMDGPU_HW_IP_DMA, 0, 937 i, pm4, 938 4, resources, 939 ib_info, ibs_request); 940 941 /* verify if SDMA test result meets with expected */ 942 i = 0; 943 while(i < sdma_write_length) { 944 CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa); 945 } 946 r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc, 947 sdma_write_length); 948 CU_ASSERT_EQUAL(r, 0); 949 r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc, 950 sdma_write_length); 951 CU_ASSERT_EQUAL(r, 0); 952 loop2++; 953 } 954 loop2 = 0; 955 loop1++; 956 } 957 amdgpu_bo_free(vram_max[0]); 958 amdgpu_bo_free(vram_max[1]); 959 amdgpu_bo_free(gtt_max[0]); 960 amdgpu_bo_free(gtt_max[1]); 961 /* clean resources */ 962 free(resources); 963 free(ibs_request); 964 free(ib_info); 965 free(pm4); 966 967 /* end of test */ 968 r = amdgpu_cs_ctx_free(context_handle); 969 CU_ASSERT_EQUAL(r, 0); 970} 971 972 973static void amdgpu_command_submission_gfx(void) 974{ 975 /* write data using the CP */ 976 amdgpu_command_submission_gfx_cp_write_data(); 977 /* const fill using the CP */ 978 amdgpu_command_submission_gfx_cp_const_fill(); 979 /* copy data using the CP */ 980 amdgpu_command_submission_gfx_cp_copy_data(); 981 /* separate IB buffers for multi-IB submission */ 982 amdgpu_command_submission_gfx_separate_ibs(); 983 /* shared IB buffer for multi-IB submission */ 984 amdgpu_command_submission_gfx_shared_ib(); 985} 986 987static void amdgpu_semaphore_test(void) 988{ 989 amdgpu_context_handle context_handle[2]; 990 amdgpu_semaphore_handle sem; 991 amdgpu_bo_handle ib_result_handle[2]; 992 void *ib_result_cpu[2]; 993 uint64_t ib_result_mc_address[2]; 994 struct amdgpu_cs_request ibs_request[2] = {0}; 995 struct amdgpu_cs_ib_info ib_info[2] = {0}; 996 struct amdgpu_cs_fence fence_status = {0}; 997 uint32_t *ptr; 998 uint32_t expired; 999 uint32_t sdma_nop, gfx_nop; 1000 amdgpu_bo_list_handle bo_list[2]; 1001 amdgpu_va_handle va_handle[2]; 1002 int r, i; 1003 1004 if (family_id == 
AMDGPU_FAMILY_SI) { 1005 sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0); 1006 gfx_nop = GFX_COMPUTE_NOP_SI; 1007 } else { 1008 sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP); 1009 gfx_nop = GFX_COMPUTE_NOP; 1010 } 1011 1012 r = amdgpu_cs_create_semaphore(&sem); 1013 CU_ASSERT_EQUAL(r, 0); 1014 for (i = 0; i < 2; i++) { 1015 r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]); 1016 CU_ASSERT_EQUAL(r, 0); 1017 1018 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 1019 AMDGPU_GEM_DOMAIN_GTT, 0, 1020 &ib_result_handle[i], &ib_result_cpu[i], 1021 &ib_result_mc_address[i], &va_handle[i]); 1022 CU_ASSERT_EQUAL(r, 0); 1023 1024 r = amdgpu_get_bo_list(device_handle, ib_result_handle[i], 1025 NULL, &bo_list[i]); 1026 CU_ASSERT_EQUAL(r, 0); 1027 } 1028 1029 /* 1. same context different engine */ 1030 ptr = ib_result_cpu[0]; 1031 ptr[0] = sdma_nop; 1032 ib_info[0].ib_mc_address = ib_result_mc_address[0]; 1033 ib_info[0].size = 1; 1034 1035 ibs_request[0].ip_type = AMDGPU_HW_IP_DMA; 1036 ibs_request[0].number_of_ibs = 1; 1037 ibs_request[0].ibs = &ib_info[0]; 1038 ibs_request[0].resources = bo_list[0]; 1039 ibs_request[0].fence_info.handle = NULL; 1040 r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1); 1041 CU_ASSERT_EQUAL(r, 0); 1042 r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem); 1043 CU_ASSERT_EQUAL(r, 0); 1044 1045 r = amdgpu_cs_wait_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem); 1046 CU_ASSERT_EQUAL(r, 0); 1047 ptr = ib_result_cpu[1]; 1048 ptr[0] = gfx_nop; 1049 ib_info[1].ib_mc_address = ib_result_mc_address[1]; 1050 ib_info[1].size = 1; 1051 1052 ibs_request[1].ip_type = AMDGPU_HW_IP_GFX; 1053 ibs_request[1].number_of_ibs = 1; 1054 ibs_request[1].ibs = &ib_info[1]; 1055 ibs_request[1].resources = bo_list[1]; 1056 ibs_request[1].fence_info.handle = NULL; 1057 1058 r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[1], 1); 1059 CU_ASSERT_EQUAL(r, 0); 1060 1061 fence_status.context = context_handle[0]; 
1062 fence_status.ip_type = AMDGPU_HW_IP_GFX; 1063 fence_status.ip_instance = 0; 1064 fence_status.fence = ibs_request[1].seq_no; 1065 r = amdgpu_cs_query_fence_status(&fence_status, 1066 500000000, 0, &expired); 1067 CU_ASSERT_EQUAL(r, 0); 1068 CU_ASSERT_EQUAL(expired, true); 1069 1070 /* 2. same engine different context */ 1071 ptr = ib_result_cpu[0]; 1072 ptr[0] = gfx_nop; 1073 ib_info[0].ib_mc_address = ib_result_mc_address[0]; 1074 ib_info[0].size = 1; 1075 1076 ibs_request[0].ip_type = AMDGPU_HW_IP_GFX; 1077 ibs_request[0].number_of_ibs = 1; 1078 ibs_request[0].ibs = &ib_info[0]; 1079 ibs_request[0].resources = bo_list[0]; 1080 ibs_request[0].fence_info.handle = NULL; 1081 r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1); 1082 CU_ASSERT_EQUAL(r, 0); 1083 r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem); 1084 CU_ASSERT_EQUAL(r, 0); 1085 1086 r = amdgpu_cs_wait_semaphore(context_handle[1], AMDGPU_HW_IP_GFX, 0, 0, sem); 1087 CU_ASSERT_EQUAL(r, 0); 1088 ptr = ib_result_cpu[1]; 1089 ptr[0] = gfx_nop; 1090 ib_info[1].ib_mc_address = ib_result_mc_address[1]; 1091 ib_info[1].size = 1; 1092 1093 ibs_request[1].ip_type = AMDGPU_HW_IP_GFX; 1094 ibs_request[1].number_of_ibs = 1; 1095 ibs_request[1].ibs = &ib_info[1]; 1096 ibs_request[1].resources = bo_list[1]; 1097 ibs_request[1].fence_info.handle = NULL; 1098 r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request[1], 1); 1099 1100 CU_ASSERT_EQUAL(r, 0); 1101 1102 fence_status.context = context_handle[1]; 1103 fence_status.ip_type = AMDGPU_HW_IP_GFX; 1104 fence_status.ip_instance = 0; 1105 fence_status.fence = ibs_request[1].seq_no; 1106 r = amdgpu_cs_query_fence_status(&fence_status, 1107 500000000, 0, &expired); 1108 CU_ASSERT_EQUAL(r, 0); 1109 CU_ASSERT_EQUAL(expired, true); 1110 1111 for (i = 0; i < 2; i++) { 1112 r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i], 1113 ib_result_mc_address[i], 4096); 1114 CU_ASSERT_EQUAL(r, 0); 1115 1116 r = 
amdgpu_bo_list_destroy(bo_list[i]); 1117 CU_ASSERT_EQUAL(r, 0); 1118 1119 r = amdgpu_cs_ctx_free(context_handle[i]); 1120 CU_ASSERT_EQUAL(r, 0); 1121 } 1122 1123 r = amdgpu_cs_destroy_semaphore(sem); 1124 CU_ASSERT_EQUAL(r, 0); 1125} 1126 1127static void amdgpu_command_submission_compute_nop(void) 1128{ 1129 amdgpu_context_handle context_handle; 1130 amdgpu_bo_handle ib_result_handle; 1131 void *ib_result_cpu; 1132 uint64_t ib_result_mc_address; 1133 struct amdgpu_cs_request ibs_request; 1134 struct amdgpu_cs_ib_info ib_info; 1135 struct amdgpu_cs_fence fence_status; 1136 uint32_t *ptr; 1137 uint32_t expired; 1138 int r, instance; 1139 amdgpu_bo_list_handle bo_list; 1140 amdgpu_va_handle va_handle; 1141 struct drm_amdgpu_info_hw_ip info; 1142 1143 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info); 1144 CU_ASSERT_EQUAL(r, 0); 1145 1146 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 1147 CU_ASSERT_EQUAL(r, 0); 1148 1149 for (instance = 0; (1 << instance) & info.available_rings; instance++) { 1150 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 1151 AMDGPU_GEM_DOMAIN_GTT, 0, 1152 &ib_result_handle, &ib_result_cpu, 1153 &ib_result_mc_address, &va_handle); 1154 CU_ASSERT_EQUAL(r, 0); 1155 1156 r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, 1157 &bo_list); 1158 CU_ASSERT_EQUAL(r, 0); 1159 1160 ptr = ib_result_cpu; 1161 memset(ptr, 0, 16); 1162 ptr[0]=PACKET3(PACKET3_NOP, 14); 1163 1164 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); 1165 ib_info.ib_mc_address = ib_result_mc_address; 1166 ib_info.size = 16; 1167 1168 memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); 1169 ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE; 1170 ibs_request.ring = instance; 1171 ibs_request.number_of_ibs = 1; 1172 ibs_request.ibs = &ib_info; 1173 ibs_request.resources = bo_list; 1174 ibs_request.fence_info.handle = NULL; 1175 1176 memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence)); 1177 r = 
amdgpu_cs_submit(context_handle, 0,&ibs_request, 1); 1178 CU_ASSERT_EQUAL(r, 0); 1179 1180 fence_status.context = context_handle; 1181 fence_status.ip_type = AMDGPU_HW_IP_COMPUTE; 1182 fence_status.ip_instance = 0; 1183 fence_status.ring = instance; 1184 fence_status.fence = ibs_request.seq_no; 1185 1186 r = amdgpu_cs_query_fence_status(&fence_status, 1187 AMDGPU_TIMEOUT_INFINITE, 1188 0, &expired); 1189 CU_ASSERT_EQUAL(r, 0); 1190 1191 r = amdgpu_bo_list_destroy(bo_list); 1192 CU_ASSERT_EQUAL(r, 0); 1193 1194 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 1195 ib_result_mc_address, 4096); 1196 CU_ASSERT_EQUAL(r, 0); 1197 } 1198 1199 r = amdgpu_cs_ctx_free(context_handle); 1200 CU_ASSERT_EQUAL(r, 0); 1201} 1202 1203static void amdgpu_command_submission_compute_cp_write_data(void) 1204{ 1205 amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE); 1206} 1207 1208static void amdgpu_command_submission_compute_cp_const_fill(void) 1209{ 1210 amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE); 1211} 1212 1213static void amdgpu_command_submission_compute_cp_copy_data(void) 1214{ 1215 amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE); 1216} 1217 1218static void amdgpu_command_submission_compute(void) 1219{ 1220 /* write data using the CP */ 1221 amdgpu_command_submission_compute_cp_write_data(); 1222 /* const fill using the CP */ 1223 amdgpu_command_submission_compute_cp_const_fill(); 1224 /* copy data using the CP */ 1225 amdgpu_command_submission_compute_cp_copy_data(); 1226 /* nop test */ 1227 amdgpu_command_submission_compute_nop(); 1228} 1229 1230/* 1231 * caller need create/release: 1232 * pm4_src, resources, ib_info, and ibs_request 1233 * submit command stream described in ibs_request and wait for this IB accomplished 1234 */ 1235static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle, 1236 unsigned ip_type, 1237 int instance, int pm4_dw, uint32_t *pm4_src, 1238 int res_cnt, amdgpu_bo_handle 
*resources, 1239 struct amdgpu_cs_ib_info *ib_info, 1240 struct amdgpu_cs_request *ibs_request) 1241{ 1242 int r; 1243 uint32_t expired; 1244 uint32_t *ring_ptr; 1245 amdgpu_bo_handle ib_result_handle; 1246 void *ib_result_cpu; 1247 uint64_t ib_result_mc_address; 1248 struct amdgpu_cs_fence fence_status = {0}; 1249 amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1)); 1250 amdgpu_va_handle va_handle; 1251 1252 /* prepare CS */ 1253 CU_ASSERT_NOT_EQUAL(pm4_src, NULL); 1254 CU_ASSERT_NOT_EQUAL(resources, NULL); 1255 CU_ASSERT_NOT_EQUAL(ib_info, NULL); 1256 CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 1257 CU_ASSERT_TRUE(pm4_dw <= 1024); 1258 1259 /* allocate IB */ 1260 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 1261 AMDGPU_GEM_DOMAIN_GTT, 0, 1262 &ib_result_handle, &ib_result_cpu, 1263 &ib_result_mc_address, &va_handle); 1264 CU_ASSERT_EQUAL(r, 0); 1265 1266 /* copy PM4 packet to ring from caller */ 1267 ring_ptr = ib_result_cpu; 1268 memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src)); 1269 1270 ib_info->ib_mc_address = ib_result_mc_address; 1271 ib_info->size = pm4_dw; 1272 1273 ibs_request->ip_type = ip_type; 1274 ibs_request->ring = instance; 1275 ibs_request->number_of_ibs = 1; 1276 ibs_request->ibs = ib_info; 1277 ibs_request->fence_info.handle = NULL; 1278 1279 memcpy(all_res, resources, sizeof(resources[0]) * res_cnt); 1280 all_res[res_cnt] = ib_result_handle; 1281 1282 r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res, 1283 NULL, &ibs_request->resources); 1284 CU_ASSERT_EQUAL(r, 0); 1285 1286 CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 1287 1288 /* submit CS */ 1289 r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1); 1290 CU_ASSERT_EQUAL(r, 0); 1291 1292 r = amdgpu_bo_list_destroy(ibs_request->resources); 1293 CU_ASSERT_EQUAL(r, 0); 1294 1295 fence_status.ip_type = ip_type; 1296 fence_status.ip_instance = 0; 1297 fence_status.ring = ibs_request->ring; 1298 fence_status.context = context_handle; 1299 fence_status.fence = 
ibs_request->seq_no; 1300 1301 /* wait for IB accomplished */ 1302 r = amdgpu_cs_query_fence_status(&fence_status, 1303 AMDGPU_TIMEOUT_INFINITE, 1304 0, &expired); 1305 CU_ASSERT_EQUAL(r, 0); 1306 CU_ASSERT_EQUAL(expired, true); 1307 1308 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 1309 ib_result_mc_address, 4096); 1310 CU_ASSERT_EQUAL(r, 0); 1311} 1312 1313static void amdgpu_command_submission_write_linear_helper(unsigned ip_type) 1314{ 1315 const int sdma_write_length = 128; 1316 const int pm4_dw = 256; 1317 amdgpu_context_handle context_handle; 1318 amdgpu_bo_handle bo; 1319 amdgpu_bo_handle *resources; 1320 uint32_t *pm4; 1321 struct amdgpu_cs_ib_info *ib_info; 1322 struct amdgpu_cs_request *ibs_request; 1323 uint64_t bo_mc; 1324 volatile uint32_t *bo_cpu; 1325 int i, j, r, loop, ring_id; 1326 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 1327 amdgpu_va_handle va_handle; 1328 struct drm_amdgpu_info_hw_ip hw_ip_info; 1329 1330 pm4 = calloc(pm4_dw, sizeof(*pm4)); 1331 CU_ASSERT_NOT_EQUAL(pm4, NULL); 1332 1333 ib_info = calloc(1, sizeof(*ib_info)); 1334 CU_ASSERT_NOT_EQUAL(ib_info, NULL); 1335 1336 ibs_request = calloc(1, sizeof(*ibs_request)); 1337 CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 1338 1339 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info); 1340 CU_ASSERT_EQUAL(r, 0); 1341 1342 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 1343 CU_ASSERT_EQUAL(r, 0); 1344 1345 /* prepare resource */ 1346 resources = calloc(1, sizeof(amdgpu_bo_handle)); 1347 CU_ASSERT_NOT_EQUAL(resources, NULL); 1348 1349 for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) { 1350 loop = 0; 1351 while(loop < 2) { 1352 /* allocate UC bo for sDMA use */ 1353 r = amdgpu_bo_alloc_and_map(device_handle, 1354 sdma_write_length * sizeof(uint32_t), 1355 4096, AMDGPU_GEM_DOMAIN_GTT, 1356 gtt_flags[loop], &bo, (void**)&bo_cpu, 1357 &bo_mc, &va_handle); 1358 CU_ASSERT_EQUAL(r, 0); 1359 1360 /* clear bo */ 1361 
memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t)); 1362 1363 resources[0] = bo; 1364 1365 /* fulfill PM4: test DMA write-linear */ 1366 i = j = 0; 1367 if (ip_type == AMDGPU_HW_IP_DMA) { 1368 if (family_id == AMDGPU_FAMILY_SI) 1369 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0, 1370 sdma_write_length); 1371 else 1372 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE, 1373 SDMA_WRITE_SUB_OPCODE_LINEAR, 0); 1374 pm4[i++] = 0xffffffff & bo_mc; 1375 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1376 if (family_id >= AMDGPU_FAMILY_AI) 1377 pm4[i++] = sdma_write_length - 1; 1378 else if (family_id != AMDGPU_FAMILY_SI) 1379 pm4[i++] = sdma_write_length; 1380 while(j++ < sdma_write_length) 1381 pm4[i++] = 0xdeadbeaf; 1382 } else if ((ip_type == AMDGPU_HW_IP_GFX) || 1383 (ip_type == AMDGPU_HW_IP_COMPUTE)) { 1384 pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length); 1385 pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 1386 pm4[i++] = 0xfffffffc & bo_mc; 1387 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1388 while(j++ < sdma_write_length) 1389 pm4[i++] = 0xdeadbeaf; 1390 } 1391 1392 amdgpu_test_exec_cs_helper(context_handle, 1393 ip_type, ring_id, 1394 i, pm4, 1395 1, resources, 1396 ib_info, ibs_request); 1397 1398 /* verify if SDMA test result meets with expected */ 1399 i = 0; 1400 while(i < sdma_write_length) { 1401 CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf); 1402 } 1403 1404 r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc, 1405 sdma_write_length * sizeof(uint32_t)); 1406 CU_ASSERT_EQUAL(r, 0); 1407 loop++; 1408 } 1409 } 1410 /* clean resources */ 1411 free(resources); 1412 free(ibs_request); 1413 free(ib_info); 1414 free(pm4); 1415 1416 /* end of test */ 1417 r = amdgpu_cs_ctx_free(context_handle); 1418 CU_ASSERT_EQUAL(r, 0); 1419} 1420 1421static void amdgpu_command_submission_sdma_write_linear(void) 1422{ 1423 amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA); 1424} 1425 1426static void 
amdgpu_command_submission_const_fill_helper(unsigned ip_type) 1427{ 1428 const int sdma_write_length = 1024 * 1024; 1429 const int pm4_dw = 256; 1430 amdgpu_context_handle context_handle; 1431 amdgpu_bo_handle bo; 1432 amdgpu_bo_handle *resources; 1433 uint32_t *pm4; 1434 struct amdgpu_cs_ib_info *ib_info; 1435 struct amdgpu_cs_request *ibs_request; 1436 uint64_t bo_mc; 1437 volatile uint32_t *bo_cpu; 1438 int i, j, r, loop, ring_id; 1439 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 1440 amdgpu_va_handle va_handle; 1441 struct drm_amdgpu_info_hw_ip hw_ip_info; 1442 1443 pm4 = calloc(pm4_dw, sizeof(*pm4)); 1444 CU_ASSERT_NOT_EQUAL(pm4, NULL); 1445 1446 ib_info = calloc(1, sizeof(*ib_info)); 1447 CU_ASSERT_NOT_EQUAL(ib_info, NULL); 1448 1449 ibs_request = calloc(1, sizeof(*ibs_request)); 1450 CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 1451 1452 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info); 1453 CU_ASSERT_EQUAL(r, 0); 1454 1455 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 1456 CU_ASSERT_EQUAL(r, 0); 1457 1458 /* prepare resource */ 1459 resources = calloc(1, sizeof(amdgpu_bo_handle)); 1460 CU_ASSERT_NOT_EQUAL(resources, NULL); 1461 1462 for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) { 1463 loop = 0; 1464 while(loop < 2) { 1465 /* allocate UC bo for sDMA use */ 1466 r = amdgpu_bo_alloc_and_map(device_handle, 1467 sdma_write_length, 4096, 1468 AMDGPU_GEM_DOMAIN_GTT, 1469 gtt_flags[loop], &bo, (void**)&bo_cpu, 1470 &bo_mc, &va_handle); 1471 CU_ASSERT_EQUAL(r, 0); 1472 1473 /* clear bo */ 1474 memset((void*)bo_cpu, 0, sdma_write_length); 1475 1476 resources[0] = bo; 1477 1478 /* fulfill PM4: test DMA const fill */ 1479 i = j = 0; 1480 if (ip_type == AMDGPU_HW_IP_DMA) { 1481 if (family_id == AMDGPU_FAMILY_SI) { 1482 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI, 1483 0, 0, 0, 1484 sdma_write_length / 4); 1485 pm4[i++] = 0xfffffffc & bo_mc; 1486 pm4[i++] = 0xdeadbeaf; 1487 pm4[i++] = 
(0xffffffff00000000 & bo_mc) >> 16; 1488 } else { 1489 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0, 1490 SDMA_CONSTANT_FILL_EXTRA_SIZE(2)); 1491 pm4[i++] = 0xffffffff & bo_mc; 1492 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1493 pm4[i++] = 0xdeadbeaf; 1494 if (family_id >= AMDGPU_FAMILY_AI) 1495 pm4[i++] = sdma_write_length - 1; 1496 else 1497 pm4[i++] = sdma_write_length; 1498 } 1499 } else if ((ip_type == AMDGPU_HW_IP_GFX) || 1500 (ip_type == AMDGPU_HW_IP_COMPUTE)) { 1501 if (family_id == AMDGPU_FAMILY_SI) { 1502 pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4); 1503 pm4[i++] = 0xdeadbeaf; 1504 pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) | 1505 PACKET3_DMA_DATA_SI_DST_SEL(0) | 1506 PACKET3_DMA_DATA_SI_SRC_SEL(2) | 1507 PACKET3_DMA_DATA_SI_CP_SYNC; 1508 pm4[i++] = 0xffffffff & bo_mc; 1509 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1510 pm4[i++] = sdma_write_length; 1511 } else { 1512 pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5); 1513 pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) | 1514 PACKET3_DMA_DATA_DST_SEL(0) | 1515 PACKET3_DMA_DATA_SRC_SEL(2) | 1516 PACKET3_DMA_DATA_CP_SYNC; 1517 pm4[i++] = 0xdeadbeaf; 1518 pm4[i++] = 0; 1519 pm4[i++] = 0xfffffffc & bo_mc; 1520 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1521 pm4[i++] = sdma_write_length; 1522 } 1523 } 1524 1525 amdgpu_test_exec_cs_helper(context_handle, 1526 ip_type, ring_id, 1527 i, pm4, 1528 1, resources, 1529 ib_info, ibs_request); 1530 1531 /* verify if SDMA test result meets with expected */ 1532 i = 0; 1533 while(i < (sdma_write_length / 4)) { 1534 CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf); 1535 } 1536 1537 r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc, 1538 sdma_write_length); 1539 CU_ASSERT_EQUAL(r, 0); 1540 loop++; 1541 } 1542 } 1543 /* clean resources */ 1544 free(resources); 1545 free(ibs_request); 1546 free(ib_info); 1547 free(pm4); 1548 1549 /* end of test */ 1550 r = amdgpu_cs_ctx_free(context_handle); 1551 CU_ASSERT_EQUAL(r, 0); 1552} 1553 1554static void 
amdgpu_command_submission_sdma_const_fill(void) 1555{ 1556 amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA); 1557} 1558 1559static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type) 1560{ 1561 const int sdma_write_length = 1024; 1562 const int pm4_dw = 256; 1563 amdgpu_context_handle context_handle; 1564 amdgpu_bo_handle bo1, bo2; 1565 amdgpu_bo_handle *resources; 1566 uint32_t *pm4; 1567 struct amdgpu_cs_ib_info *ib_info; 1568 struct amdgpu_cs_request *ibs_request; 1569 uint64_t bo1_mc, bo2_mc; 1570 volatile unsigned char *bo1_cpu, *bo2_cpu; 1571 int i, j, r, loop1, loop2, ring_id; 1572 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 1573 amdgpu_va_handle bo1_va_handle, bo2_va_handle; 1574 struct drm_amdgpu_info_hw_ip hw_ip_info; 1575 1576 pm4 = calloc(pm4_dw, sizeof(*pm4)); 1577 CU_ASSERT_NOT_EQUAL(pm4, NULL); 1578 1579 ib_info = calloc(1, sizeof(*ib_info)); 1580 CU_ASSERT_NOT_EQUAL(ib_info, NULL); 1581 1582 ibs_request = calloc(1, sizeof(*ibs_request)); 1583 CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 1584 1585 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info); 1586 CU_ASSERT_EQUAL(r, 0); 1587 1588 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 1589 CU_ASSERT_EQUAL(r, 0); 1590 1591 /* prepare resource */ 1592 resources = calloc(2, sizeof(amdgpu_bo_handle)); 1593 CU_ASSERT_NOT_EQUAL(resources, NULL); 1594 1595 for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) { 1596 loop1 = loop2 = 0; 1597 /* run 9 circle to test all mapping combination */ 1598 while(loop1 < 2) { 1599 while(loop2 < 2) { 1600 /* allocate UC bo1for sDMA use */ 1601 r = amdgpu_bo_alloc_and_map(device_handle, 1602 sdma_write_length, 4096, 1603 AMDGPU_GEM_DOMAIN_GTT, 1604 gtt_flags[loop1], &bo1, 1605 (void**)&bo1_cpu, &bo1_mc, 1606 &bo1_va_handle); 1607 CU_ASSERT_EQUAL(r, 0); 1608 1609 /* set bo1 */ 1610 memset((void*)bo1_cpu, 0xaa, sdma_write_length); 1611 1612 /* allocate UC bo2 for sDMA use */ 1613 r = 
amdgpu_bo_alloc_and_map(device_handle, 1614 sdma_write_length, 4096, 1615 AMDGPU_GEM_DOMAIN_GTT, 1616 gtt_flags[loop2], &bo2, 1617 (void**)&bo2_cpu, &bo2_mc, 1618 &bo2_va_handle); 1619 CU_ASSERT_EQUAL(r, 0); 1620 1621 /* clear bo2 */ 1622 memset((void*)bo2_cpu, 0, sdma_write_length); 1623 1624 resources[0] = bo1; 1625 resources[1] = bo2; 1626 1627 /* fulfill PM4: test DMA copy linear */ 1628 i = j = 0; 1629 if (ip_type == AMDGPU_HW_IP_DMA) { 1630 if (family_id == AMDGPU_FAMILY_SI) { 1631 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 1632 0, 0, 0, 1633 sdma_write_length); 1634 pm4[i++] = 0xffffffff & bo2_mc; 1635 pm4[i++] = 0xffffffff & bo1_mc; 1636 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 1637 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 1638 } else { 1639 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, 1640 SDMA_COPY_SUB_OPCODE_LINEAR, 1641 0); 1642 if (family_id >= AMDGPU_FAMILY_AI) 1643 pm4[i++] = sdma_write_length - 1; 1644 else 1645 pm4[i++] = sdma_write_length; 1646 pm4[i++] = 0; 1647 pm4[i++] = 0xffffffff & bo1_mc; 1648 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 1649 pm4[i++] = 0xffffffff & bo2_mc; 1650 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 1651 } 1652 } else if ((ip_type == AMDGPU_HW_IP_GFX) || 1653 (ip_type == AMDGPU_HW_IP_COMPUTE)) { 1654 if (family_id == AMDGPU_FAMILY_SI) { 1655 pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4); 1656 pm4[i++] = 0xfffffffc & bo1_mc; 1657 pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) | 1658 PACKET3_DMA_DATA_SI_DST_SEL(0) | 1659 PACKET3_DMA_DATA_SI_SRC_SEL(0) | 1660 PACKET3_DMA_DATA_SI_CP_SYNC | 1661 (0xffff00000000 & bo1_mc) >> 32; 1662 pm4[i++] = 0xfffffffc & bo2_mc; 1663 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 1664 pm4[i++] = sdma_write_length; 1665 } else { 1666 pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5); 1667 pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) | 1668 PACKET3_DMA_DATA_DST_SEL(0) | 1669 PACKET3_DMA_DATA_SRC_SEL(0) | 1670 PACKET3_DMA_DATA_CP_SYNC; 1671 pm4[i++] = 0xfffffffc & bo1_mc; 1672 pm4[i++] = 
(0xffffffff00000000 & bo1_mc) >> 32; 1673 pm4[i++] = 0xfffffffc & bo2_mc; 1674 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 1675 pm4[i++] = sdma_write_length; 1676 } 1677 } 1678 1679 amdgpu_test_exec_cs_helper(context_handle, 1680 ip_type, ring_id, 1681 i, pm4, 1682 2, resources, 1683 ib_info, ibs_request); 1684 1685 /* verify if SDMA test result meets with expected */ 1686 i = 0; 1687 while(i < sdma_write_length) { 1688 CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa); 1689 } 1690 r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc, 1691 sdma_write_length); 1692 CU_ASSERT_EQUAL(r, 0); 1693 r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc, 1694 sdma_write_length); 1695 CU_ASSERT_EQUAL(r, 0); 1696 loop2++; 1697 } 1698 loop1++; 1699 } 1700 } 1701 /* clean resources */ 1702 free(resources); 1703 free(ibs_request); 1704 free(ib_info); 1705 free(pm4); 1706 1707 /* end of test */ 1708 r = amdgpu_cs_ctx_free(context_handle); 1709 CU_ASSERT_EQUAL(r, 0); 1710} 1711 1712static void amdgpu_command_submission_sdma_copy_linear(void) 1713{ 1714 amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA); 1715} 1716 1717static void amdgpu_command_submission_sdma(void) 1718{ 1719 amdgpu_command_submission_sdma_write_linear(); 1720 amdgpu_command_submission_sdma_const_fill(); 1721 amdgpu_command_submission_sdma_copy_linear(); 1722} 1723 1724static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all) 1725{ 1726 amdgpu_context_handle context_handle; 1727 amdgpu_bo_handle ib_result_handle, ib_result_ce_handle; 1728 void *ib_result_cpu, *ib_result_ce_cpu; 1729 uint64_t ib_result_mc_address, ib_result_ce_mc_address; 1730 struct amdgpu_cs_request ibs_request[2] = {0}; 1731 struct amdgpu_cs_ib_info ib_info[2]; 1732 struct amdgpu_cs_fence fence_status[2] = {0}; 1733 uint32_t *ptr; 1734 uint32_t expired; 1735 amdgpu_bo_list_handle bo_list; 1736 amdgpu_va_handle va_handle, va_handle_ce; 1737 int r; 1738 int i = 0, ib_cs_num = 2; 1739 1740 r = 
amdgpu_cs_ctx_create(device_handle, &context_handle); 1741 CU_ASSERT_EQUAL(r, 0); 1742 1743 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 1744 AMDGPU_GEM_DOMAIN_GTT, 0, 1745 &ib_result_handle, &ib_result_cpu, 1746 &ib_result_mc_address, &va_handle); 1747 CU_ASSERT_EQUAL(r, 0); 1748 1749 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 1750 AMDGPU_GEM_DOMAIN_GTT, 0, 1751 &ib_result_ce_handle, &ib_result_ce_cpu, 1752 &ib_result_ce_mc_address, &va_handle_ce); 1753 CU_ASSERT_EQUAL(r, 0); 1754 1755 r = amdgpu_get_bo_list(device_handle, ib_result_handle, 1756 ib_result_ce_handle, &bo_list); 1757 CU_ASSERT_EQUAL(r, 0); 1758 1759 memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info)); 1760 1761 /* IT_SET_CE_DE_COUNTERS */ 1762 ptr = ib_result_ce_cpu; 1763 if (family_id != AMDGPU_FAMILY_SI) { 1764 ptr[i++] = 0xc0008900; 1765 ptr[i++] = 0; 1766 } 1767 ptr[i++] = 0xc0008400; 1768 ptr[i++] = 1; 1769 ib_info[0].ib_mc_address = ib_result_ce_mc_address; 1770 ib_info[0].size = i; 1771 ib_info[0].flags = AMDGPU_IB_FLAG_CE; 1772 1773 /* IT_WAIT_ON_CE_COUNTER */ 1774 ptr = ib_result_cpu; 1775 ptr[0] = 0xc0008600; 1776 ptr[1] = 0x00000001; 1777 ib_info[1].ib_mc_address = ib_result_mc_address; 1778 ib_info[1].size = 2; 1779 1780 for (i = 0; i < ib_cs_num; i++) { 1781 ibs_request[i].ip_type = AMDGPU_HW_IP_GFX; 1782 ibs_request[i].number_of_ibs = 2; 1783 ibs_request[i].ibs = ib_info; 1784 ibs_request[i].resources = bo_list; 1785 ibs_request[i].fence_info.handle = NULL; 1786 } 1787 1788 r = amdgpu_cs_submit(context_handle, 0,ibs_request, ib_cs_num); 1789 1790 CU_ASSERT_EQUAL(r, 0); 1791 1792 for (i = 0; i < ib_cs_num; i++) { 1793 fence_status[i].context = context_handle; 1794 fence_status[i].ip_type = AMDGPU_HW_IP_GFX; 1795 fence_status[i].fence = ibs_request[i].seq_no; 1796 } 1797 1798 r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all, 1799 AMDGPU_TIMEOUT_INFINITE, 1800 &expired, NULL); 1801 CU_ASSERT_EQUAL(r, 0); 1802 1803 r = 
amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 1804 ib_result_mc_address, 4096); 1805 CU_ASSERT_EQUAL(r, 0); 1806 1807 r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce, 1808 ib_result_ce_mc_address, 4096); 1809 CU_ASSERT_EQUAL(r, 0); 1810 1811 r = amdgpu_bo_list_destroy(bo_list); 1812 CU_ASSERT_EQUAL(r, 0); 1813 1814 r = amdgpu_cs_ctx_free(context_handle); 1815 CU_ASSERT_EQUAL(r, 0); 1816} 1817 1818static void amdgpu_command_submission_multi_fence(void) 1819{ 1820 amdgpu_command_submission_multi_fence_wait_all(true); 1821 amdgpu_command_submission_multi_fence_wait_all(false); 1822} 1823 1824static void amdgpu_userptr_test(void) 1825{ 1826 int i, r, j; 1827 uint32_t *pm4 = NULL; 1828 uint64_t bo_mc; 1829 void *ptr = NULL; 1830 int pm4_dw = 256; 1831 int sdma_write_length = 4; 1832 amdgpu_bo_handle handle; 1833 amdgpu_context_handle context_handle; 1834 struct amdgpu_cs_ib_info *ib_info; 1835 struct amdgpu_cs_request *ibs_request; 1836 amdgpu_bo_handle buf_handle; 1837 amdgpu_va_handle va_handle; 1838 1839 pm4 = calloc(pm4_dw, sizeof(*pm4)); 1840 CU_ASSERT_NOT_EQUAL(pm4, NULL); 1841 1842 ib_info = calloc(1, sizeof(*ib_info)); 1843 CU_ASSERT_NOT_EQUAL(ib_info, NULL); 1844 1845 ibs_request = calloc(1, sizeof(*ibs_request)); 1846 CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 1847 1848 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 1849 CU_ASSERT_EQUAL(r, 0); 1850 1851 posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE); 1852 CU_ASSERT_NOT_EQUAL(ptr, NULL); 1853 memset(ptr, 0, BUFFER_SIZE); 1854 1855 r = amdgpu_create_bo_from_user_mem(device_handle, 1856 ptr, BUFFER_SIZE, &buf_handle); 1857 CU_ASSERT_EQUAL(r, 0); 1858 1859 r = amdgpu_va_range_alloc(device_handle, 1860 amdgpu_gpu_va_range_general, 1861 BUFFER_SIZE, 1, 0, &bo_mc, 1862 &va_handle, 0); 1863 CU_ASSERT_EQUAL(r, 0); 1864 1865 r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP); 1866 CU_ASSERT_EQUAL(r, 0); 1867 1868 handle = buf_handle; 1869 1870 j = i = 0; 
1871 1872 if (family_id == AMDGPU_FAMILY_SI) 1873 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0, 1874 sdma_write_length); 1875 else 1876 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE, 1877 SDMA_WRITE_SUB_OPCODE_LINEAR, 0); 1878 pm4[i++] = 0xffffffff & bo_mc; 1879 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1880 if (family_id >= AMDGPU_FAMILY_AI) 1881 pm4[i++] = sdma_write_length - 1; 1882 else if (family_id != AMDGPU_FAMILY_SI) 1883 pm4[i++] = sdma_write_length; 1884 1885 while (j++ < sdma_write_length) 1886 pm4[i++] = 0xdeadbeaf; 1887 1888 if (!fork()) { 1889 pm4[0] = 0x0; 1890 exit(0); 1891 } 1892 1893 amdgpu_test_exec_cs_helper(context_handle, 1894 AMDGPU_HW_IP_DMA, 0, 1895 i, pm4, 1896 1, &handle, 1897 ib_info, ibs_request); 1898 i = 0; 1899 while (i < sdma_write_length) { 1900 CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf); 1901 } 1902 free(ibs_request); 1903 free(ib_info); 1904 free(pm4); 1905 1906 r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP); 1907 CU_ASSERT_EQUAL(r, 0); 1908 r = amdgpu_va_range_free(va_handle); 1909 CU_ASSERT_EQUAL(r, 0); 1910 r = amdgpu_bo_free(buf_handle); 1911 CU_ASSERT_EQUAL(r, 0); 1912 free(ptr); 1913 1914 r = amdgpu_cs_ctx_free(context_handle); 1915 CU_ASSERT_EQUAL(r, 0); 1916 1917 wait(NULL); 1918} 1919 1920static void amdgpu_sync_dependency_test(void) 1921{ 1922 amdgpu_context_handle context_handle[2]; 1923 amdgpu_bo_handle ib_result_handle; 1924 void *ib_result_cpu; 1925 uint64_t ib_result_mc_address; 1926 struct amdgpu_cs_request ibs_request; 1927 struct amdgpu_cs_ib_info ib_info; 1928 struct amdgpu_cs_fence fence_status; 1929 uint32_t expired; 1930 int i, j, r; 1931 amdgpu_bo_list_handle bo_list; 1932 amdgpu_va_handle va_handle; 1933 static uint32_t *ptr; 1934 uint64_t seq_no; 1935 1936 r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]); 1937 CU_ASSERT_EQUAL(r, 0); 1938 r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]); 1939 CU_ASSERT_EQUAL(r, 0); 1940 1941 r = 
amdgpu_bo_alloc_and_map(device_handle, 8192, 4096, 1942 AMDGPU_GEM_DOMAIN_GTT, 0, 1943 &ib_result_handle, &ib_result_cpu, 1944 &ib_result_mc_address, &va_handle); 1945 CU_ASSERT_EQUAL(r, 0); 1946 1947 r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, 1948 &bo_list); 1949 CU_ASSERT_EQUAL(r, 0); 1950 1951 ptr = ib_result_cpu; 1952 i = 0; 1953 1954 memcpy(ptr + CODE_OFFSET , shader_bin, sizeof(shader_bin)); 1955 1956 /* Dispatch minimal init config and verify it's executed */ 1957 ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); 1958 ptr[i++] = 0x80000000; 1959 ptr[i++] = 0x80000000; 1960 1961 ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0); 1962 ptr[i++] = 0x80000000; 1963 1964 1965 /* Program compute regs */ 1966 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 1967 ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1968 ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8; 1969 ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40; 1970 1971 1972 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 1973 ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START; 1974 /* 1975 * 002c0040 COMPUTE_PGM_RSRC1 <- VGPRS = 0 1976 SGPRS = 1 1977 PRIORITY = 0 1978 FLOAT_MODE = 192 (0xc0) 1979 PRIV = 0 1980 DX10_CLAMP = 1 1981 DEBUG_MODE = 0 1982 IEEE_MODE = 0 1983 BULKY = 0 1984 CDBG_USER = 0 1985 * 1986 */ 1987 ptr[i++] = 0x002c0040; 1988 1989 1990 /* 1991 * 00000010 COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0 1992 USER_SGPR = 8 1993 TRAP_PRESENT = 0 1994 TGID_X_EN = 0 1995 TGID_Y_EN = 0 1996 TGID_Z_EN = 0 1997 TG_SIZE_EN = 0 1998 TIDIG_COMP_CNT = 0 1999 EXCP_EN_MSB = 0 2000 LDS_SIZE = 0 2001 EXCP_EN = 0 2002 * 2003 */ 2004 ptr[i++] = 0x00000010; 2005 2006 2007/* 2008 * 00000100 COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100) 2009 WAVESIZE = 0 2010 * 2011 */ 2012 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 2013 ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START; 2014 ptr[i++] = 0x00000100; 2015 2016 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 2017 ptr[i++] = mmCOMPUTE_USER_DATA_0 - 
PACKET3_SET_SH_REG_START; 2018 ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4); 2019 ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32; 2020 2021 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 2022 ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START; 2023 ptr[i++] = 0; 2024 2025 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3); 2026 ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START; 2027 ptr[i++] = 1; 2028 ptr[i++] = 1; 2029 ptr[i++] = 1; 2030 2031 2032 /* Dispatch */ 2033 ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 2034 ptr[i++] = 1; 2035 ptr[i++] = 1; 2036 ptr[i++] = 1; 2037 ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */ 2038 2039 2040 while (i & 7) 2041 ptr[i++] = 0xffff1000; /* type3 nop packet */ 2042 2043 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); 2044 ib_info.ib_mc_address = ib_result_mc_address; 2045 ib_info.size = i; 2046 2047 memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); 2048 ibs_request.ip_type = AMDGPU_HW_IP_GFX; 2049 ibs_request.ring = 0; 2050 ibs_request.number_of_ibs = 1; 2051 ibs_request.ibs = &ib_info; 2052 ibs_request.resources = bo_list; 2053 ibs_request.fence_info.handle = NULL; 2054 2055 r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request, 1); 2056 CU_ASSERT_EQUAL(r, 0); 2057 seq_no = ibs_request.seq_no; 2058 2059 2060 2061 /* Prepare second command with dependency on the first */ 2062 j = i; 2063 ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3); 2064 ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 2065 ptr[i++] = 0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4); 2066 ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32; 2067 ptr[i++] = 99; 2068 2069 while (i & 7) 2070 ptr[i++] = 0xffff1000; /* type3 nop packet */ 2071 2072 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); 2073 ib_info.ib_mc_address = ib_result_mc_address + j * 4; 2074 ib_info.size = i - j; 2075 2076 memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); 
2077 ibs_request.ip_type = AMDGPU_HW_IP_GFX; 2078 ibs_request.ring = 0; 2079 ibs_request.number_of_ibs = 1; 2080 ibs_request.ibs = &ib_info; 2081 ibs_request.resources = bo_list; 2082 ibs_request.fence_info.handle = NULL; 2083 2084 ibs_request.number_of_dependencies = 1; 2085 2086 ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies)); 2087 ibs_request.dependencies[0].context = context_handle[1]; 2088 ibs_request.dependencies[0].ip_instance = 0; 2089 ibs_request.dependencies[0].ring = 0; 2090 ibs_request.dependencies[0].fence = seq_no; 2091 2092 2093 r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request, 1); 2094 CU_ASSERT_EQUAL(r, 0); 2095 2096 2097 memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence)); 2098 fence_status.context = context_handle[0]; 2099 fence_status.ip_type = AMDGPU_HW_IP_GFX; 2100 fence_status.ip_instance = 0; 2101 fence_status.ring = 0; 2102 fence_status.fence = ibs_request.seq_no; 2103 2104 r = amdgpu_cs_query_fence_status(&fence_status, 2105 AMDGPU_TIMEOUT_INFINITE,0, &expired); 2106 CU_ASSERT_EQUAL(r, 0); 2107 2108 /* Expect the second command to wait for shader to complete */ 2109 CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99); 2110 2111 r = amdgpu_bo_list_destroy(bo_list); 2112 CU_ASSERT_EQUAL(r, 0); 2113 2114 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 2115 ib_result_mc_address, 4096); 2116 CU_ASSERT_EQUAL(r, 0); 2117 2118 r = amdgpu_cs_ctx_free(context_handle[0]); 2119 CU_ASSERT_EQUAL(r, 0); 2120 r = amdgpu_cs_ctx_free(context_handle[1]); 2121 CU_ASSERT_EQUAL(r, 0); 2122 2123 free(ibs_request.dependencies); 2124} 2125 2126static int amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t *ptr, int family) 2127{ 2128 struct amdgpu_test_shader *shader; 2129 int i, loop = 0x10000; 2130 2131 switch (family) { 2132 case AMDGPU_FAMILY_AI: 2133 shader = &memcpy_cs_hang_slow_ai; 2134 break; 2135 case AMDGPU_FAMILY_RV: 2136 shader = &memcpy_cs_hang_slow_rv; 2137 break; 2138 default: 2139 return -1; 2140 break; 2141 } 2142 2143 
memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t)); 2144 2145 for (i = 0; i < loop; i++) 2146 memcpy(ptr + shader->header_length + shader->body_length * i, 2147 shader->shader + shader->header_length, 2148 shader->body_length * sizeof(uint32_t)); 2149 2150 memcpy(ptr + shader->header_length + shader->body_length * loop, 2151 shader->shader + shader->header_length + shader->body_length, 2152 shader->foot_length * sizeof(uint32_t)); 2153 2154 return 0; 2155} 2156 2157static int amdgpu_dispatch_load_cs_shader(uint8_t *ptr, 2158 int cs_type) 2159{ 2160 uint32_t shader_size; 2161 const uint32_t *shader; 2162 2163 switch (cs_type) { 2164 case CS_BUFFERCLEAR: 2165 shader = bufferclear_cs_shader_gfx9; 2166 shader_size = sizeof(bufferclear_cs_shader_gfx9); 2167 break; 2168 case CS_BUFFERCOPY: 2169 shader = buffercopy_cs_shader_gfx9; 2170 shader_size = sizeof(buffercopy_cs_shader_gfx9); 2171 break; 2172 case CS_HANG: 2173 shader = memcpy_ps_hang; 2174 shader_size = sizeof(memcpy_ps_hang); 2175 break; 2176 default: 2177 return -1; 2178 break; 2179 } 2180 2181 memcpy(ptr, shader, shader_size); 2182 return 0; 2183} 2184 2185static int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type) 2186{ 2187 int i = 0; 2188 2189 /* Write context control and load shadowing register if necessary */ 2190 if (ip_type == AMDGPU_HW_IP_GFX) { 2191 ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); 2192 ptr[i++] = 0x80000000; 2193 ptr[i++] = 0x80000000; 2194 } 2195 2196 /* Issue commands to set default compute state. 
*/ 2197 /* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */ 2198 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 3); 2199 ptr[i++] = 0x204; 2200 i += 3; 2201 2202 /* clear mmCOMPUTE_TMPRING_SIZE */ 2203 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 2204 ptr[i++] = 0x218; 2205 ptr[i++] = 0; 2206 2207 return i; 2208} 2209 2210static int amdgpu_dispatch_write_cumask(uint32_t *ptr) 2211{ 2212 int i = 0; 2213 2214 /* Issue commands to set cu mask used in current dispatch */ 2215 /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */ 2216 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); 2217 ptr[i++] = 0x216; 2218 ptr[i++] = 0xffffffff; 2219 ptr[i++] = 0xffffffff; 2220 /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */ 2221 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); 2222 ptr[i++] = 0x219; 2223 ptr[i++] = 0xffffffff; 2224 ptr[i++] = 0xffffffff; 2225 2226 return i; 2227} 2228 2229static int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr) 2230{ 2231 int i, j; 2232 2233 i = 0; 2234 2235 /* Writes shader state to HW */ 2236 /* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */ 2237 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); 2238 ptr[i++] = 0x20c; 2239 ptr[i++] = (shader_addr >> 8); 2240 ptr[i++] = (shader_addr >> 40); 2241 /* write sh regs*/ 2242 for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) { 2243 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 2244 /* - Gfx9ShRegBase */ 2245 ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00; 2246 ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1]; 2247 } 2248 2249 return i; 2250} 2251 2252static void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle, 2253 uint32_t ip_type, 2254 uint32_t ring) 2255{ 2256 amdgpu_context_handle context_handle; 2257 amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3]; 2258 volatile unsigned char *ptr_dst; 2259 void *ptr_shader; 2260 uint32_t *ptr_cmd; 2261 uint64_t mc_address_dst, mc_address_shader, 
mc_address_cmd; 2262 amdgpu_va_handle va_dst, va_shader, va_cmd; 2263 int i, r; 2264 int bo_dst_size = 16384; 2265 int bo_shader_size = 4096; 2266 int bo_cmd_size = 4096; 2267 struct amdgpu_cs_request ibs_request = {0}; 2268 struct amdgpu_cs_ib_info ib_info= {0}; 2269 amdgpu_bo_list_handle bo_list; 2270 struct amdgpu_cs_fence fence_status = {0}; 2271 uint32_t expired; 2272 2273 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 2274 CU_ASSERT_EQUAL(r, 0); 2275 2276 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 2277 AMDGPU_GEM_DOMAIN_GTT, 0, 2278 &bo_cmd, (void **)&ptr_cmd, 2279 &mc_address_cmd, &va_cmd); 2280 CU_ASSERT_EQUAL(r, 0); 2281 memset(ptr_cmd, 0, bo_cmd_size); 2282 2283 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 2284 AMDGPU_GEM_DOMAIN_VRAM, 0, 2285 &bo_shader, &ptr_shader, 2286 &mc_address_shader, &va_shader); 2287 CU_ASSERT_EQUAL(r, 0); 2288 memset(ptr_shader, 0, bo_shader_size); 2289 2290 r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR); 2291 CU_ASSERT_EQUAL(r, 0); 2292 2293 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 2294 AMDGPU_GEM_DOMAIN_VRAM, 0, 2295 &bo_dst, (void **)&ptr_dst, 2296 &mc_address_dst, &va_dst); 2297 CU_ASSERT_EQUAL(r, 0); 2298 2299 i = 0; 2300 i += amdgpu_dispatch_init(ptr_cmd + i, ip_type); 2301 2302 /* Issue commands to set cu mask used in current dispatch */ 2303 i += amdgpu_dispatch_write_cumask(ptr_cmd + i); 2304 2305 /* Writes shader state to HW */ 2306 i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader); 2307 2308 /* Write constant data */ 2309 /* Writes the UAV constant data to the SGPRs. 
*/ 2310 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 2311 ptr_cmd[i++] = 0x240; 2312 ptr_cmd[i++] = mc_address_dst; 2313 ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000; 2314 ptr_cmd[i++] = 0x400; 2315 ptr_cmd[i++] = 0x74fac; 2316 2317 /* Sets a range of pixel shader constants */ 2318 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 2319 ptr_cmd[i++] = 0x244; 2320 ptr_cmd[i++] = 0x22222222; 2321 ptr_cmd[i++] = 0x22222222; 2322 ptr_cmd[i++] = 0x22222222; 2323 ptr_cmd[i++] = 0x22222222; 2324 2325 /* clear mmCOMPUTE_RESOURCE_LIMITS */ 2326 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 2327 ptr_cmd[i++] = 0x215; 2328 ptr_cmd[i++] = 0; 2329 2330 /* dispatch direct command */ 2331 ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3); 2332 ptr_cmd[i++] = 0x10; 2333 ptr_cmd[i++] = 1; 2334 ptr_cmd[i++] = 1; 2335 ptr_cmd[i++] = 1; 2336 2337 while (i & 7) 2338 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 2339 2340 resources[0] = bo_dst; 2341 resources[1] = bo_shader; 2342 resources[2] = bo_cmd; 2343 r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list); 2344 CU_ASSERT_EQUAL(r, 0); 2345 2346 ib_info.ib_mc_address = mc_address_cmd; 2347 ib_info.size = i; 2348 ibs_request.ip_type = ip_type; 2349 ibs_request.ring = ring; 2350 ibs_request.resources = bo_list; 2351 ibs_request.number_of_ibs = 1; 2352 ibs_request.ibs = &ib_info; 2353 ibs_request.fence_info.handle = NULL; 2354 2355 /* submit CS */ 2356 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 2357 CU_ASSERT_EQUAL(r, 0); 2358 2359 r = amdgpu_bo_list_destroy(bo_list); 2360 CU_ASSERT_EQUAL(r, 0); 2361 2362 fence_status.ip_type = ip_type; 2363 fence_status.ip_instance = 0; 2364 fence_status.ring = ring; 2365 fence_status.context = context_handle; 2366 fence_status.fence = ibs_request.seq_no; 2367 2368 /* wait for IB accomplished */ 2369 r = amdgpu_cs_query_fence_status(&fence_status, 2370 AMDGPU_TIMEOUT_INFINITE, 2371 0, &expired); 2372 CU_ASSERT_EQUAL(r, 0); 2373 
CU_ASSERT_EQUAL(expired, true); 2374 2375 /* verify if memset test result meets with expected */ 2376 i = 0; 2377 while(i < bo_dst_size) { 2378 CU_ASSERT_EQUAL(ptr_dst[i++], 0x22); 2379 } 2380 2381 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 2382 CU_ASSERT_EQUAL(r, 0); 2383 2384 r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size); 2385 CU_ASSERT_EQUAL(r, 0); 2386 2387 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 2388 CU_ASSERT_EQUAL(r, 0); 2389 2390 r = amdgpu_cs_ctx_free(context_handle); 2391 CU_ASSERT_EQUAL(r, 0); 2392} 2393 2394static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle, 2395 uint32_t ip_type, 2396 uint32_t ring, 2397 int hang) 2398{ 2399 amdgpu_context_handle context_handle; 2400 amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4]; 2401 volatile unsigned char *ptr_dst; 2402 void *ptr_shader; 2403 unsigned char *ptr_src; 2404 uint32_t *ptr_cmd; 2405 uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd; 2406 amdgpu_va_handle va_src, va_dst, va_shader, va_cmd; 2407 int i, r; 2408 int bo_dst_size = 16384; 2409 int bo_shader_size = 4096; 2410 int bo_cmd_size = 4096; 2411 struct amdgpu_cs_request ibs_request = {0}; 2412 struct amdgpu_cs_ib_info ib_info= {0}; 2413 uint32_t expired, hang_state, hangs; 2414 enum cs_type cs_type; 2415 amdgpu_bo_list_handle bo_list; 2416 struct amdgpu_cs_fence fence_status = {0}; 2417 2418 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 2419 CU_ASSERT_EQUAL(r, 0); 2420 2421 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 2422 AMDGPU_GEM_DOMAIN_GTT, 0, 2423 &bo_cmd, (void **)&ptr_cmd, 2424 &mc_address_cmd, &va_cmd); 2425 CU_ASSERT_EQUAL(r, 0); 2426 memset(ptr_cmd, 0, bo_cmd_size); 2427 2428 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 2429 AMDGPU_GEM_DOMAIN_VRAM, 0, 2430 &bo_shader, &ptr_shader, 2431 &mc_address_shader, &va_shader); 2432 
CU_ASSERT_EQUAL(r, 0); 2433 memset(ptr_shader, 0, bo_shader_size); 2434 2435 cs_type = hang ? CS_HANG : CS_BUFFERCOPY; 2436 r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type); 2437 CU_ASSERT_EQUAL(r, 0); 2438 2439 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 2440 AMDGPU_GEM_DOMAIN_VRAM, 0, 2441 &bo_src, (void **)&ptr_src, 2442 &mc_address_src, &va_src); 2443 CU_ASSERT_EQUAL(r, 0); 2444 2445 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 2446 AMDGPU_GEM_DOMAIN_VRAM, 0, 2447 &bo_dst, (void **)&ptr_dst, 2448 &mc_address_dst, &va_dst); 2449 CU_ASSERT_EQUAL(r, 0); 2450 2451 memset(ptr_src, 0x55, bo_dst_size); 2452 2453 i = 0; 2454 i += amdgpu_dispatch_init(ptr_cmd + i, ip_type); 2455 2456 /* Issue commands to set cu mask used in current dispatch */ 2457 i += amdgpu_dispatch_write_cumask(ptr_cmd + i); 2458 2459 /* Writes shader state to HW */ 2460 i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader); 2461 2462 /* Write constant data */ 2463 /* Writes the texture resource constants data to the SGPRs */ 2464 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 2465 ptr_cmd[i++] = 0x240; 2466 ptr_cmd[i++] = mc_address_src; 2467 ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000; 2468 ptr_cmd[i++] = 0x400; 2469 ptr_cmd[i++] = 0x74fac; 2470 2471 /* Writes the UAV constant data to the SGPRs. 
*/ 2472 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 2473 ptr_cmd[i++] = 0x244; 2474 ptr_cmd[i++] = mc_address_dst; 2475 ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000; 2476 ptr_cmd[i++] = 0x400; 2477 ptr_cmd[i++] = 0x74fac; 2478 2479 /* clear mmCOMPUTE_RESOURCE_LIMITS */ 2480 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 2481 ptr_cmd[i++] = 0x215; 2482 ptr_cmd[i++] = 0; 2483 2484 /* dispatch direct command */ 2485 ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3); 2486 ptr_cmd[i++] = 0x10; 2487 ptr_cmd[i++] = 1; 2488 ptr_cmd[i++] = 1; 2489 ptr_cmd[i++] = 1; 2490 2491 while (i & 7) 2492 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 2493 2494 resources[0] = bo_shader; 2495 resources[1] = bo_src; 2496 resources[2] = bo_dst; 2497 resources[3] = bo_cmd; 2498 r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list); 2499 CU_ASSERT_EQUAL(r, 0); 2500 2501 ib_info.ib_mc_address = mc_address_cmd; 2502 ib_info.size = i; 2503 ibs_request.ip_type = ip_type; 2504 ibs_request.ring = ring; 2505 ibs_request.resources = bo_list; 2506 ibs_request.number_of_ibs = 1; 2507 ibs_request.ibs = &ib_info; 2508 ibs_request.fence_info.handle = NULL; 2509 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 2510 CU_ASSERT_EQUAL(r, 0); 2511 2512 fence_status.ip_type = ip_type; 2513 fence_status.ip_instance = 0; 2514 fence_status.ring = ring; 2515 fence_status.context = context_handle; 2516 fence_status.fence = ibs_request.seq_no; 2517 2518 /* wait for IB accomplished */ 2519 r = amdgpu_cs_query_fence_status(&fence_status, 2520 AMDGPU_TIMEOUT_INFINITE, 2521 0, &expired); 2522 2523 if (!hang) { 2524 CU_ASSERT_EQUAL(r, 0); 2525 CU_ASSERT_EQUAL(expired, true); 2526 2527 /* verify if memcpy test result meets with expected */ 2528 i = 0; 2529 while(i < bo_dst_size) { 2530 CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]); 2531 i++; 2532 } 2533 } else { 2534 r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); 2535 CU_ASSERT_EQUAL(r, 0); 2536 
CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); 2537 } 2538 2539 r = amdgpu_bo_list_destroy(bo_list); 2540 CU_ASSERT_EQUAL(r, 0); 2541 2542 r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size); 2543 CU_ASSERT_EQUAL(r, 0); 2544 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 2545 CU_ASSERT_EQUAL(r, 0); 2546 2547 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 2548 CU_ASSERT_EQUAL(r, 0); 2549 2550 r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size); 2551 CU_ASSERT_EQUAL(r, 0); 2552 2553 r = amdgpu_cs_ctx_free(context_handle); 2554 CU_ASSERT_EQUAL(r, 0); 2555} 2556 2557static void amdgpu_compute_dispatch_test(void) 2558{ 2559 int r; 2560 struct drm_amdgpu_info_hw_ip info; 2561 uint32_t ring_id; 2562 2563 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info); 2564 CU_ASSERT_EQUAL(r, 0); 2565 if (!info.available_rings) 2566 printf("SKIP ... as there's no compute ring\n"); 2567 2568 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 2569 amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id); 2570 amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, 0); 2571 } 2572} 2573 2574static void amdgpu_gfx_dispatch_test(void) 2575{ 2576 int r; 2577 struct drm_amdgpu_info_hw_ip info; 2578 uint32_t ring_id; 2579 2580 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info); 2581 CU_ASSERT_EQUAL(r, 0); 2582 if (!info.available_rings) 2583 printf("SKIP ... 
as there's no graphics ring\n"); 2584 2585 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 2586 amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id); 2587 amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, 0); 2588 } 2589} 2590 2591void amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip_type) 2592{ 2593 int r; 2594 struct drm_amdgpu_info_hw_ip info; 2595 uint32_t ring_id; 2596 2597 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info); 2598 CU_ASSERT_EQUAL(r, 0); 2599 if (!info.available_rings) 2600 printf("SKIP ... as there's no ring for ip %d\n", ip_type); 2601 2602 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 2603 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0); 2604 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 1); 2605 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0); 2606 } 2607} 2608 2609static void amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_handle, 2610 uint32_t ip_type, uint32_t ring) 2611{ 2612 amdgpu_context_handle context_handle; 2613 amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4]; 2614 volatile unsigned char *ptr_dst; 2615 void *ptr_shader; 2616 unsigned char *ptr_src; 2617 uint32_t *ptr_cmd; 2618 uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd; 2619 amdgpu_va_handle va_src, va_dst, va_shader, va_cmd; 2620 int i, r; 2621 int bo_dst_size = 0x4000000; 2622 int bo_shader_size = 0x400000; 2623 int bo_cmd_size = 4096; 2624 struct amdgpu_cs_request ibs_request = {0}; 2625 struct amdgpu_cs_ib_info ib_info= {0}; 2626 uint32_t hang_state, hangs, expired; 2627 struct amdgpu_gpu_info gpu_info = {0}; 2628 amdgpu_bo_list_handle bo_list; 2629 struct amdgpu_cs_fence fence_status = {0}; 2630 2631 r = amdgpu_query_gpu_info(device_handle, &gpu_info); 2632 CU_ASSERT_EQUAL(r, 0); 2633 2634 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 
2635 CU_ASSERT_EQUAL(r, 0); 2636 2637 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 2638 AMDGPU_GEM_DOMAIN_GTT, 0, 2639 &bo_cmd, (void **)&ptr_cmd, 2640 &mc_address_cmd, &va_cmd); 2641 CU_ASSERT_EQUAL(r, 0); 2642 memset(ptr_cmd, 0, bo_cmd_size); 2643 2644 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 2645 AMDGPU_GEM_DOMAIN_VRAM, 0, 2646 &bo_shader, &ptr_shader, 2647 &mc_address_shader, &va_shader); 2648 CU_ASSERT_EQUAL(r, 0); 2649 memset(ptr_shader, 0, bo_shader_size); 2650 2651 r = amdgpu_dispatch_load_cs_shader_hang_slow(ptr_shader, gpu_info.family_id); 2652 CU_ASSERT_EQUAL(r, 0); 2653 2654 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 2655 AMDGPU_GEM_DOMAIN_VRAM, 0, 2656 &bo_src, (void **)&ptr_src, 2657 &mc_address_src, &va_src); 2658 CU_ASSERT_EQUAL(r, 0); 2659 2660 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 2661 AMDGPU_GEM_DOMAIN_VRAM, 0, 2662 &bo_dst, (void **)&ptr_dst, 2663 &mc_address_dst, &va_dst); 2664 CU_ASSERT_EQUAL(r, 0); 2665 2666 memset(ptr_src, 0x55, bo_dst_size); 2667 2668 i = 0; 2669 i += amdgpu_dispatch_init(ptr_cmd + i, ip_type); 2670 2671 /* Issue commands to set cu mask used in current dispatch */ 2672 i += amdgpu_dispatch_write_cumask(ptr_cmd + i); 2673 2674 /* Writes shader state to HW */ 2675 i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader); 2676 2677 /* Write constant data */ 2678 /* Writes the texture resource constants data to the SGPRs */ 2679 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 2680 ptr_cmd[i++] = 0x240; 2681 ptr_cmd[i++] = mc_address_src; 2682 ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000; 2683 ptr_cmd[i++] = 0x400000; 2684 ptr_cmd[i++] = 0x74fac; 2685 2686 /* Writes the UAV constant data to the SGPRs. 
*/ 2687 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 2688 ptr_cmd[i++] = 0x244; 2689 ptr_cmd[i++] = mc_address_dst; 2690 ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000; 2691 ptr_cmd[i++] = 0x400000; 2692 ptr_cmd[i++] = 0x74fac; 2693 2694 /* clear mmCOMPUTE_RESOURCE_LIMITS */ 2695 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 2696 ptr_cmd[i++] = 0x215; 2697 ptr_cmd[i++] = 0; 2698 2699 /* dispatch direct command */ 2700 ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3); 2701 ptr_cmd[i++] = 0x10000; 2702 ptr_cmd[i++] = 1; 2703 ptr_cmd[i++] = 1; 2704 ptr_cmd[i++] = 1; 2705 2706 while (i & 7) 2707 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 2708 2709 resources[0] = bo_shader; 2710 resources[1] = bo_src; 2711 resources[2] = bo_dst; 2712 resources[3] = bo_cmd; 2713 r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list); 2714 CU_ASSERT_EQUAL(r, 0); 2715 2716 ib_info.ib_mc_address = mc_address_cmd; 2717 ib_info.size = i; 2718 ibs_request.ip_type = ip_type; 2719 ibs_request.ring = ring; 2720 ibs_request.resources = bo_list; 2721 ibs_request.number_of_ibs = 1; 2722 ibs_request.ibs = &ib_info; 2723 ibs_request.fence_info.handle = NULL; 2724 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 2725 CU_ASSERT_EQUAL(r, 0); 2726 2727 fence_status.ip_type = ip_type; 2728 fence_status.ip_instance = 0; 2729 fence_status.ring = ring; 2730 fence_status.context = context_handle; 2731 fence_status.fence = ibs_request.seq_no; 2732 2733 /* wait for IB accomplished */ 2734 r = amdgpu_cs_query_fence_status(&fence_status, 2735 AMDGPU_TIMEOUT_INFINITE, 2736 0, &expired); 2737 2738 r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); 2739 CU_ASSERT_EQUAL(r, 0); 2740 CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); 2741 2742 r = amdgpu_bo_list_destroy(bo_list); 2743 CU_ASSERT_EQUAL(r, 0); 2744 2745 r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size); 2746 CU_ASSERT_EQUAL(r, 0); 2747 r = 
amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 2748 CU_ASSERT_EQUAL(r, 0); 2749 2750 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 2751 CU_ASSERT_EQUAL(r, 0); 2752 2753 r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size); 2754 CU_ASSERT_EQUAL(r, 0); 2755 2756 r = amdgpu_cs_ctx_free(context_handle); 2757 CU_ASSERT_EQUAL(r, 0); 2758} 2759 2760void amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle, uint32_t ip_type) 2761{ 2762 int r; 2763 struct drm_amdgpu_info_hw_ip info; 2764 uint32_t ring_id; 2765 2766 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info); 2767 CU_ASSERT_EQUAL(r, 0); 2768 if (!info.available_rings) 2769 printf("SKIP ... as there's no ring for ip %d\n", ip_type); 2770 2771 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 2772 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0); 2773 amdgpu_memcpy_dispatch_hang_slow_test(device_handle, ip_type, ring_id); 2774 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0); 2775 } 2776} 2777 2778static int amdgpu_draw_load_ps_shader_hang_slow(uint32_t *ptr, int family) 2779{ 2780 struct amdgpu_test_shader *shader; 2781 int i, loop = 0x40000; 2782 2783 switch (family) { 2784 case AMDGPU_FAMILY_AI: 2785 case AMDGPU_FAMILY_RV: 2786 shader = &memcpy_ps_hang_slow_ai; 2787 break; 2788 default: 2789 return -1; 2790 break; 2791 } 2792 2793 memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t)); 2794 2795 for (i = 0; i < loop; i++) 2796 memcpy(ptr + shader->header_length + shader->body_length * i, 2797 shader->shader + shader->header_length, 2798 shader->body_length * sizeof(uint32_t)); 2799 2800 memcpy(ptr + shader->header_length + shader->body_length * loop, 2801 shader->shader + shader->header_length + shader->body_length, 2802 shader->foot_length * sizeof(uint32_t)); 2803 2804 return 0; 2805} 2806 2807static int amdgpu_draw_load_ps_shader(uint8_t 
*ptr, int ps_type) 2808{ 2809 int i; 2810 uint32_t shader_offset= 256; 2811 uint32_t mem_offset, patch_code_offset; 2812 uint32_t shader_size, patchinfo_code_size; 2813 const uint32_t *shader; 2814 const uint32_t *patchinfo_code; 2815 const uint32_t *patchcode_offset; 2816 2817 switch (ps_type) { 2818 case PS_CONST: 2819 shader = ps_const_shader_gfx9; 2820 shader_size = sizeof(ps_const_shader_gfx9); 2821 patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9; 2822 patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9; 2823 patchcode_offset = ps_const_shader_patchinfo_offset_gfx9; 2824 break; 2825 case PS_TEX: 2826 shader = ps_tex_shader_gfx9; 2827 shader_size = sizeof(ps_tex_shader_gfx9); 2828 patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx9; 2829 patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9; 2830 patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9; 2831 break; 2832 case PS_HANG: 2833 shader = memcpy_ps_hang; 2834 shader_size = sizeof(memcpy_ps_hang); 2835 2836 memcpy(ptr, shader, shader_size); 2837 return 0; 2838 default: 2839 return -1; 2840 break; 2841 } 2842 2843 /* write main shader program */ 2844 for (i = 0 ; i < 10; i++) { 2845 mem_offset = i * shader_offset; 2846 memcpy(ptr + mem_offset, shader, shader_size); 2847 } 2848 2849 /* overwrite patch codes */ 2850 for (i = 0 ; i < 10; i++) { 2851 mem_offset = i * shader_offset + patchcode_offset[0] * sizeof(uint32_t); 2852 patch_code_offset = i * patchinfo_code_size; 2853 memcpy(ptr + mem_offset, 2854 patchinfo_code + patch_code_offset, 2855 patchinfo_code_size * sizeof(uint32_t)); 2856 } 2857 2858 return 0; 2859} 2860 2861/* load RectPosTexFast_VS */ 2862static int amdgpu_draw_load_vs_shader(uint8_t *ptr) 2863{ 2864 const uint32_t *shader; 2865 uint32_t shader_size; 2866 2867 shader = vs_RectPosTexFast_shader_gfx9; 2868 shader_size = sizeof(vs_RectPosTexFast_shader_gfx9); 2869 2870 memcpy(ptr, shader, shader_size); 2871 2872 return 0; 2873} 2874 
2875static int amdgpu_draw_init(uint32_t *ptr) 2876{ 2877 int i = 0; 2878 const uint32_t *preamblecache_ptr; 2879 uint32_t preamblecache_size; 2880 2881 /* Write context control and load shadowing register if necessary */ 2882 ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); 2883 ptr[i++] = 0x80000000; 2884 ptr[i++] = 0x80000000; 2885 2886 preamblecache_ptr = preamblecache_gfx9; 2887 preamblecache_size = sizeof(preamblecache_gfx9); 2888 2889 memcpy(ptr + i, preamblecache_ptr, preamblecache_size); 2890 return i + preamblecache_size/sizeof(uint32_t); 2891} 2892 2893static int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr, 2894 uint64_t dst_addr, 2895 int hang_slow) 2896{ 2897 int i = 0; 2898 2899 /* setup color buffer */ 2900 /* offset reg 2901 0xA318 CB_COLOR0_BASE 2902 0xA319 CB_COLOR0_BASE_EXT 2903 0xA31A CB_COLOR0_ATTRIB2 2904 0xA31B CB_COLOR0_VIEW 2905 0xA31C CB_COLOR0_INFO 2906 0xA31D CB_COLOR0_ATTRIB 2907 0xA31E CB_COLOR0_DCC_CONTROL 2908 0xA31F CB_COLOR0_CMASK 2909 0xA320 CB_COLOR0_CMASK_BASE_EXT 2910 0xA321 CB_COLOR0_FMASK 2911 0xA322 CB_COLOR0_FMASK_BASE_EXT 2912 0xA323 CB_COLOR0_CLEAR_WORD0 2913 0xA324 CB_COLOR0_CLEAR_WORD1 2914 0xA325 CB_COLOR0_DCC_BASE 2915 0xA326 CB_COLOR0_DCC_BASE_EXT */ 2916 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15); 2917 ptr[i++] = 0x318; 2918 ptr[i++] = dst_addr >> 8; 2919 ptr[i++] = dst_addr >> 40; 2920 ptr[i++] = hang_slow ? 0x1ffc7ff : 0x7c01f; 2921 ptr[i++] = 0; 2922 ptr[i++] = 0x50438; 2923 ptr[i++] = 0x10140000; 2924 i += 9; 2925 2926 /* mmCB_MRT0_EPITCH */ 2927 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 2928 ptr[i++] = 0x1e8; 2929 ptr[i++] = hang_slow ? 
0x7ff : 0x1f; 2930 2931 /* 0xA32B CB_COLOR1_BASE */ 2932 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 2933 ptr[i++] = 0x32b; 2934 ptr[i++] = 0; 2935 2936 /* 0xA33A CB_COLOR1_BASE */ 2937 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 2938 ptr[i++] = 0x33a; 2939 ptr[i++] = 0; 2940 2941 /* SPI_SHADER_COL_FORMAT */ 2942 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 2943 ptr[i++] = 0x1c5; 2944 ptr[i++] = 9; 2945 2946 /* Setup depth buffer */ 2947 /* mmDB_Z_INFO */ 2948 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); 2949 ptr[i++] = 0xe; 2950 i += 2; 2951 2952 return i; 2953} 2954 2955static int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr, int hang_slow) 2956{ 2957 int i = 0; 2958 const uint32_t *cached_cmd_ptr; 2959 uint32_t cached_cmd_size; 2960 2961 /* mmPA_SC_TILE_STEERING_OVERRIDE */ 2962 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 2963 ptr[i++] = 0xd7; 2964 ptr[i++] = 0; 2965 2966 ptr[i++] = 0xffff1000; 2967 ptr[i++] = 0xc0021000; 2968 2969 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 2970 ptr[i++] = 0xd7; 2971 ptr[i++] = 1; 2972 2973 /* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */ 2974 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16); 2975 ptr[i++] = 0x2fe; 2976 i += 16; 2977 2978 /* mmPA_SC_CENTROID_PRIORITY_0 */ 2979 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); 2980 ptr[i++] = 0x2f5; 2981 i += 2; 2982 2983 cached_cmd_ptr = cached_cmd_gfx9; 2984 cached_cmd_size = sizeof(cached_cmd_gfx9); 2985 2986 memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size); 2987 if (hang_slow) 2988 *(ptr + i + 12) = 0x8000800; 2989 i += cached_cmd_size/sizeof(uint32_t); 2990 2991 return i; 2992} 2993 2994static int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr, 2995 int ps_type, 2996 uint64_t shader_addr, 2997 int hang_slow) 2998{ 2999 int i = 0; 3000 3001 /* mmPA_CL_VS_OUT_CNTL */ 3002 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3003 ptr[i++] = 0x207; 3004 ptr[i++] = 0; 3005 3006 /* mmSPI_SHADER_PGM_RSRC3_VS */ 3007 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 
3008 ptr[i++] = 0x46; 3009 ptr[i++] = 0xffff; 3010 3011 /* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */ 3012 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 3013 ptr[i++] = 0x48; 3014 ptr[i++] = shader_addr >> 8; 3015 ptr[i++] = shader_addr >> 40; 3016 3017 /* mmSPI_SHADER_PGM_RSRC1_VS */ 3018 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 3019 ptr[i++] = 0x4a; 3020 ptr[i++] = 0xc0081; 3021 /* mmSPI_SHADER_PGM_RSRC2_VS */ 3022 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 3023 ptr[i++] = 0x4b; 3024 ptr[i++] = 0x18; 3025 3026 /* mmSPI_VS_OUT_CONFIG */ 3027 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3028 ptr[i++] = 0x1b1; 3029 ptr[i++] = 2; 3030 3031 /* mmSPI_SHADER_POS_FORMAT */ 3032 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3033 ptr[i++] = 0x1c3; 3034 ptr[i++] = 4; 3035 3036 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4); 3037 ptr[i++] = 0x4c; 3038 i += 2; 3039 ptr[i++] = hang_slow ? 0x45000000 : 0x42000000; 3040 ptr[i++] = hang_slow ? 0x45000000 : 0x42000000; 3041 3042 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4); 3043 ptr[i++] = 0x50; 3044 i += 2; 3045 if (ps_type == PS_CONST) { 3046 i += 2; 3047 } else if (ps_type == PS_TEX) { 3048 ptr[i++] = 0x3f800000; 3049 ptr[i++] = 0x3f800000; 3050 } 3051 3052 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4); 3053 ptr[i++] = 0x54; 3054 i += 4; 3055 3056 return i; 3057} 3058 3059static int amdgpu_draw_ps_write2hw(uint32_t *ptr, 3060 int ps_type, 3061 uint64_t shader_addr) 3062{ 3063 int i, j; 3064 const uint32_t *sh_registers; 3065 const uint32_t *context_registers; 3066 uint32_t num_sh_reg, num_context_reg; 3067 3068 if (ps_type == PS_CONST) { 3069 sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9; 3070 context_registers = (const uint32_t *)ps_const_context_reg_gfx9; 3071 num_sh_reg = ps_num_sh_registers_gfx9; 3072 num_context_reg = ps_num_context_registers_gfx9; 3073 } else if (ps_type == PS_TEX) { 3074 sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9; 3075 context_registers = (const uint32_t *)ps_tex_context_reg_gfx9; 3076 
num_sh_reg = ps_num_sh_registers_gfx9; 3077 num_context_reg = ps_num_context_registers_gfx9; 3078 } 3079 3080 i = 0; 3081 3082 /* 0x2c07 SPI_SHADER_PGM_RSRC3_PS 3083 0x2c08 SPI_SHADER_PGM_LO_PS 3084 0x2c09 SPI_SHADER_PGM_HI_PS */ 3085 shader_addr += 256 * 9; 3086 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3); 3087 ptr[i++] = 0x7; 3088 ptr[i++] = 0xffff; 3089 ptr[i++] = shader_addr >> 8; 3090 ptr[i++] = shader_addr >> 40; 3091 3092 for (j = 0; j < num_sh_reg; j++) { 3093 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 3094 ptr[i++] = sh_registers[j * 2] - 0x2c00; 3095 ptr[i++] = sh_registers[j * 2 + 1]; 3096 } 3097 3098 for (j = 0; j < num_context_reg; j++) { 3099 if (context_registers[j * 2] != 0xA1C5) { 3100 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3101 ptr[i++] = context_registers[j * 2] - 0xa000; 3102 ptr[i++] = context_registers[j * 2 + 1]; 3103 } 3104 3105 if (context_registers[j * 2] == 0xA1B4) { 3106 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3107 ptr[i++] = 0x1b3; 3108 ptr[i++] = 2; 3109 } 3110 } 3111 3112 return i; 3113} 3114 3115static int amdgpu_draw_draw(uint32_t *ptr) 3116{ 3117 int i = 0; 3118 3119 /* mmIA_MULTI_VGT_PARAM */ 3120 ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 3121 ptr[i++] = 0x40000258; 3122 ptr[i++] = 0xd00ff; 3123 3124 /* mmVGT_PRIMITIVE_TYPE */ 3125 ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 3126 ptr[i++] = 0x10000242; 3127 ptr[i++] = 0x11; 3128 3129 ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1); 3130 ptr[i++] = 3; 3131 ptr[i++] = 2; 3132 3133 return i; 3134} 3135 3136void amdgpu_memset_draw(amdgpu_device_handle device_handle, 3137 amdgpu_bo_handle bo_shader_ps, 3138 amdgpu_bo_handle bo_shader_vs, 3139 uint64_t mc_address_shader_ps, 3140 uint64_t mc_address_shader_vs, 3141 uint32_t ring_id) 3142{ 3143 amdgpu_context_handle context_handle; 3144 amdgpu_bo_handle bo_dst, bo_cmd, resources[4]; 3145 volatile unsigned char *ptr_dst; 3146 uint32_t *ptr_cmd; 3147 uint64_t mc_address_dst, mc_address_cmd; 3148 amdgpu_va_handle va_dst, 
 va_cmd;
	int i, r;
	int bo_dst_size = 16384;	/* destination: 16 KiB in VRAM */
	int bo_cmd_size = 4096;		/* one page of command stream, in GTT */
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* the command buffer must be zero-filled: the emission helpers
	 * deliberately skip dwords (i += n) and rely on them being zero */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_dst, (void **)&ptr_dst,
				    &mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	/* assemble the IB: init + render-target/state setup + VS + PS */
	i = 0;
	i += amdgpu_draw_init(ptr_cmd + i);

	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0);

	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0);

	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs, 0);

	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps);

	/* the constant colour (the memset pattern) in 4 PS user SGPRs at
	 * SH offset 0xc */
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0xc;
	ptr_cmd[i++] = 0x33333333;
	ptr_cmd[i++] = 0x33333333;
	ptr_cmd[i++] = 0x33333333;
	ptr_cmd[i++] = 0x33333333;

	i += amdgpu_draw_draw(ptr_cmd + i);

	/* pad the IB to an 8-dword boundary */
	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_dst;
	resources[1] = bo_shader_ps;
	resources[2] = bo_shader_vs;
	resources[3] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;	/* in dwords */
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = ring_id;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = ring_id;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;	/* filled in by amdgpu_cs_submit() */

	/* wait for IB accomplished */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* verify if memset test result meets with expected: every byte of
	 * the destination must carry the 0x33 fill pattern */
	i = 0;
	while(i < bo_dst_size) {
		CU_ASSERT_EQUAL(ptr_dst[i++], 0x33);
	}

	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

/*
 * Wrapper for amdgpu_memset_draw(): allocates two 4 KiB VRAM bos, loads
 * the constant-colour PS and the VS into them, runs the memset-by-draw
 * test on the given ring, then frees the shader bos.
 */
static void amdgpu_memset_draw_test(amdgpu_device_handle device_handle,
				    uint32_t ring)
{
	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
	void *ptr_shader_ps;
	void *ptr_shader_vs;
	uint64_t mc_address_shader_ps, mc_address_shader_vs;
	amdgpu_va_handle va_shader_ps, va_shader_vs;
	int r;
	int bo_shader_size = 4096;

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_shader_ps, &ptr_shader_ps,
				    &mc_address_shader_ps, &va_shader_ps);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_ps, 0, bo_shader_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_shader_vs, &ptr_shader_vs,
				    &mc_address_shader_vs, &va_shader_vs);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_vs, 0, bo_shader_size);

	/* copy the shader binaries into the (zeroed) shader bos */
	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_CONST);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
	CU_ASSERT_EQUAL(r, 0);

	amdgpu_memset_draw(device_handle, bo_shader_ps, bo_shader_vs,
			   mc_address_shader_ps, mc_address_shader_vs, ring);

	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);
}

/*
 * Copy one 16 KiB VRAM buffer to another via a textured draw: the source
 * is bound as a texture sampled by the PS.  When 'hang' is non-zero the
 * caller has loaded a hanging PS, so instead of verifying the copy this
 * routine expects the context to report AMDGPU_CTX_UNKNOWN_RESET.
 * Failures are reported through (non-fatal) CUnit asserts.
 */
static void amdgpu_memcpy_draw(amdgpu_device_handle device_handle,
			       amdgpu_bo_handle bo_shader_ps,
			       amdgpu_bo_handle bo_shader_vs,
			       uint64_t mc_address_shader_ps,
			       uint64_t mc_address_shader_vs,
			       uint32_t ring, int hang)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
	volatile unsigned char *ptr_dst;	/* volatile: written by the GPU */
	unsigned char *ptr_src;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
	amdgpu_va_handle va_dst, va_src, va_cmd;
	int i, r;
	int bo_size = 16384;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info= {0};
	uint32_t hang_state, hangs;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* zero-filled command buffer; the helpers rely on skipped dwords
	 * staying zero */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_src, (void **)&ptr_src,
				    &mc_address_src,
 &va_src);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_dst, (void **)&ptr_dst,
				    &mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	/* recognizable source pattern, compared against dst after the draw */
	memset(ptr_src, 0x55, bo_size);

	i = 0;
	i += amdgpu_draw_init(ptr_cmd + i);

	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0);

	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0);

	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs, 0);

	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);

	/* 8 dwords at SH offset 0xc — NOTE(review): presumably the T# image
	 * descriptor for the source texture (base address in the first two
	 * dwords); confirm the remaining fields against the gfx9 SRD layout */
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
	ptr_cmd[i++] = 0xc;
	ptr_cmd[i++] = mc_address_src >> 8;
	ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
	ptr_cmd[i++] = 0x7c01f;
	ptr_cmd[i++] = 0x90500fac;
	ptr_cmd[i++] = 0x3e000;
	i += 3;

	/* 4 dwords at 0x14 — NOTE(review): presumably the S# sampler
	 * descriptor; confirm */
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x14;
	ptr_cmd[i++] = 0x92;
	i += 3;

	/* context register 0xa191 cleared to 0 */
	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr_cmd[i++] = 0x191;
	ptr_cmd[i++] = 0;

	i += amdgpu_draw_draw(ptr_cmd + i);

	/* pad the IB to an 8-dword boundary */
	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_dst;
	resources[1] = bo_src;
	resources[2] = bo_shader_ps;
	resources[3] = bo_shader_vs;
	resources[4] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;	/* in dwords */
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type
 = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for IB accomplished */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	if (!hang) {
		CU_ASSERT_EQUAL(r, 0);
		CU_ASSERT_EQUAL(expired, true);

		/* verify if memcpy test result meets with expected: dst must
		 * byte-for-byte equal the 0x55-filled src */
		i = 0;
		while(i < bo_size) {
			CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
			i++;
		}
	} else {
		/* the hanging PS is expected to wedge the ring; the context
		 * must now report a reset */
		r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
		CU_ASSERT_EQUAL(r, 0);
		CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
	}

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

/*
 * Wrapper for amdgpu_memcpy_draw(): loads either the normal textured PS
 * (PS_TEX) or, when 'hang' is set, the deliberately hanging PS (PS_HANG),
 * plus the VS, then runs the copy/hang scenario on the given ring and
 * frees the shader bos.
 */
void amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring,
			     int hang)
{
	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
	void *ptr_shader_ps;
	void *ptr_shader_vs;
	uint64_t mc_address_shader_ps, mc_address_shader_vs;
	amdgpu_va_handle va_shader_ps, va_shader_vs;
	int bo_shader_size = 4096;
	enum ps_type ps_type = hang ?
 PS_HANG : PS_TEX;
	int r;

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_shader_ps, &ptr_shader_ps,
				    &mc_address_shader_ps, &va_shader_ps);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_ps, 0, bo_shader_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_shader_vs, &ptr_shader_vs,
				    &mc_address_shader_vs, &va_shader_vs);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_vs, 0, bo_shader_size);

	/* copy the chosen shader binaries into the (zeroed) shader bos */
	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, ps_type);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
	CU_ASSERT_EQUAL(r, 0);

	amdgpu_memcpy_draw(device_handle, bo_shader_ps, bo_shader_vs,
			   mc_address_shader_ps, mc_address_shader_vs, ring, hang);

	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);
}

/*
 * CUnit entry point: run the memset and memcpy draw tests on every
 * available GFX ring.  If no GFX ring is available the ring loop simply
 * never executes, so the SKIP message needs no early return.
 */
static void amdgpu_draw_test(void)
{
	int r;
	struct drm_amdgpu_info_hw_ip info;
	uint32_t ring_id;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
	CU_ASSERT_EQUAL(r, 0);
	if (!info.available_rings)
		printf("SKIP ... as there's no graphics ring\n");

	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
		amdgpu_memset_draw_test(device_handle, ring_id);
		amdgpu_memcpy_draw_test(device_handle, ring_id, 0);
	}
}

/*
 * Like the memcpy hang test, but wedges the ring with a slow-running PS
 * over much larger buffers (64 MiB src/dst, 4 MiB PS bo) selected
 * per-ASIC via amdgpu_draw_load_ps_shader_hang_slow().  Expects the
 * context to report AMDGPU_CTX_UNKNOWN_RESET afterwards.
 */
void amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint32_t ring)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
	void *ptr_shader_ps;
	void *ptr_shader_vs;
	volatile unsigned char *ptr_dst;	/* volatile: written by the GPU */
	unsigned char *ptr_src;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
	uint64_t mc_address_shader_ps, mc_address_shader_vs;
	amdgpu_va_handle va_shader_ps, va_shader_vs;
	amdgpu_va_handle va_dst, va_src, va_cmd;
	struct amdgpu_gpu_info gpu_info = {0};
	int i, r;
	int bo_size = 0x4000000;		/* 64 MiB src/dst */
	int bo_shader_ps_size = 0x400000;	/* 4 MiB PS bo */
	int bo_shader_vs_size = 4096;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info= {0};
	uint32_t hang_state, hangs, expired;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};

	/* family_id is needed to pick the per-ASIC slow-hang shader below */
	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_ps_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_shader_ps, &ptr_shader_ps,
				    &mc_address_shader_ps, &va_shader_ps);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_ps, 0, bo_shader_ps_size);

	r =
 amdgpu_bo_alloc_and_map(device_handle, bo_shader_vs_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_shader_vs, &ptr_shader_vs,
				    &mc_address_shader_vs, &va_shader_vs);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_vs, 0, bo_shader_vs_size);

	/* per-ASIC slow-hang pixel shader */
	r = amdgpu_draw_load_ps_shader_hang_slow(ptr_shader_ps, gpu_info.family_id);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_src, (void **)&ptr_src,
				    &mc_address_src, &va_src);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_dst, (void **)&ptr_dst,
				    &mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	memset(ptr_src, 0x55, bo_size);

	/* same IB layout as amdgpu_memcpy_draw(), but with the hang_slow
	 * flag passed to the surf-info/state/VS helpers */
	i = 0;
	i += amdgpu_draw_init(ptr_cmd + i);

	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 1);

	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 1);

	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX,
						    mc_address_shader_vs, 1);

	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);

	/* NOTE(review): presumably the T# descriptor for the (much larger)
	 * source texture — the size/pitch dwords differ from the 16 KiB
	 * variant in amdgpu_memcpy_draw(); confirm against the gfx9 SRD
	 * layout */
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
	ptr_cmd[i++] = 0xc;
	ptr_cmd[i++] = mc_address_src >> 8;
	ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
	ptr_cmd[i++] = 0x1ffc7ff;
	ptr_cmd[i++] = 0x90500fac;
	ptr_cmd[i++] = 0xffe000;
	i += 3;

	/* sampler dwords at SH offset 0x14 */
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x14;
	ptr_cmd[i++] = 0x92;
	i += 3;

	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr_cmd[i++] = 0x191;
	ptr_cmd[i++] = 0;

	i += amdgpu_draw_draw(ptr_cmd + i);

	/* pad the IB to an 8-dword boundary */
	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_dst;
	resources[1] = bo_src;
	resources[2] = bo_shader_ps;
	resources[3] = bo_shader_vs;
	resources[4] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;	/* in dwords */
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for IB accomplished; the result is deliberately not asserted
	 * here — the submission is expected to hang and be reset */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);

	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_ps_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_vs_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

/*
 * CUnit entry point: trigger a GPU recovery through the debugfs
 * amdgpu_gpu_recover file, check that an existing context observes the
 * reset, then re-run the dispatch tests to confirm the GPU still
 * executes work afterwards.
 */
static void amdgpu_gpu_reset_test(void)
{
	int r;
	char debugfs_path[256], tmp[10];
	int fd;
	struct stat sbuf;
	amdgpu_context_handle context_handle;
	uint32_t hang_state, hangs;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* derive the DRM minor from the already-open device fd so we can
	 * locate the matching debugfs directory */
	r = fstat(drm_amdgpu[0], &sbuf);
	CU_ASSERT_EQUAL(r, 0);

	sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/amdgpu_gpu_recover", minor(sbuf.st_rdev));
	fd = open(debugfs_path, O_RDONLY);
	CU_ASSERT(fd >= 0);

	/* reading the debugfs file triggers the GPU recovery */
	r = read(fd, tmp, sizeof(tmp)/sizeof(char));
	CU_ASSERT(r > 0);

	/* a context created before the reset must report it */
	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);

	close(fd);
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* sanity-check that the GPU still executes work after recovery */
	amdgpu_compute_dispatch_test();
	amdgpu_gfx_dispatch_test();
}