basic_tests.c revision 5324fb0d
1/* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 
21 * 22*/ 23 24#include <stdio.h> 25#include <stdlib.h> 26#include <unistd.h> 27#ifdef HAVE_ALLOCA_H 28# include <alloca.h> 29#endif 30#include <sys/wait.h> 31 32#include "CUnit/Basic.h" 33 34#include "amdgpu_test.h" 35#include "amdgpu_drm.h" 36#include "util_math.h" 37 38static amdgpu_device_handle device_handle; 39static uint32_t major_version; 40static uint32_t minor_version; 41static uint32_t family_id; 42 43static void amdgpu_query_info_test(void); 44static void amdgpu_command_submission_gfx(void); 45static void amdgpu_command_submission_compute(void); 46static void amdgpu_command_submission_multi_fence(void); 47static void amdgpu_command_submission_sdma(void); 48static void amdgpu_userptr_test(void); 49static void amdgpu_semaphore_test(void); 50static void amdgpu_sync_dependency_test(void); 51static void amdgpu_bo_eviction_test(void); 52static void amdgpu_dispatch_test(void); 53static void amdgpu_draw_test(void); 54 55static void amdgpu_command_submission_write_linear_helper(unsigned ip_type); 56static void amdgpu_command_submission_const_fill_helper(unsigned ip_type); 57static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type); 58static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle, 59 unsigned ip_type, 60 int instance, int pm4_dw, uint32_t *pm4_src, 61 int res_cnt, amdgpu_bo_handle *resources, 62 struct amdgpu_cs_ib_info *ib_info, 63 struct amdgpu_cs_request *ibs_request); 64 65CU_TestInfo basic_tests[] = { 66 { "Query Info Test", amdgpu_query_info_test }, 67 { "Userptr Test", amdgpu_userptr_test }, 68 { "bo eviction Test", amdgpu_bo_eviction_test }, 69 { "Command submission Test (GFX)", amdgpu_command_submission_gfx }, 70 { "Command submission Test (Compute)", amdgpu_command_submission_compute }, 71 { "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence }, 72 { "Command submission Test (SDMA)", amdgpu_command_submission_sdma }, 73 { "SW semaphore Test", amdgpu_semaphore_test }, 74 { "Sync 
dependency Test", amdgpu_sync_dependency_test }, 75 { "Dispatch Test", amdgpu_dispatch_test }, 76 { "Draw Test", amdgpu_draw_test }, 77 CU_TEST_INFO_NULL, 78}; 79#define BUFFER_SIZE (8 * 1024) 80#define SDMA_PKT_HEADER_op_offset 0 81#define SDMA_PKT_HEADER_op_mask 0x000000FF 82#define SDMA_PKT_HEADER_op_shift 0 83#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift) 84#define SDMA_OPCODE_CONSTANT_FILL 11 85# define SDMA_CONSTANT_FILL_EXTRA_SIZE(x) ((x) << 14) 86 /* 0 = byte fill 87 * 2 = DW fill 88 */ 89#define SDMA_PACKET(op, sub_op, e) ((((e) & 0xFFFF) << 16) | \ 90 (((sub_op) & 0xFF) << 8) | \ 91 (((op) & 0xFF) << 0)) 92#define SDMA_OPCODE_WRITE 2 93# define SDMA_WRITE_SUB_OPCODE_LINEAR 0 94# define SDMA_WRTIE_SUB_OPCODE_TILED 1 95 96#define SDMA_OPCODE_COPY 1 97# define SDMA_COPY_SUB_OPCODE_LINEAR 0 98 99#define GFX_COMPUTE_NOP 0xffff1000 100#define SDMA_NOP 0x0 101 102/* PM4 */ 103#define PACKET_TYPE0 0 104#define PACKET_TYPE1 1 105#define PACKET_TYPE2 2 106#define PACKET_TYPE3 3 107 108#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3) 109#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF) 110#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF) 111#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF) 112#define PACKET0(reg, n) ((PACKET_TYPE0 << 30) | \ 113 ((reg) & 0xFFFF) | \ 114 ((n) & 0x3FFF) << 16) 115#define CP_PACKET2 0x80000000 116#define PACKET2_PAD_SHIFT 0 117#define PACKET2_PAD_MASK (0x3fffffff << 0) 118 119#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v))) 120 121#define PACKET3(op, n) ((PACKET_TYPE3 << 30) | \ 122 (((op) & 0xFF) << 8) | \ 123 ((n) & 0x3FFF) << 16) 124#define PACKET3_COMPUTE(op, n) PACKET3(op, n) | (1 << 1) 125 126/* Packet 3 types */ 127#define PACKET3_NOP 0x10 128 129#define PACKET3_WRITE_DATA 0x37 130#define WRITE_DATA_DST_SEL(x) ((x) << 8) 131 /* 0 - register 132 * 1 - memory (sync - via GRBM) 133 * 2 - gl2 134 * 3 - gds 135 * 4 - reserved 136 * 5 - memory (async - direct) 137 */ 138#define 
WR_ONE_ADDR (1 << 16) 139#define WR_CONFIRM (1 << 20) 140#define WRITE_DATA_CACHE_POLICY(x) ((x) << 25) 141 /* 0 - LRU 142 * 1 - Stream 143 */ 144#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30) 145 /* 0 - me 146 * 1 - pfp 147 * 2 - ce 148 */ 149 150#define PACKET3_DMA_DATA 0x50 151/* 1. header 152 * 2. CONTROL 153 * 3. SRC_ADDR_LO or DATA [31:0] 154 * 4. SRC_ADDR_HI [31:0] 155 * 5. DST_ADDR_LO [31:0] 156 * 6. DST_ADDR_HI [7:0] 157 * 7. COMMAND [30:21] | BYTE_COUNT [20:0] 158 */ 159/* CONTROL */ 160# define PACKET3_DMA_DATA_ENGINE(x) ((x) << 0) 161 /* 0 - ME 162 * 1 - PFP 163 */ 164# define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13) 165 /* 0 - LRU 166 * 1 - Stream 167 * 2 - Bypass 168 */ 169# define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15) 170# define PACKET3_DMA_DATA_DST_SEL(x) ((x) << 20) 171 /* 0 - DST_ADDR using DAS 172 * 1 - GDS 173 * 3 - DST_ADDR using L2 174 */ 175# define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25) 176 /* 0 - LRU 177 * 1 - Stream 178 * 2 - Bypass 179 */ 180# define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27) 181# define PACKET3_DMA_DATA_SRC_SEL(x) ((x) << 29) 182 /* 0 - SRC_ADDR using SAS 183 * 1 - GDS 184 * 2 - DATA 185 * 3 - SRC_ADDR using L2 186 */ 187# define PACKET3_DMA_DATA_CP_SYNC (1 << 31) 188/* COMMAND */ 189# define PACKET3_DMA_DATA_DIS_WC (1 << 21) 190# define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22) 191 /* 0 - none 192 * 1 - 8 in 16 193 * 2 - 8 in 32 194 * 3 - 8 in 64 195 */ 196# define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24) 197 /* 0 - none 198 * 1 - 8 in 16 199 * 2 - 8 in 32 200 * 3 - 8 in 64 201 */ 202# define PACKET3_DMA_DATA_CMD_SAS (1 << 26) 203 /* 0 - memory 204 * 1 - register 205 */ 206# define PACKET3_DMA_DATA_CMD_DAS (1 << 27) 207 /* 0 - memory 208 * 1 - register 209 */ 210# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28) 211# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29) 212# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30) 213 214#define SDMA_PACKET_SI(op, b, t, s, cnt) ((((op) & 0xF) << 28) | \ 215 (((b) & 0x1) << 
26) | \ 216 (((t) & 0x1) << 23) | \ 217 (((s) & 0x1) << 22) | \ 218 (((cnt) & 0xFFFFF) << 0)) 219#define SDMA_OPCODE_COPY_SI 3 220#define SDMA_OPCODE_CONSTANT_FILL_SI 13 221#define SDMA_NOP_SI 0xf 222#define GFX_COMPUTE_NOP_SI 0x80000000 223#define PACKET3_DMA_DATA_SI 0x41 224# define PACKET3_DMA_DATA_SI_ENGINE(x) ((x) << 27) 225 /* 0 - ME 226 * 1 - PFP 227 */ 228# define PACKET3_DMA_DATA_SI_DST_SEL(x) ((x) << 20) 229 /* 0 - DST_ADDR using DAS 230 * 1 - GDS 231 * 3 - DST_ADDR using L2 232 */ 233# define PACKET3_DMA_DATA_SI_SRC_SEL(x) ((x) << 29) 234 /* 0 - SRC_ADDR using SAS 235 * 1 - GDS 236 * 2 - DATA 237 * 3 - SRC_ADDR using L2 238 */ 239# define PACKET3_DMA_DATA_SI_CP_SYNC (1 << 31) 240 241 242#define PKT3_CONTEXT_CONTROL 0x28 243#define CONTEXT_CONTROL_LOAD_ENABLE(x) (((unsigned)(x) & 0x1) << 31) 244#define CONTEXT_CONTROL_LOAD_CE_RAM(x) (((unsigned)(x) & 0x1) << 28) 245#define CONTEXT_CONTROL_SHADOW_ENABLE(x) (((unsigned)(x) & 0x1) << 31) 246 247#define PKT3_CLEAR_STATE 0x12 248 249#define PKT3_SET_SH_REG 0x76 250#define PACKET3_SET_SH_REG_START 0x00002c00 251 252#define PACKET3_DISPATCH_DIRECT 0x15 253#define PACKET3_EVENT_WRITE 0x46 254#define PACKET3_ACQUIRE_MEM 0x58 255#define PACKET3_SET_CONTEXT_REG 0x69 256#define PACKET3_SET_UCONFIG_REG 0x79 257#define PACKET3_DRAW_INDEX_AUTO 0x2D 258/* gfx 8 */ 259#define mmCOMPUTE_PGM_LO 0x2e0c 260#define mmCOMPUTE_PGM_RSRC1 0x2e12 261#define mmCOMPUTE_TMPRING_SIZE 0x2e18 262#define mmCOMPUTE_USER_DATA_0 0x2e40 263#define mmCOMPUTE_USER_DATA_1 0x2e41 264#define mmCOMPUTE_RESOURCE_LIMITS 0x2e15 265#define mmCOMPUTE_NUM_THREAD_X 0x2e07 266 267 268 269#define SWAP_32(num) (((num & 0xff000000) >> 24) | \ 270 ((num & 0x0000ff00) << 8) | \ 271 ((num & 0x00ff0000) >> 8) | \ 272 ((num & 0x000000ff) << 24)) 273 274 275/* Shader code 276 * void main() 277{ 278 279 float x = some_input; 280 for (unsigned i = 0; i < 1000000; i++) 281 x = sin(x); 282 283 u[0] = 42u; 284} 285*/ 286 287static uint32_t shader_bin[] = { 288 
SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf), 289 SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf), 290 SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e), 291 SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf) 292}; 293 294#define CODE_OFFSET 512 295#define DATA_OFFSET 1024 296 297enum cs_type { 298 CS_BUFFERCLEAR, 299 CS_BUFFERCOPY 300}; 301 302static const uint32_t bufferclear_cs_shader_gfx9[] = { 303 0xD1FD0000, 0x04010C08, 0x7E020204, 0x7E040205, 304 0x7E060206, 0x7E080207, 0xE01C2000, 0x80000100, 305 0xBF810000 306}; 307 308static const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = { 309 {0x2e12, 0x000C0041}, //{ mmCOMPUTE_PGM_RSRC1, 0x000C0041 }, 310 {0x2e13, 0x00000090}, //{ mmCOMPUTE_PGM_RSRC2, 0x00000090 }, 311 {0x2e07, 0x00000040}, //{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 }, 312 {0x2e08, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 }, 313 {0x2e09, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 } 314}; 315 316static const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5; 317 318static const uint32_t buffercopy_cs_shader_gfx9[] = { 319 0xD1FD0000, 0x04010C08, 0xE00C2000, 0x80000100, 320 0xBF8C0F70, 0xE01C2000, 0x80010100, 0xBF810000 321}; 322 323static const uint32_t preamblecache_gfx9[] = { 324 0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0, 325 0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000, 326 0xc0026900, 0xb4, 0x0, 0x3f800000, 0xc0016900, 0x103, 0x0, 327 0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0, 328 0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0, 329 0xc0016900, 0x2d5, 0x10000, 0xc0016900, 0x2dc, 0x0, 330 0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0, 331 0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 332 0xc0026900, 0x311, 0x3, 0x0, 0xc0026900, 0x316, 0x1e, 0x20, 333 0xc0016900, 0x349, 
0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0, 334 0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0, 335 0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0, 336 0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 337 0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0, 338 0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff, 339 0xc0016900, 0x314, 0x0, 0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1, 340 0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0, 341 0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0, 342 0xc0017900, 0x24b, 0x0 343}; 344 345enum ps_type { 346 PS_CONST, 347 PS_TEX 348}; 349 350static const uint32_t ps_const_shader_gfx9[] = { 351 0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203, 352 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 353 0xC4001C0F, 0x00000100, 0xBF810000 354}; 355 356static const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6; 357 358static const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = { 359 {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 }, 360 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 }, 361 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 }, 362 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 }, 363 { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 }, 364 { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 }, 365 { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 }, 366 { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 }, 367 { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 }, 368 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 } 369 } 370}; 371 372static const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = { 373 0x00000004 374}; 375 376static const uint32_t 
ps_num_sh_registers_gfx9 = 2; 377 378static const uint32_t ps_const_sh_registers_gfx9[][2] = { 379 {0x2C0A, 0x000C0040},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 }, 380 {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 } 381}; 382 383static const uint32_t ps_num_context_registers_gfx9 = 7; 384 385static const uint32_t ps_const_context_reg_gfx9[][2] = { 386 {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR, 0x00000002 }, 387 {0xA1B6, 0x00000000}, //{ mmSPI_PS_IN_CONTROL, 0x00000000 }, 388 {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK, 0x0000000F }, 389 {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL, 0x00000010 }, 390 {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT, 0x00000000 }, 391 {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */}, 392 {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT, 0x00000004 } 393}; 394 395static const uint32_t ps_tex_shader_gfx9[] = { 396 0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000, 397 0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00, 398 0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000, 399 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 400 0x00000100, 0xBF810000 401}; 402 403static const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = { 404 0x0000000B 405}; 406 407static const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6; 408 409static const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = { 410 {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 }, 411 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 }, 412 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 }, 413 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 }, 414 { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 415 { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 416 { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 417 { 0xD2970000, 0x00020702, 0xD2970001, 
0x00020B04, 0xC4001C0F, 0x00000100 }, 418 { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 419 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 } 420 } 421}; 422 423static const uint32_t ps_tex_sh_registers_gfx9[][2] = { 424 {0x2C0A, 0x000C0081},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 }, 425 {0x2C0B, 0x00000018}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 } 426}; 427 428static const uint32_t ps_tex_context_reg_gfx9[][2] = { 429 {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR, 0x00000002 }, 430 {0xA1B6, 0x00000001}, //{ mmSPI_PS_IN_CONTROL, 0x00000001 }, 431 {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK, 0x0000000F }, 432 {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL, 0x00000010 }, 433 {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT, 0x00000000 }, 434 {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */}, 435 {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT, 0x00000004 } 436}; 437 438static const uint32_t vs_RectPosTexFast_shader_gfx9[] = { 439 0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100, 440 0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206, 441 0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080, 442 0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003, 443 0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101, 444 0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903, 445 0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100, 446 0xC400020F, 0x05060403, 0xBF810000 447}; 448 449static const uint32_t cached_cmd_gfx9[] = { 450 0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0, 451 0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020, 452 0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf, 453 0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x12, 454 0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0, 455 0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011, 456 0xc0026900, 0x292, 0x20, 0x60201b8, 457 0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0 458}; 459 460int amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size, 461 unsigned 
alignment, unsigned heap, uint64_t alloc_flags, 462 uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu, 463 uint64_t *mc_address, 464 amdgpu_va_handle *va_handle) 465{ 466 struct amdgpu_bo_alloc_request request = {}; 467 amdgpu_bo_handle buf_handle; 468 amdgpu_va_handle handle; 469 uint64_t vmc_addr; 470 int r; 471 472 request.alloc_size = size; 473 request.phys_alignment = alignment; 474 request.preferred_heap = heap; 475 request.flags = alloc_flags; 476 477 r = amdgpu_bo_alloc(dev, &request, &buf_handle); 478 if (r) 479 return r; 480 481 r = amdgpu_va_range_alloc(dev, 482 amdgpu_gpu_va_range_general, 483 size, alignment, 0, &vmc_addr, 484 &handle, 0); 485 if (r) 486 goto error_va_alloc; 487 488 r = amdgpu_bo_va_op_raw(dev, buf_handle, 0, ALIGN(size, getpagesize()), vmc_addr, 489 AMDGPU_VM_PAGE_READABLE | 490 AMDGPU_VM_PAGE_WRITEABLE | 491 AMDGPU_VM_PAGE_EXECUTABLE | 492 mapping_flags, 493 AMDGPU_VA_OP_MAP); 494 if (r) 495 goto error_va_map; 496 497 r = amdgpu_bo_cpu_map(buf_handle, cpu); 498 if (r) 499 goto error_cpu_map; 500 501 *bo = buf_handle; 502 *mc_address = vmc_addr; 503 *va_handle = handle; 504 505 return 0; 506 507 error_cpu_map: 508 amdgpu_bo_cpu_unmap(buf_handle); 509 510 error_va_map: 511 amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP); 512 513 error_va_alloc: 514 amdgpu_bo_free(buf_handle); 515 return r; 516} 517 518 519 520int suite_basic_tests_init(void) 521{ 522 struct amdgpu_gpu_info gpu_info = {0}; 523 int r; 524 525 r = amdgpu_device_initialize(drm_amdgpu[0], &major_version, 526 &minor_version, &device_handle); 527 528 if (r) { 529 if ((r == -EACCES) && (errno == EACCES)) 530 printf("\n\nError:%s. 
" 531 "Hint:Try to run this test program as root.", 532 strerror(errno)); 533 return CUE_SINIT_FAILED; 534 } 535 536 r = amdgpu_query_gpu_info(device_handle, &gpu_info); 537 if (r) 538 return CUE_SINIT_FAILED; 539 540 family_id = gpu_info.family_id; 541 542 return CUE_SUCCESS; 543} 544 545int suite_basic_tests_clean(void) 546{ 547 int r = amdgpu_device_deinitialize(device_handle); 548 549 if (r == 0) 550 return CUE_SUCCESS; 551 else 552 return CUE_SCLEAN_FAILED; 553} 554 555static void amdgpu_query_info_test(void) 556{ 557 struct amdgpu_gpu_info gpu_info = {0}; 558 uint32_t version, feature; 559 int r; 560 561 r = amdgpu_query_gpu_info(device_handle, &gpu_info); 562 CU_ASSERT_EQUAL(r, 0); 563 564 r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0, 565 0, &version, &feature); 566 CU_ASSERT_EQUAL(r, 0); 567} 568 569static void amdgpu_command_submission_gfx_separate_ibs(void) 570{ 571 amdgpu_context_handle context_handle; 572 amdgpu_bo_handle ib_result_handle, ib_result_ce_handle; 573 void *ib_result_cpu, *ib_result_ce_cpu; 574 uint64_t ib_result_mc_address, ib_result_ce_mc_address; 575 struct amdgpu_cs_request ibs_request = {0}; 576 struct amdgpu_cs_ib_info ib_info[2]; 577 struct amdgpu_cs_fence fence_status = {0}; 578 uint32_t *ptr; 579 uint32_t expired; 580 amdgpu_bo_list_handle bo_list; 581 amdgpu_va_handle va_handle, va_handle_ce; 582 int r, i = 0; 583 584 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 585 CU_ASSERT_EQUAL(r, 0); 586 587 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 588 AMDGPU_GEM_DOMAIN_GTT, 0, 589 &ib_result_handle, &ib_result_cpu, 590 &ib_result_mc_address, &va_handle); 591 CU_ASSERT_EQUAL(r, 0); 592 593 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 594 AMDGPU_GEM_DOMAIN_GTT, 0, 595 &ib_result_ce_handle, &ib_result_ce_cpu, 596 &ib_result_ce_mc_address, &va_handle_ce); 597 CU_ASSERT_EQUAL(r, 0); 598 599 r = amdgpu_get_bo_list(device_handle, ib_result_handle, 600 ib_result_ce_handle, &bo_list); 601 
CU_ASSERT_EQUAL(r, 0); 602 603 memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info)); 604 605 /* IT_SET_CE_DE_COUNTERS */ 606 ptr = ib_result_ce_cpu; 607 if (family_id != AMDGPU_FAMILY_SI) { 608 ptr[i++] = 0xc0008900; 609 ptr[i++] = 0; 610 } 611 ptr[i++] = 0xc0008400; 612 ptr[i++] = 1; 613 ib_info[0].ib_mc_address = ib_result_ce_mc_address; 614 ib_info[0].size = i; 615 ib_info[0].flags = AMDGPU_IB_FLAG_CE; 616 617 /* IT_WAIT_ON_CE_COUNTER */ 618 ptr = ib_result_cpu; 619 ptr[0] = 0xc0008600; 620 ptr[1] = 0x00000001; 621 ib_info[1].ib_mc_address = ib_result_mc_address; 622 ib_info[1].size = 2; 623 624 ibs_request.ip_type = AMDGPU_HW_IP_GFX; 625 ibs_request.number_of_ibs = 2; 626 ibs_request.ibs = ib_info; 627 ibs_request.resources = bo_list; 628 ibs_request.fence_info.handle = NULL; 629 630 r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1); 631 632 CU_ASSERT_EQUAL(r, 0); 633 634 fence_status.context = context_handle; 635 fence_status.ip_type = AMDGPU_HW_IP_GFX; 636 fence_status.ip_instance = 0; 637 fence_status.fence = ibs_request.seq_no; 638 639 r = amdgpu_cs_query_fence_status(&fence_status, 640 AMDGPU_TIMEOUT_INFINITE, 641 0, &expired); 642 CU_ASSERT_EQUAL(r, 0); 643 644 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 645 ib_result_mc_address, 4096); 646 CU_ASSERT_EQUAL(r, 0); 647 648 r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce, 649 ib_result_ce_mc_address, 4096); 650 CU_ASSERT_EQUAL(r, 0); 651 652 r = amdgpu_bo_list_destroy(bo_list); 653 CU_ASSERT_EQUAL(r, 0); 654 655 r = amdgpu_cs_ctx_free(context_handle); 656 CU_ASSERT_EQUAL(r, 0); 657 658} 659 660static void amdgpu_command_submission_gfx_shared_ib(void) 661{ 662 amdgpu_context_handle context_handle; 663 amdgpu_bo_handle ib_result_handle; 664 void *ib_result_cpu; 665 uint64_t ib_result_mc_address; 666 struct amdgpu_cs_request ibs_request = {0}; 667 struct amdgpu_cs_ib_info ib_info[2]; 668 struct amdgpu_cs_fence fence_status = {0}; 669 uint32_t *ptr; 670 uint32_t expired; 
671 amdgpu_bo_list_handle bo_list; 672 amdgpu_va_handle va_handle; 673 int r, i = 0; 674 675 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 676 CU_ASSERT_EQUAL(r, 0); 677 678 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 679 AMDGPU_GEM_DOMAIN_GTT, 0, 680 &ib_result_handle, &ib_result_cpu, 681 &ib_result_mc_address, &va_handle); 682 CU_ASSERT_EQUAL(r, 0); 683 684 r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, 685 &bo_list); 686 CU_ASSERT_EQUAL(r, 0); 687 688 memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info)); 689 690 /* IT_SET_CE_DE_COUNTERS */ 691 ptr = ib_result_cpu; 692 if (family_id != AMDGPU_FAMILY_SI) { 693 ptr[i++] = 0xc0008900; 694 ptr[i++] = 0; 695 } 696 ptr[i++] = 0xc0008400; 697 ptr[i++] = 1; 698 ib_info[0].ib_mc_address = ib_result_mc_address; 699 ib_info[0].size = i; 700 ib_info[0].flags = AMDGPU_IB_FLAG_CE; 701 702 ptr = (uint32_t *)ib_result_cpu + 4; 703 ptr[0] = 0xc0008600; 704 ptr[1] = 0x00000001; 705 ib_info[1].ib_mc_address = ib_result_mc_address + 16; 706 ib_info[1].size = 2; 707 708 ibs_request.ip_type = AMDGPU_HW_IP_GFX; 709 ibs_request.number_of_ibs = 2; 710 ibs_request.ibs = ib_info; 711 ibs_request.resources = bo_list; 712 ibs_request.fence_info.handle = NULL; 713 714 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 715 716 CU_ASSERT_EQUAL(r, 0); 717 718 fence_status.context = context_handle; 719 fence_status.ip_type = AMDGPU_HW_IP_GFX; 720 fence_status.ip_instance = 0; 721 fence_status.fence = ibs_request.seq_no; 722 723 r = amdgpu_cs_query_fence_status(&fence_status, 724 AMDGPU_TIMEOUT_INFINITE, 725 0, &expired); 726 CU_ASSERT_EQUAL(r, 0); 727 728 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 729 ib_result_mc_address, 4096); 730 CU_ASSERT_EQUAL(r, 0); 731 732 r = amdgpu_bo_list_destroy(bo_list); 733 CU_ASSERT_EQUAL(r, 0); 734 735 r = amdgpu_cs_ctx_free(context_handle); 736 CU_ASSERT_EQUAL(r, 0); 737} 738 739static void amdgpu_command_submission_gfx_cp_write_data(void) 740{ 741 
amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX); 742} 743 744static void amdgpu_command_submission_gfx_cp_const_fill(void) 745{ 746 amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX); 747} 748 749static void amdgpu_command_submission_gfx_cp_copy_data(void) 750{ 751 amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX); 752} 753 754static void amdgpu_bo_eviction_test(void) 755{ 756 const int sdma_write_length = 1024; 757 const int pm4_dw = 256; 758 amdgpu_context_handle context_handle; 759 amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2]; 760 amdgpu_bo_handle *resources; 761 uint32_t *pm4; 762 struct amdgpu_cs_ib_info *ib_info; 763 struct amdgpu_cs_request *ibs_request; 764 uint64_t bo1_mc, bo2_mc; 765 volatile unsigned char *bo1_cpu, *bo2_cpu; 766 int i, j, r, loop1, loop2; 767 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 768 amdgpu_va_handle bo1_va_handle, bo2_va_handle; 769 struct amdgpu_heap_info vram_info, gtt_info; 770 771 pm4 = calloc(pm4_dw, sizeof(*pm4)); 772 CU_ASSERT_NOT_EQUAL(pm4, NULL); 773 774 ib_info = calloc(1, sizeof(*ib_info)); 775 CU_ASSERT_NOT_EQUAL(ib_info, NULL); 776 777 ibs_request = calloc(1, sizeof(*ibs_request)); 778 CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 779 780 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 781 CU_ASSERT_EQUAL(r, 0); 782 783 /* prepare resource */ 784 resources = calloc(4, sizeof(amdgpu_bo_handle)); 785 CU_ASSERT_NOT_EQUAL(resources, NULL); 786 787 r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM, 788 0, &vram_info); 789 CU_ASSERT_EQUAL(r, 0); 790 791 r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096, 792 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]); 793 CU_ASSERT_EQUAL(r, 0); 794 r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096, 795 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]); 796 CU_ASSERT_EQUAL(r, 0); 797 798 r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT, 799 0, >t_info); 800 
CU_ASSERT_EQUAL(r, 0); 801 802 r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096, 803 AMDGPU_GEM_DOMAIN_GTT, 0, >t_max[0]); 804 CU_ASSERT_EQUAL(r, 0); 805 r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096, 806 AMDGPU_GEM_DOMAIN_GTT, 0, >t_max[1]); 807 CU_ASSERT_EQUAL(r, 0); 808 809 810 811 loop1 = loop2 = 0; 812 /* run 9 circle to test all mapping combination */ 813 while(loop1 < 2) { 814 while(loop2 < 2) { 815 /* allocate UC bo1for sDMA use */ 816 r = amdgpu_bo_alloc_and_map(device_handle, 817 sdma_write_length, 4096, 818 AMDGPU_GEM_DOMAIN_GTT, 819 gtt_flags[loop1], &bo1, 820 (void**)&bo1_cpu, &bo1_mc, 821 &bo1_va_handle); 822 CU_ASSERT_EQUAL(r, 0); 823 824 /* set bo1 */ 825 memset((void*)bo1_cpu, 0xaa, sdma_write_length); 826 827 /* allocate UC bo2 for sDMA use */ 828 r = amdgpu_bo_alloc_and_map(device_handle, 829 sdma_write_length, 4096, 830 AMDGPU_GEM_DOMAIN_GTT, 831 gtt_flags[loop2], &bo2, 832 (void**)&bo2_cpu, &bo2_mc, 833 &bo2_va_handle); 834 CU_ASSERT_EQUAL(r, 0); 835 836 /* clear bo2 */ 837 memset((void*)bo2_cpu, 0, sdma_write_length); 838 839 resources[0] = bo1; 840 resources[1] = bo2; 841 resources[2] = vram_max[loop2]; 842 resources[3] = gtt_max[loop2]; 843 844 /* fulfill PM4: test DMA copy linear */ 845 i = j = 0; 846 if (family_id == AMDGPU_FAMILY_SI) { 847 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0, 848 sdma_write_length); 849 pm4[i++] = 0xffffffff & bo2_mc; 850 pm4[i++] = 0xffffffff & bo1_mc; 851 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 852 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 853 } else { 854 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0); 855 if (family_id >= AMDGPU_FAMILY_AI) 856 pm4[i++] = sdma_write_length - 1; 857 else 858 pm4[i++] = sdma_write_length; 859 pm4[i++] = 0; 860 pm4[i++] = 0xffffffff & bo1_mc; 861 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 862 pm4[i++] = 0xffffffff & bo2_mc; 863 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 864 } 
	/* --- tail of amdgpu_bo_eviction_test() (function begins earlier in the file) --- */
		amdgpu_test_exec_cs_helper(context_handle,
					   AMDGPU_HW_IP_DMA, 0,
					   i, pm4,
					   4, resources,
					   ib_info, ibs_request);

		/* verify if SDMA test result meets with expected */
		i = 0;
		while(i < sdma_write_length) {
			CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
		}
		r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
					     sdma_write_length);
		CU_ASSERT_EQUAL(r, 0);
		r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
					     sdma_write_length);
		CU_ASSERT_EQUAL(r, 0);
		loop2++;
		}
		/* reset the inner counter so the next loop1 pass runs all loop2 cases */
		loop2 = 0;
		loop1++;
	}
	amdgpu_bo_free(vram_max[0]);
	amdgpu_bo_free(vram_max[1]);
	amdgpu_bo_free(gtt_max[0]);
	amdgpu_bo_free(gtt_max[1]);
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

/* Run every GFX-ring command submission sub-test in sequence. */
static void amdgpu_command_submission_gfx(void)
{
	/* write data using the CP */
	amdgpu_command_submission_gfx_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_gfx_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_gfx_cp_copy_data();
	/* separate IB buffers for multi-IB submission */
	amdgpu_command_submission_gfx_separate_ibs();
	/* shared IB buffer for multi-IB submission */
	amdgpu_command_submission_gfx_shared_ib();
}

/*
 * Exercise userspace semaphores in two scenarios:
 *   1. signal on one engine (SDMA) and wait on another (GFX) within the
 *      same context;
 *   2. signal and wait on the same engine (GFX) across two contexts.
 * Each submission is a single NOP IB; the final fence wait (500 ms timeout)
 * confirms the chain completed.
 */
static void amdgpu_semaphore_test(void)
{
	amdgpu_context_handle context_handle[2];
	amdgpu_semaphore_handle sem;
	amdgpu_bo_handle ib_result_handle[2];
	void *ib_result_cpu[2];
	uint64_t ib_result_mc_address[2];
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2] = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	uint32_t sdma_nop, gfx_nop;
	amdgpu_bo_list_handle bo_list[2];
	amdgpu_va_handle va_handle[2];
	int r, i;

	/* SI uses different NOP encodings for both SDMA and CP */
	if (family_id == AMDGPU_FAMILY_SI) {
		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
		gfx_nop = GFX_COMPUTE_NOP_SI;
	} else {
		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
		gfx_nop = GFX_COMPUTE_NOP;
	}

	r = amdgpu_cs_create_semaphore(&sem);
	CU_ASSERT_EQUAL(r, 0);
	/* two contexts, each with its own 4 KiB GTT IB buffer and bo list */
	for (i = 0; i < 2; i++) {
		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle[i], &ib_result_cpu[i],
					    &ib_result_mc_address[i], &va_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
				       NULL, &bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	/* 1. same context different engine */
	ptr = ib_result_cpu[0];
	ptr[0] = sdma_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	/* GFX submission on the same context must wait for the SDMA signal */
	r = amdgpu_cs_wait_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[0];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* 2. same engine different context */
	ptr = ib_result_cpu[0];
	ptr[0] = gfx_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	/* the second context waits on the semaphore signalled by the first */
	r = amdgpu_cs_wait_semaphore(context_handle[1], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request[1], 1);

	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[1];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* tear down per-context resources, then the semaphore itself */
	for (i = 0; i < 2; i++) {
		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
					     ib_result_mc_address[i], 4096);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_cs_ctx_free(context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_destroy_semaphore(sem);
	CU_ASSERT_EQUAL(r, 0);
}

/*
 * Submit a 16-dword NOP IB on every available compute ring and wait for
 * each fence with an infinite timeout.
 */
static void amdgpu_command_submission_compute_nop(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request;
	struct amdgpu_cs_ib_info ib_info;
	struct amdgpu_cs_fence fence_status;
	uint32_t *ptr;
	uint32_t expired;
	int r, instance;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip info;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* available_rings is a bitmask; iterate rings until the bit is clear */
	for (instance = 0; (1 << instance) & info.available_rings; instance++) {
		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle, &ib_result_cpu,
					    &ib_result_mc_address, &va_handle);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
				       &bo_list);
		CU_ASSERT_EQUAL(r, 0);

		/* one PACKET3_NOP header; count 14 makes the packet 16 dwords */
		ptr = ib_result_cpu;
		memset(ptr, 0, 16);
		ptr[0]=PACKET3(PACKET3_NOP, 14);

		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
		ib_info.ib_mc_address = ib_result_mc_address;
		ib_info.size = 16;

		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
		ibs_request.ring = instance;
		ibs_request.number_of_ibs = 1;
		ibs_request.ibs = &ib_info;
		ibs_request.resources = bo_list;
		ibs_request.fence_info.handle = NULL;

		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
		r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
		CU_ASSERT_EQUAL(r, 0);

		fence_status.context = context_handle;
		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
		fence_status.ip_instance = 0;
		fence_status.ring = instance;
		fence_status.fence = ibs_request.seq_no;

		r = amdgpu_cs_query_fence_status(&fence_status,
						 AMDGPU_TIMEOUT_INFINITE,
						 0, &expired);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
					     ib_result_mc_address, 4096);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

/* Thin wrappers running the shared helpers on the COMPUTE IP. */
static void amdgpu_command_submission_compute_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

/* Run every compute-ring command submission sub-test in sequence. */
static void amdgpu_command_submission_compute(void)
{
	/* write data using the CP */
	amdgpu_command_submission_compute_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_compute_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_compute_cp_copy_data();
	/* nop test */
	amdgpu_command_submission_compute_nop();
}

/*
 * caller need create/release:
 * pm4_src, resources, ib_info, and ibs_request
 * submit command stream described in ibs_request and wait for this IB accomplished
 */
static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
				       unsigned ip_type,
				       int instance, int pm4_dw, uint32_t *pm4_src,
				       int res_cnt, amdgpu_bo_handle *resources,
				       struct amdgpu_cs_ib_info *ib_info,
				       struct amdgpu_cs_request 
				       *ibs_request)
{
	int r;
	uint32_t expired;
	uint32_t *ring_ptr;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_fence fence_status = {0};
	/* res_cnt caller resources plus one slot for the IB itself */
	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
	amdgpu_va_handle va_handle;

	/* prepare CS */
	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
	CU_ASSERT_NOT_EQUAL(resources, NULL);
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
	/* 4 KiB IB buffer holds at most 1024 dwords */
	CU_ASSERT_TRUE(pm4_dw <= 1024);

	/* allocate IB */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* copy PM4 packet to ring from caller */
	ring_ptr = ib_result_cpu;
	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));

	ib_info->ib_mc_address = ib_result_mc_address;
	ib_info->size = pm4_dw;

	ibs_request->ip_type = ip_type;
	ibs_request->ring = instance;
	ibs_request->number_of_ibs = 1;
	ibs_request->ibs = ib_info;
	ibs_request->fence_info.handle = NULL;

	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
	all_res[res_cnt] = ib_result_handle;

	r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res,
				  NULL, &ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ibs_request->ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request->seq_no;

	/* wait for IB accomplished */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);
}

/*
 * Write a 0xdeadbeaf pattern to a GTT buffer with a write-linear packet on
 * the given IP (SDMA, or CP WRITE_DATA for GFX/COMPUTE), then verify the
 * buffer contents from the CPU.  Runs on every available ring, once with
 * cached GTT and once with USWC mapping flags.
 */
static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
{
	const int sdma_write_length = 128;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	int i, j, r, loop, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop = 0;
		/* loop 0: cached GTT mapping; loop 1: USWC mapping */
		while(loop < 2) {
			/* allocate UC bo for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length * sizeof(uint32_t),
						    4096, AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop], &bo, (void**)&bo_cpu,
						    &bo_mc, &va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo */
			memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));

			resources[0] = bo;

			/* fulfill PM4: test DMA write-linear */
			i = j = 0;
			if (ip_type == AMDGPU_HW_IP_DMA) {
				if (family_id == AMDGPU_FAMILY_SI)
					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
								  sdma_write_length);
				else
					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
							       SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
				pm4[i++] = 0xffffffff & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				/* AI and later encode the count as length - 1 */
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else if (family_id != AMDGPU_FAMILY_SI)
					pm4[i++] = sdma_write_length;
				while(j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
				pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				while(j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   ip_type, ring_id,
						   i, pm4,
						   1, resources,
						   ib_info, ibs_request);

			/* verify if SDMA test result meets with expected */
			i = 0;
			while(i < sdma_write_length) {
				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
			}

			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
						     sdma_write_length * sizeof(uint32_t));
			CU_ASSERT_EQUAL(r, 0);
			loop++;
		}
	}
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_sdma_write_linear(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
}

/*
 * Fill a 1 MiB GTT buffer with 0xdeadbeaf using a constant-fill packet on
 * the given IP (SDMA constant fill, or CP DMA_DATA for GFX/COMPUTE), then
 * verify the contents from the CPU.
 */
static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
{
	const int sdma_write_length = 1024 * 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	int i, j, r, loop, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop = 0;
		/* loop 0: cached GTT mapping; loop 1: USWC mapping */
		while(loop < 2) {
			/* allocate UC bo for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop], &bo, (void**)&bo_cpu,
						    &bo_mc, &va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo */
			memset((void*)bo_cpu, 0, sdma_write_length);

			resources[0] = bo;

			/* fulfill PM4: test DMA const fill */
			i = j = 0;
			if (ip_type == AMDGPU_HW_IP_DMA) {
				if (family_id == AMDGPU_FAMILY_SI) {
					/* SI fill counts in dwords, hence / 4 */
					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
								  0, 0, 0,
								  sdma_write_length / 4);
					pm4[i++] = 0xfffffffc & bo_mc;
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
				} else {
					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
							       SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
					pm4[i++] = 0xffffffff & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = 0xdeadbeaf;
					/* AI and later encode the count as length - 1 */
					if (family_id >= AMDGPU_FAMILY_AI)
						pm4[i++] = sdma_write_length - 1;
					else
						pm4[i++] = sdma_write_length;
				}
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				if (family_id == AMDGPU_FAMILY_SI) {
					pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
						   PACKET3_DMA_DATA_SI_DST_SEL(0) |
						   PACKET3_DMA_DATA_SI_SRC_SEL(2) |
						   PACKET3_DMA_DATA_SI_CP_SYNC;
					pm4[i++] = 0xffffffff & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = sdma_write_length;
				} else {
					pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
					pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
						   PACKET3_DMA_DATA_DST_SEL(0) |
						   PACKET3_DMA_DATA_SRC_SEL(2) |
						   PACKET3_DMA_DATA_CP_SYNC;
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = 0;
					pm4[i++] = 0xfffffffc & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = sdma_write_length;
				}
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   ip_type, ring_id,
						   i, pm4,
						   1, resources,
						   ib_info, ibs_request);

			/* verify if SDMA test result meets with expected */
			i = 0;
			while(i < (sdma_write_length / 4)) {
				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
			}

			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			loop++;
		}
	}
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_sdma_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
}

/*
 * Copy a 1 KiB buffer filled with 0xaa from bo1 to bo2 using a copy-linear
 * packet on the given IP (SDMA copy, or CP DMA_DATA for GFX/COMPUTE) and
 * verify bo2 from the CPU, over the gtt_flags mapping combinations of the
 * two buffers.
 */
static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
{
	const int sdma_write_length = 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo1, bo2;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo1_mc, bo2_mc;
	volatile unsigned char *bo1_cpu, *bo2_cpu;
	int i, j, r, loop1, loop2, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(2, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop1 = loop2 = 0;
		/* iterate the 2x2 gtt_flags combinations for bo1/bo2 mappings */
		while(loop1 < 2) {
			while(loop2 < 2) {
				/* allocate UC bo1 for sDMA use */
				r = amdgpu_bo_alloc_and_map(device_handle,
							    sdma_write_length, 4096,
							    AMDGPU_GEM_DOMAIN_GTT,
							    gtt_flags[loop1], &bo1,
							    (void**)&bo1_cpu, &bo1_mc,
							    &bo1_va_handle);
				CU_ASSERT_EQUAL(r, 0);

				/* set bo1 */
				memset((void*)bo1_cpu, 0xaa, sdma_write_length);

				/* allocate UC bo2 for sDMA use */
				r = amdgpu_bo_alloc_and_map(device_handle,
							    sdma_write_length, 4096,
							    AMDGPU_GEM_DOMAIN_GTT,
							    gtt_flags[loop2], &bo2,
							    (void**)&bo2_cpu, &bo2_mc,
							    &bo2_va_handle);
				CU_ASSERT_EQUAL(r, 0);

				/* clear bo2 */
				memset((void*)bo2_cpu, 0, sdma_write_length);

				resources[0] = bo1;
				resources[1] = bo2;

				/* fulfill PM4: test DMA copy linear */
				i = j = 0;
				if (ip_type == AMDGPU_HW_IP_DMA) {
					if (family_id == AMDGPU_FAMILY_SI) {
						pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
									  0, 0, 0,
									  sdma_write_length);
						pm4[i++] = 0xffffffff & bo2_mc;
						pm4[i++] = 0xffffffff & bo1_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
					} else {
						pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
								       SDMA_COPY_SUB_OPCODE_LINEAR,
								       0);
						/* AI and later encode the count as length - 1 */
						if (family_id >= AMDGPU_FAMILY_AI)
							pm4[i++] = sdma_write_length - 1;
						else
							pm4[i++] = sdma_write_length;
						pm4[i++] = 0;
						pm4[i++] = 0xffffffff & bo1_mc;
						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
						pm4[i++] = 0xffffffff & bo2_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
					}
				} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
					   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
					if (family_id == AMDGPU_FAMILY_SI) {
						pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
						pm4[i++] = 0xfffffffc & bo1_mc;
						pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
							   PACKET3_DMA_DATA_SI_DST_SEL(0) |
							   PACKET3_DMA_DATA_SI_SRC_SEL(0) |
							   PACKET3_DMA_DATA_SI_CP_SYNC |
							   (0xffff00000000 & bo1_mc) >> 32;
						pm4[i++] = 0xfffffffc & bo2_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
						pm4[i++] = sdma_write_length;
					} else {
						pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
						pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
							   PACKET3_DMA_DATA_DST_SEL(0) |
							   PACKET3_DMA_DATA_SRC_SEL(0) |
							   PACKET3_DMA_DATA_CP_SYNC;
						pm4[i++] = 0xfffffffc & bo1_mc;
						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
						pm4[i++] = 0xfffffffc & bo2_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
						pm4[i++] = 
sdma_write_length; 1607 } 1608 } 1609 1610 amdgpu_test_exec_cs_helper(context_handle, 1611 ip_type, ring_id, 1612 i, pm4, 1613 2, resources, 1614 ib_info, ibs_request); 1615 1616 /* verify if SDMA test result meets with expected */ 1617 i = 0; 1618 while(i < sdma_write_length) { 1619 CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa); 1620 } 1621 r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc, 1622 sdma_write_length); 1623 CU_ASSERT_EQUAL(r, 0); 1624 r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc, 1625 sdma_write_length); 1626 CU_ASSERT_EQUAL(r, 0); 1627 loop2++; 1628 } 1629 loop1++; 1630 } 1631 } 1632 /* clean resources */ 1633 free(resources); 1634 free(ibs_request); 1635 free(ib_info); 1636 free(pm4); 1637 1638 /* end of test */ 1639 r = amdgpu_cs_ctx_free(context_handle); 1640 CU_ASSERT_EQUAL(r, 0); 1641} 1642 1643static void amdgpu_command_submission_sdma_copy_linear(void) 1644{ 1645 amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA); 1646} 1647 1648static void amdgpu_command_submission_sdma(void) 1649{ 1650 amdgpu_command_submission_sdma_write_linear(); 1651 amdgpu_command_submission_sdma_const_fill(); 1652 amdgpu_command_submission_sdma_copy_linear(); 1653} 1654 1655static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all) 1656{ 1657 amdgpu_context_handle context_handle; 1658 amdgpu_bo_handle ib_result_handle, ib_result_ce_handle; 1659 void *ib_result_cpu, *ib_result_ce_cpu; 1660 uint64_t ib_result_mc_address, ib_result_ce_mc_address; 1661 struct amdgpu_cs_request ibs_request[2] = {0}; 1662 struct amdgpu_cs_ib_info ib_info[2]; 1663 struct amdgpu_cs_fence fence_status[2] = {0}; 1664 uint32_t *ptr; 1665 uint32_t expired; 1666 amdgpu_bo_list_handle bo_list; 1667 amdgpu_va_handle va_handle, va_handle_ce; 1668 int r; 1669 int i = 0, ib_cs_num = 2; 1670 1671 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 1672 CU_ASSERT_EQUAL(r, 0); 1673 1674 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 1675 AMDGPU_GEM_DOMAIN_GTT, 
0, 1676 &ib_result_handle, &ib_result_cpu, 1677 &ib_result_mc_address, &va_handle); 1678 CU_ASSERT_EQUAL(r, 0); 1679 1680 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 1681 AMDGPU_GEM_DOMAIN_GTT, 0, 1682 &ib_result_ce_handle, &ib_result_ce_cpu, 1683 &ib_result_ce_mc_address, &va_handle_ce); 1684 CU_ASSERT_EQUAL(r, 0); 1685 1686 r = amdgpu_get_bo_list(device_handle, ib_result_handle, 1687 ib_result_ce_handle, &bo_list); 1688 CU_ASSERT_EQUAL(r, 0); 1689 1690 memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info)); 1691 1692 /* IT_SET_CE_DE_COUNTERS */ 1693 ptr = ib_result_ce_cpu; 1694 if (family_id != AMDGPU_FAMILY_SI) { 1695 ptr[i++] = 0xc0008900; 1696 ptr[i++] = 0; 1697 } 1698 ptr[i++] = 0xc0008400; 1699 ptr[i++] = 1; 1700 ib_info[0].ib_mc_address = ib_result_ce_mc_address; 1701 ib_info[0].size = i; 1702 ib_info[0].flags = AMDGPU_IB_FLAG_CE; 1703 1704 /* IT_WAIT_ON_CE_COUNTER */ 1705 ptr = ib_result_cpu; 1706 ptr[0] = 0xc0008600; 1707 ptr[1] = 0x00000001; 1708 ib_info[1].ib_mc_address = ib_result_mc_address; 1709 ib_info[1].size = 2; 1710 1711 for (i = 0; i < ib_cs_num; i++) { 1712 ibs_request[i].ip_type = AMDGPU_HW_IP_GFX; 1713 ibs_request[i].number_of_ibs = 2; 1714 ibs_request[i].ibs = ib_info; 1715 ibs_request[i].resources = bo_list; 1716 ibs_request[i].fence_info.handle = NULL; 1717 } 1718 1719 r = amdgpu_cs_submit(context_handle, 0,ibs_request, ib_cs_num); 1720 1721 CU_ASSERT_EQUAL(r, 0); 1722 1723 for (i = 0; i < ib_cs_num; i++) { 1724 fence_status[i].context = context_handle; 1725 fence_status[i].ip_type = AMDGPU_HW_IP_GFX; 1726 fence_status[i].fence = ibs_request[i].seq_no; 1727 } 1728 1729 r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all, 1730 AMDGPU_TIMEOUT_INFINITE, 1731 &expired, NULL); 1732 CU_ASSERT_EQUAL(r, 0); 1733 1734 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 1735 ib_result_mc_address, 4096); 1736 CU_ASSERT_EQUAL(r, 0); 1737 1738 r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce, 1739 
ib_result_ce_mc_address, 4096); 1740 CU_ASSERT_EQUAL(r, 0); 1741 1742 r = amdgpu_bo_list_destroy(bo_list); 1743 CU_ASSERT_EQUAL(r, 0); 1744 1745 r = amdgpu_cs_ctx_free(context_handle); 1746 CU_ASSERT_EQUAL(r, 0); 1747} 1748 1749static void amdgpu_command_submission_multi_fence(void) 1750{ 1751 amdgpu_command_submission_multi_fence_wait_all(true); 1752 amdgpu_command_submission_multi_fence_wait_all(false); 1753} 1754 1755static void amdgpu_userptr_test(void) 1756{ 1757 int i, r, j; 1758 uint32_t *pm4 = NULL; 1759 uint64_t bo_mc; 1760 void *ptr = NULL; 1761 int pm4_dw = 256; 1762 int sdma_write_length = 4; 1763 amdgpu_bo_handle handle; 1764 amdgpu_context_handle context_handle; 1765 struct amdgpu_cs_ib_info *ib_info; 1766 struct amdgpu_cs_request *ibs_request; 1767 amdgpu_bo_handle buf_handle; 1768 amdgpu_va_handle va_handle; 1769 1770 pm4 = calloc(pm4_dw, sizeof(*pm4)); 1771 CU_ASSERT_NOT_EQUAL(pm4, NULL); 1772 1773 ib_info = calloc(1, sizeof(*ib_info)); 1774 CU_ASSERT_NOT_EQUAL(ib_info, NULL); 1775 1776 ibs_request = calloc(1, sizeof(*ibs_request)); 1777 CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 1778 1779 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 1780 CU_ASSERT_EQUAL(r, 0); 1781 1782 posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE); 1783 CU_ASSERT_NOT_EQUAL(ptr, NULL); 1784 memset(ptr, 0, BUFFER_SIZE); 1785 1786 r = amdgpu_create_bo_from_user_mem(device_handle, 1787 ptr, BUFFER_SIZE, &buf_handle); 1788 CU_ASSERT_EQUAL(r, 0); 1789 1790 r = amdgpu_va_range_alloc(device_handle, 1791 amdgpu_gpu_va_range_general, 1792 BUFFER_SIZE, 1, 0, &bo_mc, 1793 &va_handle, 0); 1794 CU_ASSERT_EQUAL(r, 0); 1795 1796 r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP); 1797 CU_ASSERT_EQUAL(r, 0); 1798 1799 handle = buf_handle; 1800 1801 j = i = 0; 1802 1803 if (family_id == AMDGPU_FAMILY_SI) 1804 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0, 1805 sdma_write_length); 1806 else 1807 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE, 1808 
SDMA_WRITE_SUB_OPCODE_LINEAR, 0); 1809 pm4[i++] = 0xffffffff & bo_mc; 1810 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1811 if (family_id >= AMDGPU_FAMILY_AI) 1812 pm4[i++] = sdma_write_length - 1; 1813 else if (family_id != AMDGPU_FAMILY_SI) 1814 pm4[i++] = sdma_write_length; 1815 1816 while (j++ < sdma_write_length) 1817 pm4[i++] = 0xdeadbeaf; 1818 1819 if (!fork()) { 1820 pm4[0] = 0x0; 1821 exit(0); 1822 } 1823 1824 amdgpu_test_exec_cs_helper(context_handle, 1825 AMDGPU_HW_IP_DMA, 0, 1826 i, pm4, 1827 1, &handle, 1828 ib_info, ibs_request); 1829 i = 0; 1830 while (i < sdma_write_length) { 1831 CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf); 1832 } 1833 free(ibs_request); 1834 free(ib_info); 1835 free(pm4); 1836 1837 r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP); 1838 CU_ASSERT_EQUAL(r, 0); 1839 r = amdgpu_va_range_free(va_handle); 1840 CU_ASSERT_EQUAL(r, 0); 1841 r = amdgpu_bo_free(buf_handle); 1842 CU_ASSERT_EQUAL(r, 0); 1843 free(ptr); 1844 1845 r = amdgpu_cs_ctx_free(context_handle); 1846 CU_ASSERT_EQUAL(r, 0); 1847 1848 wait(NULL); 1849} 1850 1851static void amdgpu_sync_dependency_test(void) 1852{ 1853 amdgpu_context_handle context_handle[2]; 1854 amdgpu_bo_handle ib_result_handle; 1855 void *ib_result_cpu; 1856 uint64_t ib_result_mc_address; 1857 struct amdgpu_cs_request ibs_request; 1858 struct amdgpu_cs_ib_info ib_info; 1859 struct amdgpu_cs_fence fence_status; 1860 uint32_t expired; 1861 int i, j, r; 1862 amdgpu_bo_list_handle bo_list; 1863 amdgpu_va_handle va_handle; 1864 static uint32_t *ptr; 1865 uint64_t seq_no; 1866 1867 r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]); 1868 CU_ASSERT_EQUAL(r, 0); 1869 r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]); 1870 CU_ASSERT_EQUAL(r, 0); 1871 1872 r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096, 1873 AMDGPU_GEM_DOMAIN_GTT, 0, 1874 &ib_result_handle, &ib_result_cpu, 1875 &ib_result_mc_address, &va_handle); 1876 CU_ASSERT_EQUAL(r, 0); 1877 1878 r = 
amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, 1879 &bo_list); 1880 CU_ASSERT_EQUAL(r, 0); 1881 1882 ptr = ib_result_cpu; 1883 i = 0; 1884 1885 memcpy(ptr + CODE_OFFSET , shader_bin, sizeof(shader_bin)); 1886 1887 /* Dispatch minimal init config and verify it's executed */ 1888 ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); 1889 ptr[i++] = 0x80000000; 1890 ptr[i++] = 0x80000000; 1891 1892 ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0); 1893 ptr[i++] = 0x80000000; 1894 1895 1896 /* Program compute regs */ 1897 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 1898 ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1899 ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8; 1900 ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40; 1901 1902 1903 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 1904 ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START; 1905 /* 1906 * 002c0040 COMPUTE_PGM_RSRC1 <- VGPRS = 0 1907 SGPRS = 1 1908 PRIORITY = 0 1909 FLOAT_MODE = 192 (0xc0) 1910 PRIV = 0 1911 DX10_CLAMP = 1 1912 DEBUG_MODE = 0 1913 IEEE_MODE = 0 1914 BULKY = 0 1915 CDBG_USER = 0 1916 * 1917 */ 1918 ptr[i++] = 0x002c0040; 1919 1920 1921 /* 1922 * 00000010 COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0 1923 USER_SGPR = 8 1924 TRAP_PRESENT = 0 1925 TGID_X_EN = 0 1926 TGID_Y_EN = 0 1927 TGID_Z_EN = 0 1928 TG_SIZE_EN = 0 1929 TIDIG_COMP_CNT = 0 1930 EXCP_EN_MSB = 0 1931 LDS_SIZE = 0 1932 EXCP_EN = 0 1933 * 1934 */ 1935 ptr[i++] = 0x00000010; 1936 1937 1938/* 1939 * 00000100 COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100) 1940 WAVESIZE = 0 1941 * 1942 */ 1943 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 1944 ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START; 1945 ptr[i++] = 0x00000100; 1946 1947 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 1948 ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START; 1949 ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4); 1950 ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32; 1951 1952 ptr[i++] = 
PACKET3(PKT3_SET_SH_REG, 1); 1953 ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START; 1954 ptr[i++] = 0; 1955 1956 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3); 1957 ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START; 1958 ptr[i++] = 1; 1959 ptr[i++] = 1; 1960 ptr[i++] = 1; 1961 1962 1963 /* Dispatch */ 1964 ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1965 ptr[i++] = 1; 1966 ptr[i++] = 1; 1967 ptr[i++] = 1; 1968 ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */ 1969 1970 1971 while (i & 7) 1972 ptr[i++] = 0xffff1000; /* type3 nop packet */ 1973 1974 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); 1975 ib_info.ib_mc_address = ib_result_mc_address; 1976 ib_info.size = i; 1977 1978 memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); 1979 ibs_request.ip_type = AMDGPU_HW_IP_GFX; 1980 ibs_request.ring = 0; 1981 ibs_request.number_of_ibs = 1; 1982 ibs_request.ibs = &ib_info; 1983 ibs_request.resources = bo_list; 1984 ibs_request.fence_info.handle = NULL; 1985 1986 r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request, 1); 1987 CU_ASSERT_EQUAL(r, 0); 1988 seq_no = ibs_request.seq_no; 1989 1990 1991 1992 /* Prepare second command with dependency on the first */ 1993 j = i; 1994 ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3); 1995 ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 1996 ptr[i++] = 0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4); 1997 ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32; 1998 ptr[i++] = 99; 1999 2000 while (i & 7) 2001 ptr[i++] = 0xffff1000; /* type3 nop packet */ 2002 2003 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); 2004 ib_info.ib_mc_address = ib_result_mc_address + j * 4; 2005 ib_info.size = i - j; 2006 2007 memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); 2008 ibs_request.ip_type = AMDGPU_HW_IP_GFX; 2009 ibs_request.ring = 0; 2010 ibs_request.number_of_ibs = 1; 2011 ibs_request.ibs = &ib_info; 2012 ibs_request.resources = bo_list; 2013 
ibs_request.fence_info.handle = NULL; 2014 2015 ibs_request.number_of_dependencies = 1; 2016 2017 ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies)); 2018 ibs_request.dependencies[0].context = context_handle[1]; 2019 ibs_request.dependencies[0].ip_instance = 0; 2020 ibs_request.dependencies[0].ring = 0; 2021 ibs_request.dependencies[0].fence = seq_no; 2022 2023 2024 r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request, 1); 2025 CU_ASSERT_EQUAL(r, 0); 2026 2027 2028 memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence)); 2029 fence_status.context = context_handle[0]; 2030 fence_status.ip_type = AMDGPU_HW_IP_GFX; 2031 fence_status.ip_instance = 0; 2032 fence_status.ring = 0; 2033 fence_status.fence = ibs_request.seq_no; 2034 2035 r = amdgpu_cs_query_fence_status(&fence_status, 2036 AMDGPU_TIMEOUT_INFINITE,0, &expired); 2037 CU_ASSERT_EQUAL(r, 0); 2038 2039 /* Expect the second command to wait for shader to complete */ 2040 CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99); 2041 2042 r = amdgpu_bo_list_destroy(bo_list); 2043 CU_ASSERT_EQUAL(r, 0); 2044 2045 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 2046 ib_result_mc_address, 4096); 2047 CU_ASSERT_EQUAL(r, 0); 2048 2049 r = amdgpu_cs_ctx_free(context_handle[0]); 2050 CU_ASSERT_EQUAL(r, 0); 2051 r = amdgpu_cs_ctx_free(context_handle[1]); 2052 CU_ASSERT_EQUAL(r, 0); 2053 2054 free(ibs_request.dependencies); 2055} 2056 2057static int amdgpu_dispatch_load_cs_shader(uint8_t *ptr, 2058 int cs_type) 2059{ 2060 uint32_t shader_size; 2061 const uint32_t *shader; 2062 2063 switch (cs_type) { 2064 case CS_BUFFERCLEAR: 2065 shader = bufferclear_cs_shader_gfx9; 2066 shader_size = sizeof(bufferclear_cs_shader_gfx9); 2067 break; 2068 case CS_BUFFERCOPY: 2069 shader = buffercopy_cs_shader_gfx9; 2070 shader_size = sizeof(buffercopy_cs_shader_gfx9); 2071 break; 2072 default: 2073 return -1; 2074 break; 2075 } 2076 2077 memcpy(ptr, shader, shader_size); 2078 return 0; 2079} 2080 2081static int 
amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type) 2082{ 2083 int i = 0; 2084 2085 /* Write context control and load shadowing register if necessary */ 2086 if (ip_type == AMDGPU_HW_IP_GFX) { 2087 ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); 2088 ptr[i++] = 0x80000000; 2089 ptr[i++] = 0x80000000; 2090 } 2091 2092 /* Issue commands to set default compute state. */ 2093 /* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */ 2094 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 3); 2095 ptr[i++] = 0x204; 2096 i += 3; 2097 /* clear mmCOMPUTE_RESOURCE_LIMITS */ 2098 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 2099 ptr[i++] = 0x215; 2100 ptr[i++] = 0; 2101 /* clear mmCOMPUTE_TMPRING_SIZE */ 2102 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 2103 ptr[i++] = 0x218; 2104 ptr[i++] = 0; 2105 2106 return i; 2107} 2108 2109static int amdgpu_dispatch_write_cumask(uint32_t *ptr) 2110{ 2111 int i = 0; 2112 2113 /* Issue commands to set cu mask used in current dispatch */ 2114 /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */ 2115 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); 2116 ptr[i++] = 0x216; 2117 ptr[i++] = 0xffffffff; 2118 ptr[i++] = 0xffffffff; 2119 /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */ 2120 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); 2121 ptr[i++] = 0x219; 2122 ptr[i++] = 0xffffffff; 2123 ptr[i++] = 0xffffffff; 2124 2125 return i; 2126} 2127 2128static int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr) 2129{ 2130 int i, j; 2131 2132 i = 0; 2133 2134 /* Writes shader state to HW */ 2135 /* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */ 2136 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); 2137 ptr[i++] = 0x20c; 2138 ptr[i++] = (shader_addr >> 8); 2139 ptr[i++] = (shader_addr >> 40); 2140 /* write sh regs*/ 2141 for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) { 2142 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 2143 /* - Gfx9ShRegBase */ 2144 ptr[i++] = 
bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00; 2145 ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1]; 2146 } 2147 2148 return i; 2149} 2150 2151static void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle, 2152 uint32_t ip_type, 2153 uint32_t ring) 2154{ 2155 amdgpu_context_handle context_handle; 2156 amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3]; 2157 volatile unsigned char *ptr_dst; 2158 void *ptr_shader; 2159 uint32_t *ptr_cmd; 2160 uint64_t mc_address_dst, mc_address_shader, mc_address_cmd; 2161 amdgpu_va_handle va_dst, va_shader, va_cmd; 2162 int i, r; 2163 int bo_dst_size = 16384; 2164 int bo_shader_size = 4096; 2165 int bo_cmd_size = 4096; 2166 struct amdgpu_cs_request ibs_request = {0}; 2167 struct amdgpu_cs_ib_info ib_info= {0}; 2168 amdgpu_bo_list_handle bo_list; 2169 struct amdgpu_cs_fence fence_status = {0}; 2170 uint32_t expired; 2171 2172 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 2173 CU_ASSERT_EQUAL(r, 0); 2174 2175 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 2176 AMDGPU_GEM_DOMAIN_GTT, 0, 2177 &bo_cmd, (void **)&ptr_cmd, 2178 &mc_address_cmd, &va_cmd); 2179 CU_ASSERT_EQUAL(r, 0); 2180 memset(ptr_cmd, 0, bo_cmd_size); 2181 2182 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 2183 AMDGPU_GEM_DOMAIN_VRAM, 0, 2184 &bo_shader, &ptr_shader, 2185 &mc_address_shader, &va_shader); 2186 CU_ASSERT_EQUAL(r, 0); 2187 2188 r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR); 2189 CU_ASSERT_EQUAL(r, 0); 2190 2191 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 2192 AMDGPU_GEM_DOMAIN_VRAM, 0, 2193 &bo_dst, (void **)&ptr_dst, 2194 &mc_address_dst, &va_dst); 2195 CU_ASSERT_EQUAL(r, 0); 2196 2197 i = 0; 2198 i += amdgpu_dispatch_init(ptr_cmd + i, ip_type); 2199 2200 /* Issue commands to set cu mask used in current dispatch */ 2201 i += amdgpu_dispatch_write_cumask(ptr_cmd + i); 2202 2203 /* Writes shader state to HW */ 2204 i += 
amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader); 2205 2206 /* Write constant data */ 2207 /* Writes the UAV constant data to the SGPRs. */ 2208 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 2209 ptr_cmd[i++] = 0x240; 2210 ptr_cmd[i++] = mc_address_dst; 2211 ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000; 2212 ptr_cmd[i++] = 0x400; 2213 ptr_cmd[i++] = 0x74fac; 2214 2215 /* Sets a range of pixel shader constants */ 2216 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 2217 ptr_cmd[i++] = 0x244; 2218 ptr_cmd[i++] = 0x22222222; 2219 ptr_cmd[i++] = 0x22222222; 2220 ptr_cmd[i++] = 0x22222222; 2221 ptr_cmd[i++] = 0x22222222; 2222 2223 /* dispatch direct command */ 2224 ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3); 2225 ptr_cmd[i++] = 0x10; 2226 ptr_cmd[i++] = 1; 2227 ptr_cmd[i++] = 1; 2228 ptr_cmd[i++] = 1; 2229 2230 while (i & 7) 2231 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 2232 2233 resources[0] = bo_dst; 2234 resources[1] = bo_shader; 2235 resources[2] = bo_cmd; 2236 r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list); 2237 CU_ASSERT_EQUAL(r, 0); 2238 2239 ib_info.ib_mc_address = mc_address_cmd; 2240 ib_info.size = i; 2241 ibs_request.ip_type = ip_type; 2242 ibs_request.ring = ring; 2243 ibs_request.resources = bo_list; 2244 ibs_request.number_of_ibs = 1; 2245 ibs_request.ibs = &ib_info; 2246 ibs_request.fence_info.handle = NULL; 2247 2248 /* submit CS */ 2249 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 2250 CU_ASSERT_EQUAL(r, 0); 2251 2252 r = amdgpu_bo_list_destroy(bo_list); 2253 CU_ASSERT_EQUAL(r, 0); 2254 2255 fence_status.ip_type = ip_type; 2256 fence_status.ip_instance = 0; 2257 fence_status.ring = ring; 2258 fence_status.context = context_handle; 2259 fence_status.fence = ibs_request.seq_no; 2260 2261 /* wait for IB accomplished */ 2262 r = amdgpu_cs_query_fence_status(&fence_status, 2263 AMDGPU_TIMEOUT_INFINITE, 2264 0, &expired); 2265 CU_ASSERT_EQUAL(r, 0); 2266 
CU_ASSERT_EQUAL(expired, true); 2267 2268 /* verify if memset test result meets with expected */ 2269 i = 0; 2270 while(i < bo_dst_size) { 2271 CU_ASSERT_EQUAL(ptr_dst[i++], 0x22); 2272 } 2273 2274 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 2275 CU_ASSERT_EQUAL(r, 0); 2276 2277 r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size); 2278 CU_ASSERT_EQUAL(r, 0); 2279 2280 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 2281 CU_ASSERT_EQUAL(r, 0); 2282 2283 r = amdgpu_cs_ctx_free(context_handle); 2284 CU_ASSERT_EQUAL(r, 0); 2285} 2286 2287static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle, 2288 uint32_t ip_type, 2289 uint32_t ring) 2290{ 2291 amdgpu_context_handle context_handle; 2292 amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4]; 2293 volatile unsigned char *ptr_dst; 2294 void *ptr_shader; 2295 unsigned char *ptr_src; 2296 uint32_t *ptr_cmd; 2297 uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd; 2298 amdgpu_va_handle va_src, va_dst, va_shader, va_cmd; 2299 int i, r; 2300 int bo_dst_size = 16384; 2301 int bo_shader_size = 4096; 2302 int bo_cmd_size = 4096; 2303 struct amdgpu_cs_request ibs_request = {0}; 2304 struct amdgpu_cs_ib_info ib_info= {0}; 2305 uint32_t expired; 2306 amdgpu_bo_list_handle bo_list; 2307 struct amdgpu_cs_fence fence_status = {0}; 2308 2309 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 2310 CU_ASSERT_EQUAL(r, 0); 2311 2312 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 2313 AMDGPU_GEM_DOMAIN_GTT, 0, 2314 &bo_cmd, (void **)&ptr_cmd, 2315 &mc_address_cmd, &va_cmd); 2316 CU_ASSERT_EQUAL(r, 0); 2317 memset(ptr_cmd, 0, bo_cmd_size); 2318 2319 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 2320 AMDGPU_GEM_DOMAIN_VRAM, 0, 2321 &bo_shader, &ptr_shader, 2322 &mc_address_shader, &va_shader); 2323 CU_ASSERT_EQUAL(r, 0); 2324 2325 r = 
amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCOPY ); 2326 CU_ASSERT_EQUAL(r, 0); 2327 2328 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 2329 AMDGPU_GEM_DOMAIN_VRAM, 0, 2330 &bo_src, (void **)&ptr_src, 2331 &mc_address_src, &va_src); 2332 CU_ASSERT_EQUAL(r, 0); 2333 2334 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 2335 AMDGPU_GEM_DOMAIN_VRAM, 0, 2336 &bo_dst, (void **)&ptr_dst, 2337 &mc_address_dst, &va_dst); 2338 CU_ASSERT_EQUAL(r, 0); 2339 2340 memset(ptr_src, 0x55, bo_dst_size); 2341 2342 i = 0; 2343 i += amdgpu_dispatch_init(ptr_cmd + i, ip_type); 2344 2345 /* Issue commands to set cu mask used in current dispatch */ 2346 i += amdgpu_dispatch_write_cumask(ptr_cmd + i); 2347 2348 /* Writes shader state to HW */ 2349 i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader); 2350 2351 /* Write constant data */ 2352 /* Writes the texture resource constants data to the SGPRs */ 2353 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 2354 ptr_cmd[i++] = 0x240; 2355 ptr_cmd[i++] = mc_address_src; 2356 ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000; 2357 ptr_cmd[i++] = 0x400; 2358 ptr_cmd[i++] = 0x74fac; 2359 2360 /* Writes the UAV constant data to the SGPRs. 
*/ 2361 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 2362 ptr_cmd[i++] = 0x244; 2363 ptr_cmd[i++] = mc_address_dst; 2364 ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000; 2365 ptr_cmd[i++] = 0x400; 2366 ptr_cmd[i++] = 0x74fac; 2367 2368 /* dispatch direct command */ 2369 ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3); 2370 ptr_cmd[i++] = 0x10; 2371 ptr_cmd[i++] = 1; 2372 ptr_cmd[i++] = 1; 2373 ptr_cmd[i++] = 1; 2374 2375 while (i & 7) 2376 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 2377 2378 resources[0] = bo_shader; 2379 resources[1] = bo_src; 2380 resources[2] = bo_dst; 2381 resources[3] = bo_cmd; 2382 r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list); 2383 CU_ASSERT_EQUAL(r, 0); 2384 2385 ib_info.ib_mc_address = mc_address_cmd; 2386 ib_info.size = i; 2387 ibs_request.ip_type = ip_type; 2388 ibs_request.ring = ring; 2389 ibs_request.resources = bo_list; 2390 ibs_request.number_of_ibs = 1; 2391 ibs_request.ibs = &ib_info; 2392 ibs_request.fence_info.handle = NULL; 2393 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 2394 CU_ASSERT_EQUAL(r, 0); 2395 2396 fence_status.ip_type = ip_type; 2397 fence_status.ip_instance = 0; 2398 fence_status.ring = ring; 2399 fence_status.context = context_handle; 2400 fence_status.fence = ibs_request.seq_no; 2401 2402 /* wait for IB accomplished */ 2403 r = amdgpu_cs_query_fence_status(&fence_status, 2404 AMDGPU_TIMEOUT_INFINITE, 2405 0, &expired); 2406 CU_ASSERT_EQUAL(r, 0); 2407 CU_ASSERT_EQUAL(expired, true); 2408 2409 /* verify if memcpy test result meets with expected */ 2410 i = 0; 2411 while(i < bo_dst_size) { 2412 CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]); 2413 i++; 2414 } 2415 2416 r = amdgpu_bo_list_destroy(bo_list); 2417 CU_ASSERT_EQUAL(r, 0); 2418 2419 r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size); 2420 CU_ASSERT_EQUAL(r, 0); 2421 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 2422 CU_ASSERT_EQUAL(r, 0); 2423 2424 
r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 2425 CU_ASSERT_EQUAL(r, 0); 2426 2427 r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size); 2428 CU_ASSERT_EQUAL(r, 0); 2429 2430 r = amdgpu_cs_ctx_free(context_handle); 2431 CU_ASSERT_EQUAL(r, 0); 2432} 2433static void amdgpu_dispatch_test(void) 2434{ 2435 int r; 2436 struct drm_amdgpu_info_hw_ip info; 2437 uint32_t ring_id; 2438 2439 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info); 2440 CU_ASSERT_EQUAL(r, 0); 2441 2442 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 2443 amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id); 2444 amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id); 2445 } 2446 2447 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info); 2448 CU_ASSERT_EQUAL(r, 0); 2449 2450 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 2451 amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id); 2452 amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id); 2453 } 2454} 2455 2456static int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type) 2457{ 2458 int i; 2459 uint32_t shader_offset= 256; 2460 uint32_t mem_offset, patch_code_offset; 2461 uint32_t shader_size, patchinfo_code_size; 2462 const uint32_t *shader; 2463 const uint32_t *patchinfo_code; 2464 const uint32_t *patchcode_offset; 2465 2466 switch (ps_type) { 2467 case PS_CONST: 2468 shader = ps_const_shader_gfx9; 2469 shader_size = sizeof(ps_const_shader_gfx9); 2470 patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9; 2471 patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9; 2472 patchcode_offset = ps_const_shader_patchinfo_offset_gfx9; 2473 break; 2474 case PS_TEX: 2475 shader = ps_tex_shader_gfx9; 2476 shader_size = sizeof(ps_tex_shader_gfx9); 2477 patchinfo_code = (const uint32_t 
*)ps_tex_shader_patchinfo_code_gfx9; 2478 patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9; 2479 patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9; 2480 break; 2481 default: 2482 return -1; 2483 break; 2484 } 2485 2486 /* write main shader program */ 2487 for (i = 0 ; i < 10; i++) { 2488 mem_offset = i * shader_offset; 2489 memcpy(ptr + mem_offset, shader, shader_size); 2490 } 2491 2492 /* overwrite patch codes */ 2493 for (i = 0 ; i < 10; i++) { 2494 mem_offset = i * shader_offset + patchcode_offset[0] * sizeof(uint32_t); 2495 patch_code_offset = i * patchinfo_code_size; 2496 memcpy(ptr + mem_offset, 2497 patchinfo_code + patch_code_offset, 2498 patchinfo_code_size * sizeof(uint32_t)); 2499 } 2500 2501 return 0; 2502} 2503 2504/* load RectPosTexFast_VS */ 2505static int amdgpu_draw_load_vs_shader(uint8_t *ptr) 2506{ 2507 const uint32_t *shader; 2508 uint32_t shader_size; 2509 2510 shader = vs_RectPosTexFast_shader_gfx9; 2511 shader_size = sizeof(vs_RectPosTexFast_shader_gfx9); 2512 2513 memcpy(ptr, shader, shader_size); 2514 2515 return 0; 2516} 2517 2518static int amdgpu_draw_init(uint32_t *ptr) 2519{ 2520 int i = 0; 2521 const uint32_t *preamblecache_ptr; 2522 uint32_t preamblecache_size; 2523 2524 /* Write context control and load shadowing register if necessary */ 2525 ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); 2526 ptr[i++] = 0x80000000; 2527 ptr[i++] = 0x80000000; 2528 2529 preamblecache_ptr = preamblecache_gfx9; 2530 preamblecache_size = sizeof(preamblecache_gfx9); 2531 2532 memcpy(ptr + i, preamblecache_ptr, preamblecache_size); 2533 return i + preamblecache_size/sizeof(uint32_t); 2534} 2535 2536static int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr, 2537 uint64_t dst_addr) 2538{ 2539 int i = 0; 2540 2541 /* setup color buffer */ 2542 /* offset reg 2543 0xA318 CB_COLOR0_BASE 2544 0xA319 CB_COLOR0_BASE_EXT 2545 0xA31A CB_COLOR0_ATTRIB2 2546 0xA31B CB_COLOR0_VIEW 2547 0xA31C CB_COLOR0_INFO 2548 0xA31D CB_COLOR0_ATTRIB 2549 
0xA31E CB_COLOR0_DCC_CONTROL 2550 0xA31F CB_COLOR0_CMASK 2551 0xA320 CB_COLOR0_CMASK_BASE_EXT 2552 0xA321 CB_COLOR0_FMASK 2553 0xA322 CB_COLOR0_FMASK_BASE_EXT 2554 0xA323 CB_COLOR0_CLEAR_WORD0 2555 0xA324 CB_COLOR0_CLEAR_WORD1 2556 0xA325 CB_COLOR0_DCC_BASE 2557 0xA326 CB_COLOR0_DCC_BASE_EXT */ 2558 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15); 2559 ptr[i++] = 0x318; 2560 ptr[i++] = dst_addr >> 8; 2561 ptr[i++] = dst_addr >> 40; 2562 ptr[i++] = 0x7c01f; 2563 ptr[i++] = 0; 2564 ptr[i++] = 0x50438; 2565 ptr[i++] = 0x10140000; 2566 i += 9; 2567 2568 /* mmCB_MRT0_EPITCH */ 2569 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 2570 ptr[i++] = 0x1e8; 2571 ptr[i++] = 0x1f; 2572 2573 /* 0xA32B CB_COLOR1_BASE */ 2574 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 2575 ptr[i++] = 0x32b; 2576 ptr[i++] = 0; 2577 2578 /* 0xA33A CB_COLOR1_BASE */ 2579 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 2580 ptr[i++] = 0x33a; 2581 ptr[i++] = 0; 2582 2583 /* SPI_SHADER_COL_FORMAT */ 2584 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 2585 ptr[i++] = 0x1c5; 2586 ptr[i++] = 9; 2587 2588 /* Setup depth buffer */ 2589 /* mmDB_Z_INFO */ 2590 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); 2591 ptr[i++] = 0xe; 2592 i += 2; 2593 2594 return i; 2595} 2596 2597static int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr) 2598{ 2599 int i = 0; 2600 const uint32_t *cached_cmd_ptr; 2601 uint32_t cached_cmd_size; 2602 2603 /* mmPA_SC_TILE_STEERING_OVERRIDE */ 2604 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 2605 ptr[i++] = 0xd7; 2606 ptr[i++] = 0; 2607 2608 ptr[i++] = 0xffff1000; 2609 ptr[i++] = 0xc0021000; 2610 2611 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 2612 ptr[i++] = 0xd7; 2613 ptr[i++] = 1; 2614 2615 /* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */ 2616 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16); 2617 ptr[i++] = 0x2fe; 2618 i += 16; 2619 2620 /* mmPA_SC_CENTROID_PRIORITY_0 */ 2621 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); 2622 ptr[i++] = 0x2f5; 2623 i += 2; 2624 
2625 cached_cmd_ptr = cached_cmd_gfx9; 2626 cached_cmd_size = sizeof(cached_cmd_gfx9); 2627 2628 memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size); 2629 i += cached_cmd_size/sizeof(uint32_t); 2630 2631 return i; 2632} 2633 2634static int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr, 2635 int ps_type, 2636 uint64_t shader_addr) 2637{ 2638 int i = 0; 2639 2640 /* mmPA_CL_VS_OUT_CNTL */ 2641 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 2642 ptr[i++] = 0x207; 2643 ptr[i++] = 0; 2644 2645 /* mmSPI_SHADER_PGM_RSRC3_VS */ 2646 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 2647 ptr[i++] = 0x46; 2648 ptr[i++] = 0xffff; 2649 2650 /* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */ 2651 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 2652 ptr[i++] = 0x48; 2653 ptr[i++] = shader_addr >> 8; 2654 ptr[i++] = shader_addr >> 40; 2655 2656 /* mmSPI_SHADER_PGM_RSRC1_VS */ 2657 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 2658 ptr[i++] = 0x4a; 2659 ptr[i++] = 0xc0081; 2660 /* mmSPI_SHADER_PGM_RSRC2_VS */ 2661 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 2662 ptr[i++] = 0x4b; 2663 ptr[i++] = 0x18; 2664 2665 /* mmSPI_VS_OUT_CONFIG */ 2666 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 2667 ptr[i++] = 0x1b1; 2668 ptr[i++] = 2; 2669 2670 /* mmSPI_SHADER_POS_FORMAT */ 2671 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 2672 ptr[i++] = 0x1c3; 2673 ptr[i++] = 4; 2674 2675 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4); 2676 ptr[i++] = 0x4c; 2677 i += 2; 2678 ptr[i++] = 0x42000000; 2679 ptr[i++] = 0x42000000; 2680 2681 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4); 2682 ptr[i++] = 0x50; 2683 i += 2; 2684 if (ps_type == PS_CONST) { 2685 i += 2; 2686 } else if (ps_type == PS_TEX) { 2687 ptr[i++] = 0x3f800000; 2688 ptr[i++] = 0x3f800000; 2689 } 2690 2691 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4); 2692 ptr[i++] = 0x54; 2693 i += 4; 2694 2695 return i; 2696} 2697 2698static int amdgpu_draw_ps_write2hw(uint32_t *ptr, 2699 int ps_type, 2700 uint64_t shader_addr) 2701{ 2702 int i, j; 2703 const uint32_t *sh_registers; 2704 
const uint32_t *context_registers; 2705 uint32_t num_sh_reg, num_context_reg; 2706 2707 if (ps_type == PS_CONST) { 2708 sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9; 2709 context_registers = (const uint32_t *)ps_const_context_reg_gfx9; 2710 num_sh_reg = ps_num_sh_registers_gfx9; 2711 num_context_reg = ps_num_context_registers_gfx9; 2712 } else if (ps_type == PS_TEX) { 2713 sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9; 2714 context_registers = (const uint32_t *)ps_tex_context_reg_gfx9; 2715 num_sh_reg = ps_num_sh_registers_gfx9; 2716 num_context_reg = ps_num_context_registers_gfx9; 2717 } 2718 2719 i = 0; 2720 2721 /* 0x2c07 SPI_SHADER_PGM_RSRC3_PS 2722 0x2c08 SPI_SHADER_PGM_LO_PS 2723 0x2c09 SPI_SHADER_PGM_HI_PS */ 2724 shader_addr += 256 * 9; 2725 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3); 2726 ptr[i++] = 0x7; 2727 ptr[i++] = 0xffff; 2728 ptr[i++] = shader_addr >> 8; 2729 ptr[i++] = shader_addr >> 40; 2730 2731 for (j = 0; j < num_sh_reg; j++) { 2732 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 2733 ptr[i++] = sh_registers[j * 2] - 0x2c00; 2734 ptr[i++] = sh_registers[j * 2 + 1]; 2735 } 2736 2737 for (j = 0; j < num_context_reg; j++) { 2738 if (context_registers[j * 2] != 0xA1C5) { 2739 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 2740 ptr[i++] = context_registers[j * 2] - 0xa000; 2741 ptr[i++] = context_registers[j * 2 + 1]; 2742 } 2743 2744 if (context_registers[j * 2] == 0xA1B4) { 2745 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 2746 ptr[i++] = 0x1b3; 2747 ptr[i++] = 2; 2748 } 2749 } 2750 2751 return i; 2752} 2753 2754static int amdgpu_draw_draw(uint32_t *ptr) 2755{ 2756 int i = 0; 2757 2758 /* mmIA_MULTI_VGT_PARAM */ 2759 ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 2760 ptr[i++] = 0x40000258; 2761 ptr[i++] = 0xd00ff; 2762 2763 /* mmVGT_PRIMITIVE_TYPE */ 2764 ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 2765 ptr[i++] = 0x10000242; 2766 ptr[i++] = 0x11; 2767 2768 ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1); 2769 ptr[i++] = 3; 
2770 ptr[i++] = 2; 2771 2772 return i; 2773} 2774 2775void amdgpu_memset_draw(amdgpu_device_handle device_handle, 2776 amdgpu_bo_handle bo_shader_ps, 2777 amdgpu_bo_handle bo_shader_vs, 2778 uint64_t mc_address_shader_ps, 2779 uint64_t mc_address_shader_vs, 2780 uint32_t ring_id) 2781{ 2782 amdgpu_context_handle context_handle; 2783 amdgpu_bo_handle bo_dst, bo_cmd, resources[4]; 2784 volatile unsigned char *ptr_dst; 2785 uint32_t *ptr_cmd; 2786 uint64_t mc_address_dst, mc_address_cmd; 2787 amdgpu_va_handle va_dst, va_cmd; 2788 int i, r; 2789 int bo_dst_size = 16384; 2790 int bo_cmd_size = 4096; 2791 struct amdgpu_cs_request ibs_request = {0}; 2792 struct amdgpu_cs_ib_info ib_info = {0}; 2793 struct amdgpu_cs_fence fence_status = {0}; 2794 uint32_t expired; 2795 amdgpu_bo_list_handle bo_list; 2796 2797 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 2798 CU_ASSERT_EQUAL(r, 0); 2799 2800 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 2801 AMDGPU_GEM_DOMAIN_GTT, 0, 2802 &bo_cmd, (void **)&ptr_cmd, 2803 &mc_address_cmd, &va_cmd); 2804 CU_ASSERT_EQUAL(r, 0); 2805 memset(ptr_cmd, 0, bo_cmd_size); 2806 2807 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 2808 AMDGPU_GEM_DOMAIN_VRAM, 0, 2809 &bo_dst, (void **)&ptr_dst, 2810 &mc_address_dst, &va_dst); 2811 CU_ASSERT_EQUAL(r, 0); 2812 2813 i = 0; 2814 i += amdgpu_draw_init(ptr_cmd + i); 2815 2816 i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst); 2817 2818 i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i); 2819 2820 i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs); 2821 2822 i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps); 2823 2824 ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4); 2825 ptr_cmd[i++] = 0xc; 2826 ptr_cmd[i++] = 0x33333333; 2827 ptr_cmd[i++] = 0x33333333; 2828 ptr_cmd[i++] = 0x33333333; 2829 ptr_cmd[i++] = 0x33333333; 2830 2831 i += amdgpu_draw_draw(ptr_cmd + i); 2832 2833 
while (i & 7) 2834 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 2835 2836 resources[0] = bo_dst; 2837 resources[1] = bo_shader_ps; 2838 resources[2] = bo_shader_vs; 2839 resources[3] = bo_cmd; 2840 r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list); 2841 CU_ASSERT_EQUAL(r, 0); 2842 2843 ib_info.ib_mc_address = mc_address_cmd; 2844 ib_info.size = i; 2845 ibs_request.ip_type = AMDGPU_HW_IP_GFX; 2846 ibs_request.ring = ring_id; 2847 ibs_request.resources = bo_list; 2848 ibs_request.number_of_ibs = 1; 2849 ibs_request.ibs = &ib_info; 2850 ibs_request.fence_info.handle = NULL; 2851 2852 /* submit CS */ 2853 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 2854 CU_ASSERT_EQUAL(r, 0); 2855 2856 r = amdgpu_bo_list_destroy(bo_list); 2857 CU_ASSERT_EQUAL(r, 0); 2858 2859 fence_status.ip_type = AMDGPU_HW_IP_GFX; 2860 fence_status.ip_instance = 0; 2861 fence_status.ring = ring_id; 2862 fence_status.context = context_handle; 2863 fence_status.fence = ibs_request.seq_no; 2864 2865 /* wait for IB accomplished */ 2866 r = amdgpu_cs_query_fence_status(&fence_status, 2867 AMDGPU_TIMEOUT_INFINITE, 2868 0, &expired); 2869 CU_ASSERT_EQUAL(r, 0); 2870 CU_ASSERT_EQUAL(expired, true); 2871 2872 /* verify if memset test result meets with expected */ 2873 i = 0; 2874 while(i < bo_dst_size) { 2875 CU_ASSERT_EQUAL(ptr_dst[i++], 0x33); 2876 } 2877 2878 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 2879 CU_ASSERT_EQUAL(r, 0); 2880 2881 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 2882 CU_ASSERT_EQUAL(r, 0); 2883 2884 r = amdgpu_cs_ctx_free(context_handle); 2885 CU_ASSERT_EQUAL(r, 0); 2886} 2887 2888static void amdgpu_memset_draw_test(amdgpu_device_handle device_handle, 2889 uint32_t ring) 2890{ 2891 amdgpu_bo_handle bo_shader_ps, bo_shader_vs; 2892 void *ptr_shader_ps; 2893 void *ptr_shader_vs; 2894 uint64_t mc_address_shader_ps, mc_address_shader_vs; 2895 amdgpu_va_handle va_shader_ps, va_shader_vs; 
2896 int r; 2897 int bo_shader_size = 4096; 2898 2899 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 2900 AMDGPU_GEM_DOMAIN_VRAM, 0, 2901 &bo_shader_ps, &ptr_shader_ps, 2902 &mc_address_shader_ps, &va_shader_ps); 2903 CU_ASSERT_EQUAL(r, 0); 2904 2905 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 2906 AMDGPU_GEM_DOMAIN_VRAM, 0, 2907 &bo_shader_vs, &ptr_shader_vs, 2908 &mc_address_shader_vs, &va_shader_vs); 2909 CU_ASSERT_EQUAL(r, 0); 2910 2911 r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_CONST); 2912 CU_ASSERT_EQUAL(r, 0); 2913 2914 r = amdgpu_draw_load_vs_shader(ptr_shader_vs); 2915 CU_ASSERT_EQUAL(r, 0); 2916 2917 amdgpu_memset_draw(device_handle, bo_shader_ps, bo_shader_vs, 2918 mc_address_shader_ps, mc_address_shader_vs, ring); 2919 2920 r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size); 2921 CU_ASSERT_EQUAL(r, 0); 2922 2923 r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size); 2924 CU_ASSERT_EQUAL(r, 0); 2925} 2926 2927static void amdgpu_memcpy_draw(amdgpu_device_handle device_handle, 2928 amdgpu_bo_handle bo_shader_ps, 2929 amdgpu_bo_handle bo_shader_vs, 2930 uint64_t mc_address_shader_ps, 2931 uint64_t mc_address_shader_vs, 2932 uint32_t ring) 2933{ 2934 amdgpu_context_handle context_handle; 2935 amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5]; 2936 volatile unsigned char *ptr_dst; 2937 unsigned char *ptr_src; 2938 uint32_t *ptr_cmd; 2939 uint64_t mc_address_dst, mc_address_src, mc_address_cmd; 2940 amdgpu_va_handle va_dst, va_src, va_cmd; 2941 int i, r; 2942 int bo_size = 16384; 2943 int bo_cmd_size = 4096; 2944 struct amdgpu_cs_request ibs_request = {0}; 2945 struct amdgpu_cs_ib_info ib_info= {0}; 2946 uint32_t hang_state, hangs, expired; 2947 amdgpu_bo_list_handle bo_list; 2948 struct amdgpu_cs_fence fence_status = {0}; 2949 2950 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 2951 CU_ASSERT_EQUAL(r, 0); 2952 2953 
r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 2954 AMDGPU_GEM_DOMAIN_GTT, 0, 2955 &bo_cmd, (void **)&ptr_cmd, 2956 &mc_address_cmd, &va_cmd); 2957 CU_ASSERT_EQUAL(r, 0); 2958 memset(ptr_cmd, 0, bo_cmd_size); 2959 2960 r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096, 2961 AMDGPU_GEM_DOMAIN_VRAM, 0, 2962 &bo_src, (void **)&ptr_src, 2963 &mc_address_src, &va_src); 2964 CU_ASSERT_EQUAL(r, 0); 2965 2966 r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096, 2967 AMDGPU_GEM_DOMAIN_VRAM, 0, 2968 &bo_dst, (void **)&ptr_dst, 2969 &mc_address_dst, &va_dst); 2970 CU_ASSERT_EQUAL(r, 0); 2971 2972 memset(ptr_src, 0x55, bo_size); 2973 2974 i = 0; 2975 i += amdgpu_draw_init(ptr_cmd + i); 2976 2977 i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst); 2978 2979 i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i); 2980 2981 i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs); 2982 2983 i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps); 2984 2985 ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8); 2986 ptr_cmd[i++] = 0xc; 2987 ptr_cmd[i++] = mc_address_src >> 8; 2988 ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000; 2989 ptr_cmd[i++] = 0x7c01f; 2990 ptr_cmd[i++] = 0x90500fac; 2991 ptr_cmd[i++] = 0x3e000; 2992 i += 3; 2993 2994 ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4); 2995 ptr_cmd[i++] = 0x14; 2996 ptr_cmd[i++] = 0x92; 2997 i += 3; 2998 2999 ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 1); 3000 ptr_cmd[i++] = 0x191; 3001 ptr_cmd[i++] = 0; 3002 3003 i += amdgpu_draw_draw(ptr_cmd + i); 3004 3005 while (i & 7) 3006 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 3007 3008 resources[0] = bo_dst; 3009 resources[1] = bo_src; 3010 resources[2] = bo_shader_ps; 3011 resources[3] = bo_shader_vs; 3012 resources[4] = bo_cmd; 3013 r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list); 3014 CU_ASSERT_EQUAL(r, 0); 3015 3016 ib_info.ib_mc_address = mc_address_cmd; 3017 
ib_info.size = i; 3018 ibs_request.ip_type = AMDGPU_HW_IP_GFX; 3019 ibs_request.ring = ring; 3020 ibs_request.resources = bo_list; 3021 ibs_request.number_of_ibs = 1; 3022 ibs_request.ibs = &ib_info; 3023 ibs_request.fence_info.handle = NULL; 3024 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 3025 CU_ASSERT_EQUAL(r, 0); 3026 3027 fence_status.ip_type = AMDGPU_HW_IP_GFX; 3028 fence_status.ip_instance = 0; 3029 fence_status.ring = ring; 3030 fence_status.context = context_handle; 3031 fence_status.fence = ibs_request.seq_no; 3032 3033 /* wait for IB accomplished */ 3034 r = amdgpu_cs_query_fence_status(&fence_status, 3035 AMDGPU_TIMEOUT_INFINITE, 3036 0, &expired); 3037 CU_ASSERT_EQUAL(r, 0); 3038 CU_ASSERT_EQUAL(expired, true); 3039 3040 /* verify if memcpy test result meets with expected */ 3041 i = 0; 3042 while(i < bo_size) { 3043 CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]); 3044 i++; 3045 } 3046 3047 r = amdgpu_bo_list_destroy(bo_list); 3048 CU_ASSERT_EQUAL(r, 0); 3049 3050 r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size); 3051 CU_ASSERT_EQUAL(r, 0); 3052 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size); 3053 CU_ASSERT_EQUAL(r, 0); 3054 3055 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 3056 CU_ASSERT_EQUAL(r, 0); 3057 3058 r = amdgpu_cs_ctx_free(context_handle); 3059 CU_ASSERT_EQUAL(r, 0); 3060} 3061 3062static void amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring) 3063{ 3064 amdgpu_bo_handle bo_shader_ps, bo_shader_vs; 3065 void *ptr_shader_ps; 3066 void *ptr_shader_vs; 3067 uint64_t mc_address_shader_ps, mc_address_shader_vs; 3068 amdgpu_va_handle va_shader_ps, va_shader_vs; 3069 int bo_shader_size = 4096; 3070 int r; 3071 3072 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 3073 AMDGPU_GEM_DOMAIN_VRAM, 0, 3074 &bo_shader_ps, &ptr_shader_ps, 3075 &mc_address_shader_ps, &va_shader_ps); 3076 CU_ASSERT_EQUAL(r, 0); 3077 3078 r = 
amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 3079 AMDGPU_GEM_DOMAIN_VRAM, 0, 3080 &bo_shader_vs, &ptr_shader_vs, 3081 &mc_address_shader_vs, &va_shader_vs); 3082 CU_ASSERT_EQUAL(r, 0); 3083 3084 r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_TEX); 3085 CU_ASSERT_EQUAL(r, 0); 3086 3087 r = amdgpu_draw_load_vs_shader(ptr_shader_vs); 3088 CU_ASSERT_EQUAL(r, 0); 3089 3090 amdgpu_memcpy_draw(device_handle, bo_shader_ps, bo_shader_vs, 3091 mc_address_shader_ps, mc_address_shader_vs, ring); 3092 3093 r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size); 3094 CU_ASSERT_EQUAL(r, 0); 3095 3096 r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size); 3097 CU_ASSERT_EQUAL(r, 0); 3098} 3099 3100static void amdgpu_draw_test(void) 3101{ 3102 int r; 3103 struct drm_amdgpu_info_hw_ip info; 3104 uint32_t ring_id; 3105 3106 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info); 3107 CU_ASSERT_EQUAL(r, 0); 3108 3109 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 3110 amdgpu_memset_draw_test(device_handle, ring_id); 3111 amdgpu_memcpy_draw_test(device_handle, ring_id); 3112 } 3113} 3114