basic_tests.c revision 00a23bda
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>	/* memset/memcpy/strerror */
#include <errno.h>	/* errno/EACCES */
#include <unistd.h>
#ifdef HAVE_ALLOCA_H
# include <alloca.h>
#endif
#include <sys/wait.h>

#include "CUnit/Basic.h"

#include "amdgpu_test.h"
#include "amdgpu_drm.h"

static amdgpu_device_handle device_handle;
static uint32_t major_version;
static uint32_t minor_version;
static uint32_t family_id;

static void amdgpu_query_info_test(void);
static void amdgpu_command_submission_gfx(void);
static void amdgpu_command_submission_compute(void);
static void amdgpu_command_submission_multi_fence(void);
static void amdgpu_command_submission_sdma(void);
static void amdgpu_userptr_test(void);
static void amdgpu_semaphore_test(void);
static void amdgpu_sync_dependency_test(void);
static void amdgpu_bo_eviction_test(void);

static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
				       unsigned ip_type,
				       int instance, int pm4_dw, uint32_t *pm4_src,
				       int res_cnt, amdgpu_bo_handle *resources,
				       struct amdgpu_cs_ib_info *ib_info,
				       struct amdgpu_cs_request *ibs_request);

CU_TestInfo basic_tests[] = {
	{ "Query Info Test", amdgpu_query_info_test },
	{ "Userptr Test", amdgpu_userptr_test },
	{ "bo eviction Test", amdgpu_bo_eviction_test },
	{ "Command submission Test (GFX)", amdgpu_command_submission_gfx },
	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
	{ "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
	{ "SW semaphore Test", amdgpu_semaphore_test },
	{ "Sync dependency Test", amdgpu_sync_dependency_test },
	CU_TEST_INFO_NULL,
};

#define BUFFER_SIZE (8 * 1024)

#define SDMA_PKT_HEADER_op_offset 0
#define SDMA_PKT_HEADER_op_mask   0x000000FF
#define SDMA_PKT_HEADER_op_shift  0
#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
#define SDMA_OPCODE_CONSTANT_FILL  11
#	define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)  ((x) << 14)
	/* 0 = byte fill
	 * 2 = DW fill
	 */
#define SDMA_PACKET(op, sub_op, e)	((((e) & 0xFFFF) << 16) |	\
					(((sub_op) & 0xFF) << 8) |	\
					(((op) & 0xFF) << 0))
#define SDMA_OPCODE_WRITE  2
#	define SDMA_WRITE_SUB_OPCODE_LINEAR  0
#	define SDMA_WRITE_SUB_OPCODE_TILED   1

#define SDMA_OPCODE_COPY  1
#	define SDMA_COPY_SUB_OPCODE_LINEAR  0

#define GFX_COMPUTE_NOP  0xffff1000
#define SDMA_NOP  0x0

/* PM4 */
#define PACKET_TYPE0	0
#define PACKET_TYPE1	1
#define PACKET_TYPE2	2
#define PACKET_TYPE3	3

#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
#define PACKET0(reg, n)	((PACKET_TYPE0 << 30) |	\
			 ((reg) & 0xFFFF) |	\
			 ((n) & 0x3FFF) << 16)
#define CP_PACKET2	0x80000000
#define	PACKET2_PAD_SHIFT	0
#define	PACKET2_PAD_MASK	(0x3fffffff << 0)

#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))

#define PACKET3(op, n)	((PACKET_TYPE3 << 30) |	\
			 (((op) & 0xFF) << 8) |	\
			 ((n) & 0x3FFF) << 16)

/* Packet 3 types */
#define PACKET3_NOP	0x10

#define PACKET3_WRITE_DATA	0x37
#define	WRITE_DATA_DST_SEL(x)	((x) << 8)
	/* 0 - register
	 * 1 - memory (sync - via GRBM)
	 * 2 - gl2
	 * 3 - gds
	 * 4 - reserved
	 * 5 - memory (async - direct)
	 */
#define	WR_ONE_ADDR	(1 << 16)
#define	WR_CONFIRM	(1 << 20)
#define	WRITE_DATA_CACHE_POLICY(x)	((x) << 25)
	/* 0 - LRU
	 * 1 - Stream
	 */
#define	WRITE_DATA_ENGINE_SEL(x)	((x) << 30)
	/* 0 - me
	 * 1 - pfp
	 * 2 - ce
	 */
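/*
 * For reference (editor's illustration, mirroring the write-linear helper
 * further down, not code the tests call): a CP WRITE_DATA packet for an
 * n-dword payload at GPU address dst_mc is assembled as
 *
 *	pm4[0] = PACKET3(PACKET3_WRITE_DATA, 2 + n);
 *	pm4[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;	// async memory write
 *	pm4[2] = 0xfffffffc & dst_mc;			// dword-aligned low bits
 *	pm4[3] = (0xffffffff00000000 & dst_mc) >> 32;	// high 32 bits
 *	pm4[4] ... pm4[3 + n] = payload dwords;
 */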
#define PACKET3_DMA_DATA	0x50
/* 1. header
 * 2. CONTROL
 * 3. SRC_ADDR_LO or DATA [31:0]
 * 4. SRC_ADDR_HI [31:0]
 * 5. DST_ADDR_LO [31:0]
 * 6. DST_ADDR_HI [7:0]
 * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
 */
/* CONTROL */
#	define PACKET3_DMA_DATA_ENGINE(x)	((x) << 0)
		/* 0 - ME
		 * 1 - PFP
		 */
#	define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x)	((x) << 13)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#	define PACKET3_DMA_DATA_SRC_VOLATILE	(1 << 15)
#	define PACKET3_DMA_DATA_DST_SEL(x)	((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#	define PACKET3_DMA_DATA_DST_CACHE_POLICY(x)	((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#	define PACKET3_DMA_DATA_DST_VOLATILE	(1 << 27)
#	define PACKET3_DMA_DATA_SRC_SEL(x)	((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
#	define PACKET3_DMA_DATA_CP_SYNC	(1 << 31)
/* COMMAND */
#	define PACKET3_DMA_DATA_DIS_WC	(1 << 21)
#	define PACKET3_DMA_DATA_CMD_SRC_SWAP(x)	((x) << 22)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#	define PACKET3_DMA_DATA_CMD_DST_SWAP(x)	((x) << 24)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#	define PACKET3_DMA_DATA_CMD_SAS	(1 << 26)
		/* 0 - memory
		 * 1 - register
		 */
#	define PACKET3_DMA_DATA_CMD_DAS	(1 << 27)
		/* 0 - memory
		 * 1 - register
		 */
#	define PACKET3_DMA_DATA_CMD_SAIC	(1 << 28)
#	define PACKET3_DMA_DATA_CMD_DAIC	(1 << 29)
#	define PACKET3_DMA_DATA_CMD_RAW_WAIT	(1 << 30)

#define SDMA_PACKET_SI(op, b, t, s, cnt)	((((op) & 0xF) << 28) |	\
						(((b) & 0x1) << 26) |	\
						(((t) & 0x1) << 23) |	\
						(((s) & 0x1) << 22) |	\
						(((cnt) & 0xFFFFF) << 0))
#define SDMA_OPCODE_COPY_SI	3
#define SDMA_OPCODE_CONSTANT_FILL_SI	13
#define SDMA_NOP_SI	0xf
#define GFX_COMPUTE_NOP_SI	0x80000000
#define PACKET3_DMA_DATA_SI	0x41
#	define PACKET3_DMA_DATA_SI_ENGINE(x)	((x) << 27)
		/* 0 - ME
		 * 1 - PFP
		 */
#	define PACKET3_DMA_DATA_SI_DST_SEL(x)	((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#	define PACKET3_DMA_DATA_SI_SRC_SEL(x)	((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
#	define PACKET3_DMA_DATA_SI_CP_SYNC	(1 << 31)


#define PKT3_CONTEXT_CONTROL	0x28
#define	CONTEXT_CONTROL_LOAD_ENABLE(x)		(((unsigned)(x) & 0x1) << 31)
#define	CONTEXT_CONTROL_LOAD_CE_RAM(x)		(((unsigned)(x) & 0x1) << 28)
#define	CONTEXT_CONTROL_SHADOW_ENABLE(x)	(((unsigned)(x) & 0x1) << 31)

#define PKT3_CLEAR_STATE	0x12

#define PKT3_SET_SH_REG		0x76
#define	PACKET3_SET_SH_REG_START	0x00002c00

#define PACKET3_DISPATCH_DIRECT	0x15


/* gfx 8 */
#define mmCOMPUTE_PGM_LO		0x2e0c
#define mmCOMPUTE_PGM_RSRC1		0x2e12
#define mmCOMPUTE_TMPRING_SIZE		0x2e18
#define mmCOMPUTE_USER_DATA_0		0x2e40
#define mmCOMPUTE_USER_DATA_1		0x2e41
#define mmCOMPUTE_RESOURCE_LIMITS	0x2e15
#define mmCOMPUTE_NUM_THREAD_X		0x2e07


#define SWAP_32(num) (((num & 0xff000000) >> 24) | \
		      ((num & 0x0000ff00) << 8) | \
		      ((num & 0x00ff0000) >> 8) | \
		      ((num & 0x000000ff) << 24))
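/*
 * SWAP_32 reverses the byte order of a dword, e.g. SWAP_32(0x11223344) ==
 * 0x44332211; shader_bin below is written byte-reversed in the source and
 * swapped back into executable order at compile time.
 */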
/* Shader code
 * void main()
 * {
 *	float x = some_input;
 *	for (unsigned i = 0; i < 1000000; i++)
 *		x = sin(x);
 *
 *	u[0] = 42u;
 * }
 */

static uint32_t shader_bin[] = {
	SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
	SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
	SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
	SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
};

#define CODE_OFFSET 512
#define DATA_OFFSET 1024


int suite_basic_tests_init(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	int r;

	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
				     &minor_version, &device_handle);

	if (r) {
		if ((r == -EACCES) && (errno == EACCES))
			printf("\n\nError: %s. "
			       "Hint: try to run this test program as root.",
			       strerror(errno));
		return CUE_SINIT_FAILED;
	}

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	if (r)
		return CUE_SINIT_FAILED;

	family_id = gpu_info.family_id;

	return CUE_SUCCESS;
}

int suite_basic_tests_clean(void)
{
	int r = amdgpu_device_deinitialize(device_handle);

	if (r == 0)
		return CUE_SUCCESS;
	else
		return CUE_SCLEAN_FAILED;
}

static void amdgpu_query_info_test(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	uint32_t version, feature;
	int r;

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
					  0, &version, &feature);
	CU_ASSERT_EQUAL(r, 0);
}
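/*
 * The two GFX multi-IB tests below pair a constant engine (CE) IB with a
 * draw engine (DE) IB. The CE IB initializes the CE/DE counters and bumps
 * the CE counter (the IT_SET_CE_DE_COUNTERS block), and the DE IB waits on
 * it (IT_WAIT_ON_CE_COUNTER), the usual handshake that keeps the CE from
 * running ahead of the DE. The SI path skips the counter-init dwords.
 */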
static void amdgpu_command_submission_gfx_separate_ibs(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_shared_ib(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
			       &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER, at a 16-byte offset in the same BO */
	ptr = (uint32_t *)ib_result_cpu + 4;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
}
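/*
 * The eviction test below creates memory pressure on purpose: it allocates
 * two buffers of vram_info.max_allocation and two of gtt_info.max_allocation,
 * then references them alongside the copy buffers in the resource list so
 * that the kernel has to evict and restore BOs between the SDMA copies.
 */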
static void amdgpu_bo_eviction_test(void)
{
	const int sdma_write_length = 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo1_mc, bo2_mc;
	volatile unsigned char *bo1_cpu, *bo2_cpu;
	int i, j, r, loop1, loop2;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
	struct amdgpu_heap_info vram_info, gtt_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(4, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
				   0, &vram_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
				   0, &gtt_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	loop1 = loop2 = 0;
	/* run 4 loops (2 x 2 gtt_flags) to cover all mapping combinations */
	while (loop1 < 2) {
		while (loop2 < 2) {
			/* allocate UC bo1 for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop1], &bo1,
						    (void**)&bo1_cpu, &bo1_mc,
						    &bo1_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* set bo1 */
			memset((void*)bo1_cpu, 0xaa, sdma_write_length);

			/* allocate UC bo2 for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop2], &bo2,
						    (void**)&bo2_cpu, &bo2_mc,
						    &bo2_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo2 */
			memset((void*)bo2_cpu, 0, sdma_write_length);

			resources[0] = bo1;
			resources[1] = bo2;
			resources[2] = vram_max[loop2];
			resources[3] = gtt_max[loop2];

			/* build PM4: test DMA copy linear */
			i = j = 0;
			if (family_id == AMDGPU_FAMILY_SI) {
				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
							  sdma_write_length);
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
			} else {
				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else
					pm4[i++] = sdma_write_length;
				pm4[i++] = 0;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   AMDGPU_HW_IP_DMA, 0,
						   i, pm4,
						   4, resources,
						   ib_info, ibs_request);
			/* verify the SDMA copy: bo2 must now hold bo1's pattern */
			i = 0;
			while (i < sdma_write_length) {
				CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
			}
			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			loop2++;
		}
		loop2 = 0;
		loop1++;
	}
	amdgpu_bo_free(vram_max[0]);
	amdgpu_bo_free(vram_max[1]);
	amdgpu_bo_free(gtt_max[0]);
	amdgpu_bo_free(gtt_max[1]);
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}


static void amdgpu_command_submission_gfx(void)
{
	/* write data using the CP */
	amdgpu_command_submission_gfx_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_gfx_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_gfx_cp_copy_data();
	/* separate IB buffers for multi-IB submission */
	amdgpu_command_submission_gfx_separate_ibs();
	/* shared IB buffer for multi-IB submission */
	amdgpu_command_submission_gfx_shared_ib();
}

static void amdgpu_semaphore_test(void)
{
	amdgpu_context_handle context_handle[2];
	amdgpu_semaphore_handle sem;
	amdgpu_bo_handle ib_result_handle[2];
	void *ib_result_cpu[2];
	uint64_t ib_result_mc_address[2];
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2] = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	uint32_t sdma_nop, gfx_nop;
	amdgpu_bo_list_handle bo_list[2];
	amdgpu_va_handle va_handle[2];
	int r, i;

	if (family_id == AMDGPU_FAMILY_SI) {
		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
		gfx_nop = GFX_COMPUTE_NOP_SI;
	} else {
		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
		gfx_nop = GFX_COMPUTE_NOP;
	}

	r = amdgpu_cs_create_semaphore(&sem);
	CU_ASSERT_EQUAL(r, 0);
	for (i = 0; i < 2; i++) {
		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle[i], &ib_result_cpu[i],
					    &ib_result_mc_address[i], &va_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
				       NULL, &bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);
	}
	/* 1. same context different engine */
	ptr = ib_result_cpu[0];
	ptr[0] = sdma_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[0];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* 2. same engine different context */
	ptr = ib_result_cpu[0];
	ptr[0] = gfx_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[1], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[1];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	for (i = 0; i < 2; i++) {
		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
					     ib_result_mc_address[i], 4096);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_cs_ctx_free(context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_destroy_semaphore(sem);
	CU_ASSERT_EQUAL(r, 0);
}
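/*
 * amdgpu_query_hw_ip_info() reports usable rings as a bitmask in
 * available_rings. The ring loops below walk bit positions from zero up to
 * the first cleared bit, e.g. a mask of 0x3 runs the test on rings 0 and 1.
 */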
static void amdgpu_command_submission_compute_nop(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request;
	struct amdgpu_cs_ib_info ib_info;
	struct amdgpu_cs_fence fence_status;
	uint32_t *ptr;
	uint32_t expired;
	int r, instance;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip info;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	for (instance = 0; (1 << instance) & info.available_rings; instance++) {
		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle, &ib_result_cpu,
					    &ib_result_mc_address, &va_handle);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
				       &bo_list);
		CU_ASSERT_EQUAL(r, 0);

		ptr = ib_result_cpu;
		memset(ptr, 0, 16);
		ptr[0] = PACKET3(PACKET3_NOP, 14);

		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
		ib_info.ib_mc_address = ib_result_mc_address;
		ib_info.size = 16;

		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
		ibs_request.ring = instance;
		ibs_request.number_of_ibs = 1;
		ibs_request.ibs = &ib_info;
		ibs_request.resources = bo_list;
		ibs_request.fence_info.handle = NULL;

		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
		r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
		CU_ASSERT_EQUAL(r, 0);

		fence_status.context = context_handle;
		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
		fence_status.ip_instance = 0;
		fence_status.ring = instance;
		fence_status.fence = ibs_request.seq_no;

		r = amdgpu_cs_query_fence_status(&fence_status,
						 AMDGPU_TIMEOUT_INFINITE,
						 0, &expired);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
					     ib_result_mc_address, 4096);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_compute_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute(void)
{
	/* write data using the CP */
	amdgpu_command_submission_compute_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_compute_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_compute_cp_copy_data();
	/* nop test */
	amdgpu_command_submission_compute_nop();
}
/*
 * The caller must create and release:
 * pm4_src, resources, ib_info, and ibs_request.
 * Submits the command stream described in ibs_request and waits for that IB
 * to complete.
 */
static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
				       unsigned ip_type,
				       int instance, int pm4_dw, uint32_t *pm4_src,
				       int res_cnt, amdgpu_bo_handle *resources,
				       struct amdgpu_cs_ib_info *ib_info,
				       struct amdgpu_cs_request *ibs_request)
{
	int r;
	uint32_t expired;
	uint32_t *ring_ptr;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_fence fence_status = {0};
	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
	amdgpu_va_handle va_handle;

	/* prepare CS */
	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
	CU_ASSERT_NOT_EQUAL(resources, NULL);
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
	CU_ASSERT_TRUE(pm4_dw <= 1024);

	/* allocate IB */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* copy the caller's PM4 packet into the IB */
	ring_ptr = ib_result_cpu;
	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));

	ib_info->ib_mc_address = ib_result_mc_address;
	ib_info->size = pm4_dw;

	ibs_request->ip_type = ip_type;
	ibs_request->ring = instance;
	ibs_request->number_of_ibs = 1;
	ibs_request->ibs = ib_info;
	ibs_request->fence_info.handle = NULL;

	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
	all_res[res_cnt] = ib_result_handle;

	r = amdgpu_bo_list_create(device_handle, res_cnt + 1, all_res,
				  NULL, &ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ibs_request->ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request->seq_no;

	/* wait for the IB to complete */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);
}
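/*
 * The three helpers below (write-linear, const-fill, copy-linear) share one
 * shape: build a small PM4 stream for either the SDMA engine or the CP DMA
 * path, run it through amdgpu_test_exec_cs_helper() on every available ring,
 * and verify the result through the CPU mapping; each variant runs once with
 * cacheable GTT and once with USWC (see gtt_flags).
 */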
static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
{
	const int sdma_write_length = 128;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	int i, j, r, loop, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop = 0;
		while (loop < 2) {
			/* allocate UC bo for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length * sizeof(uint32_t),
						    4096, AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop], &bo, (void**)&bo_cpu,
						    &bo_mc, &va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo */
			memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));

			resources[0] = bo;

			/* build PM4: test DMA write-linear */
			i = j = 0;
			if (ip_type == AMDGPU_HW_IP_DMA) {
				if (family_id == AMDGPU_FAMILY_SI)
					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
								  sdma_write_length);
				else
					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
							       SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
				pm4[i++] = 0xffffffff & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else if (family_id != AMDGPU_FAMILY_SI)
					pm4[i++] = sdma_write_length;
				while (j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
				pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				while (j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   ip_type, ring_id,
						   i, pm4,
						   1, resources,
						   ib_info, ibs_request);

			/* verify the write: every dword must hold the pattern */
			i = 0;
			while (i < sdma_write_length) {
				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
			}

			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
						     sdma_write_length * sizeof(uint32_t));
			CU_ASSERT_EQUAL(r, 0);
			loop++;
		}
	}
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_sdma_write_linear(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
}
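/*
 * A note on the non-SI SDMA fill below (editor's gloss): the header's
 * SDMA_CONSTANT_FILL_EXTRA_SIZE(2) selects dword fill per the fill-size
 * comment next to that define, while the final count dword is programmed
 * with sdma_write_length, a byte count (minus one from AI onward).
 */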
static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
{
	const int sdma_write_length = 1024 * 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	int i, j, r, loop, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop = 0;
		while (loop < 2) {
			/* allocate UC bo for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop], &bo, (void**)&bo_cpu,
						    &bo_mc, &va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo */
			memset((void*)bo_cpu, 0, sdma_write_length);

			resources[0] = bo;

			/* build PM4: test DMA const fill */
			i = j = 0;
			if (ip_type == AMDGPU_HW_IP_DMA) {
				if (family_id == AMDGPU_FAMILY_SI) {
					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
								  0, 0, 0,
								  sdma_write_length / 4);
					pm4[i++] = 0xfffffffc & bo_mc;
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
				} else {
					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
							       SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
					pm4[i++] = 0xffffffff & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = 0xdeadbeaf;
					if (family_id >= AMDGPU_FAMILY_AI)
						pm4[i++] = sdma_write_length - 1;
					else
						pm4[i++] = sdma_write_length;
				}
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				if (family_id == AMDGPU_FAMILY_SI) {
					pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
						   PACKET3_DMA_DATA_SI_DST_SEL(0) |
						   PACKET3_DMA_DATA_SI_SRC_SEL(2) |
						   PACKET3_DMA_DATA_SI_CP_SYNC;
					pm4[i++] = 0xffffffff & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = sdma_write_length;
				} else {
					pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
					pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
						   PACKET3_DMA_DATA_DST_SEL(0) |
						   PACKET3_DMA_DATA_SRC_SEL(2) |
						   PACKET3_DMA_DATA_CP_SYNC;
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = 0;
					pm4[i++] = 0xfffffffc & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = sdma_write_length;
				}
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   ip_type, ring_id,
						   i, pm4,
						   1, resources,
						   ib_info, ibs_request);

			/* verify the fill: every dword must hold the pattern */
			i = 0;
			while (i < (sdma_write_length / 4)) {
				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
			}

			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			loop++;
		}
	}
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_sdma_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
}
static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
{
	const int sdma_write_length = 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo1, bo2;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo1_mc, bo2_mc;
	volatile unsigned char *bo1_cpu, *bo2_cpu;
	int i, j, r, loop1, loop2, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(2, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop1 = loop2 = 0;
		/* run 4 loops (2 x 2 gtt_flags) to cover all mapping combinations */
		while (loop1 < 2) {
			while (loop2 < 2) {
				/* allocate UC bo1 for sDMA use */
				r = amdgpu_bo_alloc_and_map(device_handle,
							    sdma_write_length, 4096,
							    AMDGPU_GEM_DOMAIN_GTT,
							    gtt_flags[loop1], &bo1,
							    (void**)&bo1_cpu, &bo1_mc,
							    &bo1_va_handle);
				CU_ASSERT_EQUAL(r, 0);

				/* set bo1 */
				memset((void*)bo1_cpu, 0xaa, sdma_write_length);

				/* allocate UC bo2 for sDMA use */
				r = amdgpu_bo_alloc_and_map(device_handle,
							    sdma_write_length, 4096,
							    AMDGPU_GEM_DOMAIN_GTT,
							    gtt_flags[loop2], &bo2,
							    (void**)&bo2_cpu, &bo2_mc,
							    &bo2_va_handle);
				CU_ASSERT_EQUAL(r, 0);

				/* clear bo2 */
				memset((void*)bo2_cpu, 0, sdma_write_length);

				resources[0] = bo1;
				resources[1] = bo2;

				/* build PM4: test DMA copy linear */
				i = j = 0;
				if (ip_type == AMDGPU_HW_IP_DMA) {
					if (family_id == AMDGPU_FAMILY_SI) {
						pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
									  0, 0, 0,
									  sdma_write_length);
						pm4[i++] = 0xffffffff & bo2_mc;
						pm4[i++] = 0xffffffff & bo1_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
					} else {
						pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
								       SDMA_COPY_SUB_OPCODE_LINEAR,
								       0);
						if (family_id >= AMDGPU_FAMILY_AI)
							pm4[i++] = sdma_write_length - 1;
						else
							pm4[i++] = sdma_write_length;
						pm4[i++] = 0;
						pm4[i++] = 0xffffffff & bo1_mc;
						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
						pm4[i++] = 0xffffffff & bo2_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
					}
				} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
					   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
					if (family_id == AMDGPU_FAMILY_SI) {
						pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
						pm4[i++] = 0xfffffffc & bo1_mc;
						pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
							   PACKET3_DMA_DATA_SI_DST_SEL(0) |
							   PACKET3_DMA_DATA_SI_SRC_SEL(0) |
							   PACKET3_DMA_DATA_SI_CP_SYNC |
							   (0xffff00000000 & bo1_mc) >> 32;
						pm4[i++] = 0xfffffffc & bo2_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
						pm4[i++] = sdma_write_length;
					} else {
						pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
						pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
							   PACKET3_DMA_DATA_DST_SEL(0) |
							   PACKET3_DMA_DATA_SRC_SEL(0) |
							   PACKET3_DMA_DATA_CP_SYNC;
						pm4[i++] = 0xfffffffc & bo1_mc;
						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
						pm4[i++] = 0xfffffffc & bo2_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
						pm4[i++] = sdma_write_length;
					}
				}

				amdgpu_test_exec_cs_helper(context_handle,
							   ip_type, ring_id,
							   i, pm4,
							   2, resources,
							   ib_info, ibs_request);

				/* verify the copy: bo2 must now hold bo1's pattern */
				i = 0;
				while (i < sdma_write_length) {
					CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
				}
				r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
							     sdma_write_length);
				CU_ASSERT_EQUAL(r, 0);
				r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
							     sdma_write_length);
				CU_ASSERT_EQUAL(r, 0);
				loop2++;
			}
			/* reset the inner loop so the next loop1 pass covers
			 * both gtt_flags for bo2 as well */
			loop2 = 0;
			loop1++;
		}
	}
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_sdma_copy_linear(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
}

static void amdgpu_command_submission_sdma(void)
{
	amdgpu_command_submission_sdma_write_linear();
	amdgpu_command_submission_sdma_const_fill();
	amdgpu_command_submission_sdma_copy_linear();
}

static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status[2] = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r;
	int i = 0, ib_cs_num = 2;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	for (i = 0; i < ib_cs_num; i++) {
		ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
		ibs_request[i].number_of_ibs = 2;
		ibs_request[i].ibs = ib_info;
		ibs_request[i].resources = bo_list;
		ibs_request[i].fence_info.handle = NULL;
	}

	r = amdgpu_cs_submit(context_handle, 0, ibs_request, ib_cs_num);
	CU_ASSERT_EQUAL(r, 0);

	for (i = 0; i < ib_cs_num; i++) {
		fence_status[i].context = context_handle;
		fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
		fence_status[i].fence = ibs_request[i].seq_no;
	}

	r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
				  AMDGPU_TIMEOUT_INFINITE,
				  &expired, NULL);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
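/*
 * amdgpu_cs_wait_fences() takes the whole fence array: with wait_all == true
 * it blocks until every fence has signaled, with wait_all == false it returns
 * as soon as any one of them does. The wrapper below exercises the
 * submission path under both modes.
 */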
static void amdgpu_command_submission_multi_fence(void)
{
	amdgpu_command_submission_multi_fence_wait_all(true);
	amdgpu_command_submission_multi_fence_wait_all(false);
}

static void amdgpu_userptr_test(void)
{
	int i, r, j;
	uint32_t *pm4 = NULL;
	uint64_t bo_mc;
	void *ptr = NULL;
	int pm4_dw = 256;
	int sdma_write_length = 4;
	amdgpu_bo_handle handle;
	amdgpu_context_handle context_handle;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	amdgpu_bo_handle buf_handle;
	amdgpu_va_handle va_handle;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
	CU_ASSERT_NOT_EQUAL(ptr, NULL);
	memset(ptr, 0, BUFFER_SIZE);

	r = amdgpu_create_bo_from_user_mem(device_handle,
					   ptr, BUFFER_SIZE, &buf_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_va_range_alloc(device_handle,
				  amdgpu_gpu_va_range_general,
				  BUFFER_SIZE, 1, 0, &bo_mc,
				  &va_handle, 0);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
	CU_ASSERT_EQUAL(r, 0);

	handle = buf_handle;

	j = i = 0;

	if (family_id == AMDGPU_FAMILY_SI)
		pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
					  sdma_write_length);
	else
		pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
				       SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
	pm4[i++] = 0xffffffff & bo_mc;
	pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
	if (family_id >= AMDGPU_FAMILY_AI)
		pm4[i++] = sdma_write_length - 1;
	else if (family_id != AMDGPU_FAMILY_SI)
		pm4[i++] = sdma_write_length;

	while (j++ < sdma_write_length)
		pm4[i++] = 0xdeadbeaf;

	/* fork a child whose copy-on-write store to the PM4 buffer must not
	 * corrupt the parent's pending submission */
	if (!fork()) {
		pm4[0] = 0x0;
		exit(0);
	}

	amdgpu_test_exec_cs_helper(context_handle,
				   AMDGPU_HW_IP_DMA, 0,
				   i, pm4,
				   1, &handle,
				   ib_info, ibs_request);
	i = 0;
	while (i < sdma_write_length) {
		CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
	}
	free(ibs_request);
	free(ib_info);
	free(pm4);

	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_va_range_free(va_handle);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_free(buf_handle);
	CU_ASSERT_EQUAL(r, 0);
	free(ptr);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);

	wait(NULL);
}
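/*
 * The sync dependency test queues a long-running compute shader (the sin()
 * loop in shader_bin) on context_handle[1], then submits a WRITE_DATA of the
 * value 99 on context_handle[0] with an explicit dependency on the first
 * submission's fence. If the dependency is honored, the write is the last
 * thing to touch DATA_OFFSET, which the final assertion checks.
 */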
static void amdgpu_sync_dependency_test(void)
{
	amdgpu_context_handle context_handle[2];
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request;
	struct amdgpu_cs_ib_info ib_info;
	struct amdgpu_cs_fence fence_status;
	uint32_t expired;
	int i, j, r;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	static uint32_t *ptr;
	uint64_t seq_no;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
			       &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ptr = ib_result_cpu;
	i = 0;

	memcpy(ptr + CODE_OFFSET, shader_bin, sizeof(shader_bin));

	/* Dispatch minimal init config and verify it's executed */
	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
	ptr[i++] = 0x80000000;
	ptr[i++] = 0x80000000;

	ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
	ptr[i++] = 0x80000000;


	/* Program compute regs */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
	ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;


	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
	ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
	/*
	 * 002c0040	COMPUTE_PGM_RSRC1 <- VGPRS = 0
	 *				     SGPRS = 1
	 *				     PRIORITY = 0
	 *				     FLOAT_MODE = 192 (0xc0)
	 *				     PRIV = 0
	 *				     DX10_CLAMP = 1
	 *				     DEBUG_MODE = 0
	 *				     IEEE_MODE = 0
	 *				     BULKY = 0
	 *				     CDBG_USER = 0
	 */
	ptr[i++] = 0x002c0040;


	/*
	 * 00000010	COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
	 *				     USER_SGPR = 8
	 *				     TRAP_PRESENT = 0
	 *				     TGID_X_EN = 0
	 *				     TGID_Y_EN = 0
	 *				     TGID_Z_EN = 0
	 *				     TG_SIZE_EN = 0
	 *				     TIDIG_COMP_CNT = 0
	 *				     EXCP_EN_MSB = 0
	 *				     LDS_SIZE = 0
	 *				     EXCP_EN = 0
	 */
	ptr[i++] = 0x00000010;


	/*
	 * 00000100	COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
	 *					WAVESIZE = 0
	 */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
	ptr[i++] = 0x00000100;

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
	ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
	ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
	ptr[i++] = 0;

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
	ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
	ptr[i++] = 1;
	ptr[i++] = 1;
	ptr[i++] = 1;


	/* Dispatch */
	ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ptr[i++] = 1;
	ptr[i++] = 1;
	ptr[i++] = 1;
	ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */


	while (i & 7)
		ptr[i++] = 0xffff1000; /* type3 nop packet */

	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
	ib_info.ib_mc_address = ib_result_mc_address;
	ib_info.size = i;

	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = 0;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);
	seq_no = ibs_request.seq_no;
	/* Prepare second command with dependency on the first */
	j = i;
	ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
	ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ptr[i++] = 0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
	ptr[i++] = 99;

	while (i & 7)
		ptr[i++] = 0xffff1000; /* type3 nop packet */

	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
	ib_info.ib_mc_address = ib_result_mc_address + j * 4;
	ib_info.size = i - j;

	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = 0;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	ibs_request.number_of_dependencies = 1;

	ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
	ibs_request.dependencies[0].context = context_handle[1];
	ibs_request.dependencies[0].ip_instance = 0;
	ibs_request.dependencies[0].ring = 0;
	ibs_request.dependencies[0].fence = seq_no;


	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);


	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
	fence_status.context = context_handle[0];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	/* Expect the second command to wait for shader to complete */
	CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_ctx_free(context_handle[1]);
	CU_ASSERT_EQUAL(r, 0);

	free(ibs_request.dependencies);
}