/* basic_tests.c revision d8807b2f */
1/* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 
21 * 22*/ 23 24#ifdef HAVE_CONFIG_H 25#include "config.h" 26#endif 27 28#include <stdio.h> 29#include <stdlib.h> 30#include <unistd.h> 31#ifdef HAVE_ALLOCA_H 32# include <alloca.h> 33#endif 34 35#include "CUnit/Basic.h" 36 37#include "amdgpu_test.h" 38#include "amdgpu_drm.h" 39 40static amdgpu_device_handle device_handle; 41static uint32_t major_version; 42static uint32_t minor_version; 43static uint32_t family_id; 44 45static void amdgpu_query_info_test(void); 46static void amdgpu_memory_alloc(void); 47static void amdgpu_command_submission_gfx(void); 48static void amdgpu_command_submission_compute(void); 49static void amdgpu_command_submission_multi_fence(void); 50static void amdgpu_command_submission_sdma(void); 51static void amdgpu_userptr_test(void); 52static void amdgpu_semaphore_test(void); 53 54static void amdgpu_command_submission_write_linear_helper(unsigned ip_type); 55static void amdgpu_command_submission_const_fill_helper(unsigned ip_type); 56static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type); 57 58CU_TestInfo basic_tests[] = { 59 { "Query Info Test", amdgpu_query_info_test }, 60 { "Memory alloc Test", amdgpu_memory_alloc }, 61 { "Userptr Test", amdgpu_userptr_test }, 62 { "Command submission Test (GFX)", amdgpu_command_submission_gfx }, 63 { "Command submission Test (Compute)", amdgpu_command_submission_compute }, 64 { "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence }, 65 { "Command submission Test (SDMA)", amdgpu_command_submission_sdma }, 66 { "SW semaphore Test", amdgpu_semaphore_test }, 67 CU_TEST_INFO_NULL, 68}; 69#define BUFFER_SIZE (8 * 1024) 70#define SDMA_PKT_HEADER_op_offset 0 71#define SDMA_PKT_HEADER_op_mask 0x000000FF 72#define SDMA_PKT_HEADER_op_shift 0 73#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift) 74#define SDMA_OPCODE_CONSTANT_FILL 11 75# define SDMA_CONSTANT_FILL_EXTRA_SIZE(x) ((x) << 14) 76 /* 0 = byte fill 77 * 2 = DW fill 78 */ 
79#define SDMA_PACKET(op, sub_op, e) ((((e) & 0xFFFF) << 16) | \ 80 (((sub_op) & 0xFF) << 8) | \ 81 (((op) & 0xFF) << 0)) 82#define SDMA_OPCODE_WRITE 2 83# define SDMA_WRITE_SUB_OPCODE_LINEAR 0 84# define SDMA_WRTIE_SUB_OPCODE_TILED 1 85 86#define SDMA_OPCODE_COPY 1 87# define SDMA_COPY_SUB_OPCODE_LINEAR 0 88 89#define GFX_COMPUTE_NOP 0xffff1000 90#define SDMA_NOP 0x0 91 92/* PM4 */ 93#define PACKET_TYPE0 0 94#define PACKET_TYPE1 1 95#define PACKET_TYPE2 2 96#define PACKET_TYPE3 3 97 98#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3) 99#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF) 100#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF) 101#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF) 102#define PACKET0(reg, n) ((PACKET_TYPE0 << 30) | \ 103 ((reg) & 0xFFFF) | \ 104 ((n) & 0x3FFF) << 16) 105#define CP_PACKET2 0x80000000 106#define PACKET2_PAD_SHIFT 0 107#define PACKET2_PAD_MASK (0x3fffffff << 0) 108 109#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v))) 110 111#define PACKET3(op, n) ((PACKET_TYPE3 << 30) | \ 112 (((op) & 0xFF) << 8) | \ 113 ((n) & 0x3FFF) << 16) 114 115/* Packet 3 types */ 116#define PACKET3_NOP 0x10 117 118#define PACKET3_WRITE_DATA 0x37 119#define WRITE_DATA_DST_SEL(x) ((x) << 8) 120 /* 0 - register 121 * 1 - memory (sync - via GRBM) 122 * 2 - gl2 123 * 3 - gds 124 * 4 - reserved 125 * 5 - memory (async - direct) 126 */ 127#define WR_ONE_ADDR (1 << 16) 128#define WR_CONFIRM (1 << 20) 129#define WRITE_DATA_CACHE_POLICY(x) ((x) << 25) 130 /* 0 - LRU 131 * 1 - Stream 132 */ 133#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30) 134 /* 0 - me 135 * 1 - pfp 136 * 2 - ce 137 */ 138 139#define PACKET3_DMA_DATA 0x50 140/* 1. header 141 * 2. CONTROL 142 * 3. SRC_ADDR_LO or DATA [31:0] 143 * 4. SRC_ADDR_HI [31:0] 144 * 5. DST_ADDR_LO [31:0] 145 * 6. DST_ADDR_HI [7:0] 146 * 7. 
COMMAND [30:21] | BYTE_COUNT [20:0] 147 */ 148/* CONTROL */ 149# define PACKET3_DMA_DATA_ENGINE(x) ((x) << 0) 150 /* 0 - ME 151 * 1 - PFP 152 */ 153# define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13) 154 /* 0 - LRU 155 * 1 - Stream 156 * 2 - Bypass 157 */ 158# define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15) 159# define PACKET3_DMA_DATA_DST_SEL(x) ((x) << 20) 160 /* 0 - DST_ADDR using DAS 161 * 1 - GDS 162 * 3 - DST_ADDR using L2 163 */ 164# define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25) 165 /* 0 - LRU 166 * 1 - Stream 167 * 2 - Bypass 168 */ 169# define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27) 170# define PACKET3_DMA_DATA_SRC_SEL(x) ((x) << 29) 171 /* 0 - SRC_ADDR using SAS 172 * 1 - GDS 173 * 2 - DATA 174 * 3 - SRC_ADDR using L2 175 */ 176# define PACKET3_DMA_DATA_CP_SYNC (1 << 31) 177/* COMMAND */ 178# define PACKET3_DMA_DATA_DIS_WC (1 << 21) 179# define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22) 180 /* 0 - none 181 * 1 - 8 in 16 182 * 2 - 8 in 32 183 * 3 - 8 in 64 184 */ 185# define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24) 186 /* 0 - none 187 * 1 - 8 in 16 188 * 2 - 8 in 32 189 * 3 - 8 in 64 190 */ 191# define PACKET3_DMA_DATA_CMD_SAS (1 << 26) 192 /* 0 - memory 193 * 1 - register 194 */ 195# define PACKET3_DMA_DATA_CMD_DAS (1 << 27) 196 /* 0 - memory 197 * 1 - register 198 */ 199# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28) 200# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29) 201# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30) 202 203#define SDMA_PACKET_SI(op, b, t, s, cnt) ((((op) & 0xF) << 28) | \ 204 (((b) & 0x1) << 26) | \ 205 (((t) & 0x1) << 23) | \ 206 (((s) & 0x1) << 22) | \ 207 (((cnt) & 0xFFFFF) << 0)) 208#define SDMA_OPCODE_COPY_SI 3 209#define SDMA_OPCODE_CONSTANT_FILL_SI 13 210#define SDMA_NOP_SI 0xf 211#define GFX_COMPUTE_NOP_SI 0x80000000 212#define PACKET3_DMA_DATA_SI 0x41 213# define PACKET3_DMA_DATA_SI_ENGINE(x) ((x) << 27) 214 /* 0 - ME 215 * 1 - PFP 216 */ 217# define PACKET3_DMA_DATA_SI_DST_SEL(x) ((x) << 20) 218 /* 0 - DST_ADDR 
using DAS 219 * 1 - GDS 220 * 3 - DST_ADDR using L2 221 */ 222# define PACKET3_DMA_DATA_SI_SRC_SEL(x) ((x) << 29) 223 /* 0 - SRC_ADDR using SAS 224 * 1 - GDS 225 * 2 - DATA 226 * 3 - SRC_ADDR using L2 227 */ 228# define PACKET3_DMA_DATA_SI_CP_SYNC (1 << 31) 229 230int suite_basic_tests_init(void) 231{ 232 struct amdgpu_gpu_info gpu_info = {0}; 233 int r; 234 235 r = amdgpu_device_initialize(drm_amdgpu[0], &major_version, 236 &minor_version, &device_handle); 237 238 if (r) { 239 if ((r == -EACCES) && (errno == EACCES)) 240 printf("\n\nError:%s. " 241 "Hint:Try to run this test program as root.", 242 strerror(errno)); 243 return CUE_SINIT_FAILED; 244 } 245 246 r = amdgpu_query_gpu_info(device_handle, &gpu_info); 247 if (r) 248 return CUE_SINIT_FAILED; 249 250 family_id = gpu_info.family_id; 251 252 return CUE_SUCCESS; 253} 254 255int suite_basic_tests_clean(void) 256{ 257 int r = amdgpu_device_deinitialize(device_handle); 258 259 if (r == 0) 260 return CUE_SUCCESS; 261 else 262 return CUE_SCLEAN_FAILED; 263} 264 265static void amdgpu_query_info_test(void) 266{ 267 struct amdgpu_gpu_info gpu_info = {0}; 268 uint32_t version, feature; 269 int r; 270 271 r = amdgpu_query_gpu_info(device_handle, &gpu_info); 272 CU_ASSERT_EQUAL(r, 0); 273 274 r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0, 275 0, &version, &feature); 276 CU_ASSERT_EQUAL(r, 0); 277} 278 279static void amdgpu_memory_alloc(void) 280{ 281 amdgpu_bo_handle bo; 282 amdgpu_va_handle va_handle; 283 uint64_t bo_mc; 284 int r; 285 286 /* Test visible VRAM */ 287 bo = gpu_mem_alloc(device_handle, 288 4096, 4096, 289 AMDGPU_GEM_DOMAIN_VRAM, 290 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, 291 &bo_mc, &va_handle); 292 293 r = gpu_mem_free(bo, va_handle, bo_mc, 4096); 294 CU_ASSERT_EQUAL(r, 0); 295 296 /* Test invisible VRAM */ 297 bo = gpu_mem_alloc(device_handle, 298 4096, 4096, 299 AMDGPU_GEM_DOMAIN_VRAM, 300 AMDGPU_GEM_CREATE_NO_CPU_ACCESS, 301 &bo_mc, &va_handle); 302 303 r = gpu_mem_free(bo, 
va_handle, bo_mc, 4096); 304 CU_ASSERT_EQUAL(r, 0); 305 306 /* Test GART Cacheable */ 307 bo = gpu_mem_alloc(device_handle, 308 4096, 4096, 309 AMDGPU_GEM_DOMAIN_GTT, 310 0, &bo_mc, &va_handle); 311 312 r = gpu_mem_free(bo, va_handle, bo_mc, 4096); 313 CU_ASSERT_EQUAL(r, 0); 314 315 /* Test GART USWC */ 316 bo = gpu_mem_alloc(device_handle, 317 4096, 4096, 318 AMDGPU_GEM_DOMAIN_GTT, 319 AMDGPU_GEM_CREATE_CPU_GTT_USWC, 320 &bo_mc, &va_handle); 321 322 r = gpu_mem_free(bo, va_handle, bo_mc, 4096); 323 CU_ASSERT_EQUAL(r, 0); 324} 325 326static void amdgpu_command_submission_gfx_separate_ibs(void) 327{ 328 amdgpu_context_handle context_handle; 329 amdgpu_bo_handle ib_result_handle, ib_result_ce_handle; 330 void *ib_result_cpu, *ib_result_ce_cpu; 331 uint64_t ib_result_mc_address, ib_result_ce_mc_address; 332 struct amdgpu_cs_request ibs_request = {0}; 333 struct amdgpu_cs_ib_info ib_info[2]; 334 struct amdgpu_cs_fence fence_status = {0}; 335 uint32_t *ptr; 336 uint32_t expired; 337 amdgpu_bo_list_handle bo_list; 338 amdgpu_va_handle va_handle, va_handle_ce; 339 int r, i = 0; 340 341 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 342 CU_ASSERT_EQUAL(r, 0); 343 344 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 345 AMDGPU_GEM_DOMAIN_GTT, 0, 346 &ib_result_handle, &ib_result_cpu, 347 &ib_result_mc_address, &va_handle); 348 CU_ASSERT_EQUAL(r, 0); 349 350 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 351 AMDGPU_GEM_DOMAIN_GTT, 0, 352 &ib_result_ce_handle, &ib_result_ce_cpu, 353 &ib_result_ce_mc_address, &va_handle_ce); 354 CU_ASSERT_EQUAL(r, 0); 355 356 r = amdgpu_get_bo_list(device_handle, ib_result_handle, 357 ib_result_ce_handle, &bo_list); 358 CU_ASSERT_EQUAL(r, 0); 359 360 memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info)); 361 362 /* IT_SET_CE_DE_COUNTERS */ 363 ptr = ib_result_ce_cpu; 364 if (family_id != AMDGPU_FAMILY_SI) { 365 ptr[i++] = 0xc0008900; 366 ptr[i++] = 0; 367 } 368 ptr[i++] = 0xc0008400; 369 ptr[i++] = 1; 370 
ib_info[0].ib_mc_address = ib_result_ce_mc_address; 371 ib_info[0].size = i; 372 ib_info[0].flags = AMDGPU_IB_FLAG_CE; 373 374 /* IT_WAIT_ON_CE_COUNTER */ 375 ptr = ib_result_cpu; 376 ptr[0] = 0xc0008600; 377 ptr[1] = 0x00000001; 378 ib_info[1].ib_mc_address = ib_result_mc_address; 379 ib_info[1].size = 2; 380 381 ibs_request.ip_type = AMDGPU_HW_IP_GFX; 382 ibs_request.number_of_ibs = 2; 383 ibs_request.ibs = ib_info; 384 ibs_request.resources = bo_list; 385 ibs_request.fence_info.handle = NULL; 386 387 r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1); 388 389 CU_ASSERT_EQUAL(r, 0); 390 391 fence_status.context = context_handle; 392 fence_status.ip_type = AMDGPU_HW_IP_GFX; 393 fence_status.ip_instance = 0; 394 fence_status.fence = ibs_request.seq_no; 395 396 r = amdgpu_cs_query_fence_status(&fence_status, 397 AMDGPU_TIMEOUT_INFINITE, 398 0, &expired); 399 CU_ASSERT_EQUAL(r, 0); 400 401 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 402 ib_result_mc_address, 4096); 403 CU_ASSERT_EQUAL(r, 0); 404 405 r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce, 406 ib_result_ce_mc_address, 4096); 407 CU_ASSERT_EQUAL(r, 0); 408 409 r = amdgpu_bo_list_destroy(bo_list); 410 CU_ASSERT_EQUAL(r, 0); 411 412 r = amdgpu_cs_ctx_free(context_handle); 413 CU_ASSERT_EQUAL(r, 0); 414 415} 416 417static void amdgpu_command_submission_gfx_shared_ib(void) 418{ 419 amdgpu_context_handle context_handle; 420 amdgpu_bo_handle ib_result_handle; 421 void *ib_result_cpu; 422 uint64_t ib_result_mc_address; 423 struct amdgpu_cs_request ibs_request = {0}; 424 struct amdgpu_cs_ib_info ib_info[2]; 425 struct amdgpu_cs_fence fence_status = {0}; 426 uint32_t *ptr; 427 uint32_t expired; 428 amdgpu_bo_list_handle bo_list; 429 amdgpu_va_handle va_handle; 430 int r, i = 0; 431 432 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 433 CU_ASSERT_EQUAL(r, 0); 434 435 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 436 AMDGPU_GEM_DOMAIN_GTT, 0, 437 &ib_result_handle, 
&ib_result_cpu, 438 &ib_result_mc_address, &va_handle); 439 CU_ASSERT_EQUAL(r, 0); 440 441 r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, 442 &bo_list); 443 CU_ASSERT_EQUAL(r, 0); 444 445 memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info)); 446 447 /* IT_SET_CE_DE_COUNTERS */ 448 ptr = ib_result_cpu; 449 if (family_id != AMDGPU_FAMILY_SI) { 450 ptr[i++] = 0xc0008900; 451 ptr[i++] = 0; 452 } 453 ptr[i++] = 0xc0008400; 454 ptr[i++] = 1; 455 ib_info[0].ib_mc_address = ib_result_mc_address; 456 ib_info[0].size = i; 457 ib_info[0].flags = AMDGPU_IB_FLAG_CE; 458 459 ptr = (uint32_t *)ib_result_cpu + 4; 460 ptr[0] = 0xc0008600; 461 ptr[1] = 0x00000001; 462 ib_info[1].ib_mc_address = ib_result_mc_address + 16; 463 ib_info[1].size = 2; 464 465 ibs_request.ip_type = AMDGPU_HW_IP_GFX; 466 ibs_request.number_of_ibs = 2; 467 ibs_request.ibs = ib_info; 468 ibs_request.resources = bo_list; 469 ibs_request.fence_info.handle = NULL; 470 471 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 472 473 CU_ASSERT_EQUAL(r, 0); 474 475 fence_status.context = context_handle; 476 fence_status.ip_type = AMDGPU_HW_IP_GFX; 477 fence_status.ip_instance = 0; 478 fence_status.fence = ibs_request.seq_no; 479 480 r = amdgpu_cs_query_fence_status(&fence_status, 481 AMDGPU_TIMEOUT_INFINITE, 482 0, &expired); 483 CU_ASSERT_EQUAL(r, 0); 484 485 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 486 ib_result_mc_address, 4096); 487 CU_ASSERT_EQUAL(r, 0); 488 489 r = amdgpu_bo_list_destroy(bo_list); 490 CU_ASSERT_EQUAL(r, 0); 491 492 r = amdgpu_cs_ctx_free(context_handle); 493 CU_ASSERT_EQUAL(r, 0); 494} 495 496static void amdgpu_command_submission_gfx_cp_write_data(void) 497{ 498 amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX); 499} 500 501static void amdgpu_command_submission_gfx_cp_const_fill(void) 502{ 503 amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX); 504} 505 506static void amdgpu_command_submission_gfx_cp_copy_data(void) 507{ 508 
amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX); 509} 510 511static void amdgpu_command_submission_gfx(void) 512{ 513 /* write data using the CP */ 514 amdgpu_command_submission_gfx_cp_write_data(); 515 /* const fill using the CP */ 516 amdgpu_command_submission_gfx_cp_const_fill(); 517 /* copy data using the CP */ 518 amdgpu_command_submission_gfx_cp_copy_data(); 519 /* separate IB buffers for multi-IB submission */ 520 amdgpu_command_submission_gfx_separate_ibs(); 521 /* shared IB buffer for multi-IB submission */ 522 amdgpu_command_submission_gfx_shared_ib(); 523} 524 525static void amdgpu_semaphore_test(void) 526{ 527 amdgpu_context_handle context_handle[2]; 528 amdgpu_semaphore_handle sem; 529 amdgpu_bo_handle ib_result_handle[2]; 530 void *ib_result_cpu[2]; 531 uint64_t ib_result_mc_address[2]; 532 struct amdgpu_cs_request ibs_request[2] = {0}; 533 struct amdgpu_cs_ib_info ib_info[2] = {0}; 534 struct amdgpu_cs_fence fence_status = {0}; 535 uint32_t *ptr; 536 uint32_t expired; 537 uint32_t sdma_nop, gfx_nop; 538 amdgpu_bo_list_handle bo_list[2]; 539 amdgpu_va_handle va_handle[2]; 540 int r, i; 541 542 if (family_id == AMDGPU_FAMILY_SI) { 543 sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0); 544 gfx_nop = GFX_COMPUTE_NOP_SI; 545 } else { 546 sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP); 547 gfx_nop = GFX_COMPUTE_NOP; 548 } 549 550 r = amdgpu_cs_create_semaphore(&sem); 551 CU_ASSERT_EQUAL(r, 0); 552 for (i = 0; i < 2; i++) { 553 r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]); 554 CU_ASSERT_EQUAL(r, 0); 555 556 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 557 AMDGPU_GEM_DOMAIN_GTT, 0, 558 &ib_result_handle[i], &ib_result_cpu[i], 559 &ib_result_mc_address[i], &va_handle[i]); 560 CU_ASSERT_EQUAL(r, 0); 561 562 r = amdgpu_get_bo_list(device_handle, ib_result_handle[i], 563 NULL, &bo_list[i]); 564 CU_ASSERT_EQUAL(r, 0); 565 } 566 567 /* 1. 
same context different engine */ 568 ptr = ib_result_cpu[0]; 569 ptr[0] = sdma_nop; 570 ib_info[0].ib_mc_address = ib_result_mc_address[0]; 571 ib_info[0].size = 1; 572 573 ibs_request[0].ip_type = AMDGPU_HW_IP_DMA; 574 ibs_request[0].number_of_ibs = 1; 575 ibs_request[0].ibs = &ib_info[0]; 576 ibs_request[0].resources = bo_list[0]; 577 ibs_request[0].fence_info.handle = NULL; 578 r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1); 579 CU_ASSERT_EQUAL(r, 0); 580 r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem); 581 CU_ASSERT_EQUAL(r, 0); 582 583 r = amdgpu_cs_wait_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem); 584 CU_ASSERT_EQUAL(r, 0); 585 ptr = ib_result_cpu[1]; 586 ptr[0] = gfx_nop; 587 ib_info[1].ib_mc_address = ib_result_mc_address[1]; 588 ib_info[1].size = 1; 589 590 ibs_request[1].ip_type = AMDGPU_HW_IP_GFX; 591 ibs_request[1].number_of_ibs = 1; 592 ibs_request[1].ibs = &ib_info[1]; 593 ibs_request[1].resources = bo_list[1]; 594 ibs_request[1].fence_info.handle = NULL; 595 596 r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[1], 1); 597 CU_ASSERT_EQUAL(r, 0); 598 599 fence_status.context = context_handle[0]; 600 fence_status.ip_type = AMDGPU_HW_IP_GFX; 601 fence_status.ip_instance = 0; 602 fence_status.fence = ibs_request[1].seq_no; 603 r = amdgpu_cs_query_fence_status(&fence_status, 604 500000000, 0, &expired); 605 CU_ASSERT_EQUAL(r, 0); 606 CU_ASSERT_EQUAL(expired, true); 607 608 /* 2. 
same engine different context */ 609 ptr = ib_result_cpu[0]; 610 ptr[0] = gfx_nop; 611 ib_info[0].ib_mc_address = ib_result_mc_address[0]; 612 ib_info[0].size = 1; 613 614 ibs_request[0].ip_type = AMDGPU_HW_IP_GFX; 615 ibs_request[0].number_of_ibs = 1; 616 ibs_request[0].ibs = &ib_info[0]; 617 ibs_request[0].resources = bo_list[0]; 618 ibs_request[0].fence_info.handle = NULL; 619 r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1); 620 CU_ASSERT_EQUAL(r, 0); 621 r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem); 622 CU_ASSERT_EQUAL(r, 0); 623 624 r = amdgpu_cs_wait_semaphore(context_handle[1], AMDGPU_HW_IP_GFX, 0, 0, sem); 625 CU_ASSERT_EQUAL(r, 0); 626 ptr = ib_result_cpu[1]; 627 ptr[0] = gfx_nop; 628 ib_info[1].ib_mc_address = ib_result_mc_address[1]; 629 ib_info[1].size = 1; 630 631 ibs_request[1].ip_type = AMDGPU_HW_IP_GFX; 632 ibs_request[1].number_of_ibs = 1; 633 ibs_request[1].ibs = &ib_info[1]; 634 ibs_request[1].resources = bo_list[1]; 635 ibs_request[1].fence_info.handle = NULL; 636 r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request[1], 1); 637 638 CU_ASSERT_EQUAL(r, 0); 639 640 fence_status.context = context_handle[1]; 641 fence_status.ip_type = AMDGPU_HW_IP_GFX; 642 fence_status.ip_instance = 0; 643 fence_status.fence = ibs_request[1].seq_no; 644 r = amdgpu_cs_query_fence_status(&fence_status, 645 500000000, 0, &expired); 646 CU_ASSERT_EQUAL(r, 0); 647 CU_ASSERT_EQUAL(expired, true); 648 649 for (i = 0; i < 2; i++) { 650 r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i], 651 ib_result_mc_address[i], 4096); 652 CU_ASSERT_EQUAL(r, 0); 653 654 r = amdgpu_bo_list_destroy(bo_list[i]); 655 CU_ASSERT_EQUAL(r, 0); 656 657 r = amdgpu_cs_ctx_free(context_handle[i]); 658 CU_ASSERT_EQUAL(r, 0); 659 } 660 661 r = amdgpu_cs_destroy_semaphore(sem); 662 CU_ASSERT_EQUAL(r, 0); 663} 664 665static void amdgpu_command_submission_compute_nop(void) 666{ 667 amdgpu_context_handle context_handle; 668 amdgpu_bo_handle 
ib_result_handle; 669 void *ib_result_cpu; 670 uint64_t ib_result_mc_address; 671 struct amdgpu_cs_request ibs_request; 672 struct amdgpu_cs_ib_info ib_info; 673 struct amdgpu_cs_fence fence_status; 674 uint32_t *ptr; 675 uint32_t expired; 676 int i, r, instance; 677 amdgpu_bo_list_handle bo_list; 678 amdgpu_va_handle va_handle; 679 struct drm_amdgpu_info_hw_ip info; 680 681 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info); 682 CU_ASSERT_EQUAL(r, 0); 683 684 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 685 CU_ASSERT_EQUAL(r, 0); 686 687 for (instance = 0; (1 << instance) & info.available_rings; instance++) { 688 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 689 AMDGPU_GEM_DOMAIN_GTT, 0, 690 &ib_result_handle, &ib_result_cpu, 691 &ib_result_mc_address, &va_handle); 692 CU_ASSERT_EQUAL(r, 0); 693 694 r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, 695 &bo_list); 696 CU_ASSERT_EQUAL(r, 0); 697 698 ptr = ib_result_cpu; 699 memset(ptr, 0, 16); 700 ptr[0]=PACKET3(PACKET3_NOP, 14); 701 702 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); 703 ib_info.ib_mc_address = ib_result_mc_address; 704 ib_info.size = 16; 705 706 memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); 707 ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE; 708 ibs_request.ring = instance; 709 ibs_request.number_of_ibs = 1; 710 ibs_request.ibs = &ib_info; 711 ibs_request.resources = bo_list; 712 ibs_request.fence_info.handle = NULL; 713 714 memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence)); 715 r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1); 716 CU_ASSERT_EQUAL(r, 0); 717 718 fence_status.context = context_handle; 719 fence_status.ip_type = AMDGPU_HW_IP_COMPUTE; 720 fence_status.ip_instance = 0; 721 fence_status.ring = instance; 722 fence_status.fence = ibs_request.seq_no; 723 724 r = amdgpu_cs_query_fence_status(&fence_status, 725 AMDGPU_TIMEOUT_INFINITE, 726 0, &expired); 727 CU_ASSERT_EQUAL(r, 0); 728 729 r = 
amdgpu_bo_list_destroy(bo_list); 730 CU_ASSERT_EQUAL(r, 0); 731 732 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 733 ib_result_mc_address, 4096); 734 CU_ASSERT_EQUAL(r, 0); 735 } 736 737 r = amdgpu_cs_ctx_free(context_handle); 738 CU_ASSERT_EQUAL(r, 0); 739} 740 741static void amdgpu_command_submission_compute_cp_write_data(void) 742{ 743 amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE); 744} 745 746static void amdgpu_command_submission_compute_cp_const_fill(void) 747{ 748 amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE); 749} 750 751static void amdgpu_command_submission_compute_cp_copy_data(void) 752{ 753 amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE); 754} 755 756static void amdgpu_command_submission_compute(void) 757{ 758 /* write data using the CP */ 759 amdgpu_command_submission_compute_cp_write_data(); 760 /* const fill using the CP */ 761 amdgpu_command_submission_compute_cp_const_fill(); 762 /* copy data using the CP */ 763 amdgpu_command_submission_compute_cp_copy_data(); 764 /* nop test */ 765 amdgpu_command_submission_compute_nop(); 766} 767 768/* 769 * caller need create/release: 770 * pm4_src, resources, ib_info, and ibs_request 771 * submit command stream described in ibs_request and wait for this IB accomplished 772 */ 773static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle, 774 unsigned ip_type, 775 int instance, int pm4_dw, uint32_t *pm4_src, 776 int res_cnt, amdgpu_bo_handle *resources, 777 struct amdgpu_cs_ib_info *ib_info, 778 struct amdgpu_cs_request *ibs_request) 779{ 780 int r; 781 uint32_t expired; 782 uint32_t *ring_ptr; 783 amdgpu_bo_handle ib_result_handle; 784 void *ib_result_cpu; 785 uint64_t ib_result_mc_address; 786 struct amdgpu_cs_fence fence_status = {0}; 787 amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1)); 788 amdgpu_va_handle va_handle; 789 790 /* prepare CS */ 791 CU_ASSERT_NOT_EQUAL(pm4_src, NULL); 792 
CU_ASSERT_NOT_EQUAL(resources, NULL); 793 CU_ASSERT_NOT_EQUAL(ib_info, NULL); 794 CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 795 CU_ASSERT_TRUE(pm4_dw <= 1024); 796 797 /* allocate IB */ 798 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 799 AMDGPU_GEM_DOMAIN_GTT, 0, 800 &ib_result_handle, &ib_result_cpu, 801 &ib_result_mc_address, &va_handle); 802 CU_ASSERT_EQUAL(r, 0); 803 804 /* copy PM4 packet to ring from caller */ 805 ring_ptr = ib_result_cpu; 806 memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src)); 807 808 ib_info->ib_mc_address = ib_result_mc_address; 809 ib_info->size = pm4_dw; 810 811 ibs_request->ip_type = ip_type; 812 ibs_request->ring = instance; 813 ibs_request->number_of_ibs = 1; 814 ibs_request->ibs = ib_info; 815 ibs_request->fence_info.handle = NULL; 816 817 memcpy(all_res, resources, sizeof(resources[0]) * res_cnt); 818 all_res[res_cnt] = ib_result_handle; 819 820 r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res, 821 NULL, &ibs_request->resources); 822 CU_ASSERT_EQUAL(r, 0); 823 824 CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 825 826 /* submit CS */ 827 r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1); 828 CU_ASSERT_EQUAL(r, 0); 829 830 r = amdgpu_bo_list_destroy(ibs_request->resources); 831 CU_ASSERT_EQUAL(r, 0); 832 833 fence_status.ip_type = ip_type; 834 fence_status.ip_instance = 0; 835 fence_status.ring = ibs_request->ring; 836 fence_status.context = context_handle; 837 fence_status.fence = ibs_request->seq_no; 838 839 /* wait for IB accomplished */ 840 r = amdgpu_cs_query_fence_status(&fence_status, 841 AMDGPU_TIMEOUT_INFINITE, 842 0, &expired); 843 CU_ASSERT_EQUAL(r, 0); 844 CU_ASSERT_EQUAL(expired, true); 845 846 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 847 ib_result_mc_address, 4096); 848 CU_ASSERT_EQUAL(r, 0); 849} 850 851static void amdgpu_command_submission_write_linear_helper(unsigned ip_type) 852{ 853 const int sdma_write_length = 128; 854 const int pm4_dw = 256; 855 amdgpu_context_handle 
context_handle; 856 amdgpu_bo_handle bo; 857 amdgpu_bo_handle *resources; 858 uint32_t *pm4; 859 struct amdgpu_cs_ib_info *ib_info; 860 struct amdgpu_cs_request *ibs_request; 861 uint64_t bo_mc; 862 volatile uint32_t *bo_cpu; 863 int i, j, r, loop; 864 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 865 amdgpu_va_handle va_handle; 866 867 pm4 = calloc(pm4_dw, sizeof(*pm4)); 868 CU_ASSERT_NOT_EQUAL(pm4, NULL); 869 870 ib_info = calloc(1, sizeof(*ib_info)); 871 CU_ASSERT_NOT_EQUAL(ib_info, NULL); 872 873 ibs_request = calloc(1, sizeof(*ibs_request)); 874 CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 875 876 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 877 CU_ASSERT_EQUAL(r, 0); 878 879 /* prepare resource */ 880 resources = calloc(1, sizeof(amdgpu_bo_handle)); 881 CU_ASSERT_NOT_EQUAL(resources, NULL); 882 883 loop = 0; 884 while(loop < 2) { 885 /* allocate UC bo for sDMA use */ 886 r = amdgpu_bo_alloc_and_map(device_handle, 887 sdma_write_length * sizeof(uint32_t), 888 4096, AMDGPU_GEM_DOMAIN_GTT, 889 gtt_flags[loop], &bo, (void**)&bo_cpu, 890 &bo_mc, &va_handle); 891 CU_ASSERT_EQUAL(r, 0); 892 893 /* clear bo */ 894 memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t)); 895 896 897 resources[0] = bo; 898 899 /* fulfill PM4: test DMA write-linear */ 900 i = j = 0; 901 if (ip_type == AMDGPU_HW_IP_DMA) { 902 if (family_id == AMDGPU_FAMILY_SI) 903 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0, 904 sdma_write_length); 905 else 906 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE, 907 SDMA_WRITE_SUB_OPCODE_LINEAR, 0); 908 pm4[i++] = 0xffffffff & bo_mc; 909 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 910 if (family_id >= AMDGPU_FAMILY_AI) 911 pm4[i++] = sdma_write_length - 1; 912 else if (family_id != AMDGPU_FAMILY_SI) 913 pm4[i++] = sdma_write_length; 914 while(j++ < sdma_write_length) 915 pm4[i++] = 0xdeadbeaf; 916 } else if ((ip_type == AMDGPU_HW_IP_GFX) || 917 (ip_type == AMDGPU_HW_IP_COMPUTE)) { 918 pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 
2 + sdma_write_length); 919 pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 920 pm4[i++] = 0xfffffffc & bo_mc; 921 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 922 while(j++ < sdma_write_length) 923 pm4[i++] = 0xdeadbeaf; 924 } 925 926 amdgpu_test_exec_cs_helper(context_handle, 927 ip_type, 0, 928 i, pm4, 929 1, resources, 930 ib_info, ibs_request); 931 932 /* verify if SDMA test result meets with expected */ 933 i = 0; 934 while(i < sdma_write_length) { 935 CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf); 936 } 937 938 r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc, 939 sdma_write_length * sizeof(uint32_t)); 940 CU_ASSERT_EQUAL(r, 0); 941 loop++; 942 } 943 /* clean resources */ 944 free(resources); 945 free(ibs_request); 946 free(ib_info); 947 free(pm4); 948 949 /* end of test */ 950 r = amdgpu_cs_ctx_free(context_handle); 951 CU_ASSERT_EQUAL(r, 0); 952} 953 954static void amdgpu_command_submission_sdma_write_linear(void) 955{ 956 amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA); 957} 958 959static void amdgpu_command_submission_const_fill_helper(unsigned ip_type) 960{ 961 const int sdma_write_length = 1024 * 1024; 962 const int pm4_dw = 256; 963 amdgpu_context_handle context_handle; 964 amdgpu_bo_handle bo; 965 amdgpu_bo_handle *resources; 966 uint32_t *pm4; 967 struct amdgpu_cs_ib_info *ib_info; 968 struct amdgpu_cs_request *ibs_request; 969 uint64_t bo_mc; 970 volatile uint32_t *bo_cpu; 971 int i, j, r, loop; 972 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 973 amdgpu_va_handle va_handle; 974 975 pm4 = calloc(pm4_dw, sizeof(*pm4)); 976 CU_ASSERT_NOT_EQUAL(pm4, NULL); 977 978 ib_info = calloc(1, sizeof(*ib_info)); 979 CU_ASSERT_NOT_EQUAL(ib_info, NULL); 980 981 ibs_request = calloc(1, sizeof(*ibs_request)); 982 CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 983 984 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 985 CU_ASSERT_EQUAL(r, 0); 986 987 /* prepare resource */ 988 resources = calloc(1, sizeof(amdgpu_bo_handle)); 989 
CU_ASSERT_NOT_EQUAL(resources, NULL); 990 991 loop = 0; 992 while(loop < 2) { 993 /* allocate UC bo for sDMA use */ 994 r = amdgpu_bo_alloc_and_map(device_handle, 995 sdma_write_length, 4096, 996 AMDGPU_GEM_DOMAIN_GTT, 997 gtt_flags[loop], &bo, (void**)&bo_cpu, 998 &bo_mc, &va_handle); 999 CU_ASSERT_EQUAL(r, 0); 1000 1001 /* clear bo */ 1002 memset((void*)bo_cpu, 0, sdma_write_length); 1003 1004 resources[0] = bo; 1005 1006 /* fulfill PM4: test DMA const fill */ 1007 i = j = 0; 1008 if (ip_type == AMDGPU_HW_IP_DMA) { 1009 if (family_id == AMDGPU_FAMILY_SI) { 1010 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI, 0, 0, 0, 1011 sdma_write_length / 4); 1012 pm4[i++] = 0xfffffffc & bo_mc; 1013 pm4[i++] = 0xdeadbeaf; 1014 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16; 1015 } else { 1016 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0, 1017 SDMA_CONSTANT_FILL_EXTRA_SIZE(2)); 1018 pm4[i++] = 0xffffffff & bo_mc; 1019 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1020 pm4[i++] = 0xdeadbeaf; 1021 if (family_id >= AMDGPU_FAMILY_AI) 1022 pm4[i++] = sdma_write_length - 1; 1023 else 1024 pm4[i++] = sdma_write_length; 1025 } 1026 } else if ((ip_type == AMDGPU_HW_IP_GFX) || 1027 (ip_type == AMDGPU_HW_IP_COMPUTE)) { 1028 if (family_id == AMDGPU_FAMILY_SI) { 1029 pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4); 1030 pm4[i++] = 0xdeadbeaf; 1031 pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) | 1032 PACKET3_DMA_DATA_SI_DST_SEL(0) | 1033 PACKET3_DMA_DATA_SI_SRC_SEL(2) | 1034 PACKET3_DMA_DATA_SI_CP_SYNC; 1035 pm4[i++] = 0xffffffff & bo_mc; 1036 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1037 pm4[i++] = sdma_write_length; 1038 } else { 1039 pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5); 1040 pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) | 1041 PACKET3_DMA_DATA_DST_SEL(0) | 1042 PACKET3_DMA_DATA_SRC_SEL(2) | 1043 PACKET3_DMA_DATA_CP_SYNC; 1044 pm4[i++] = 0xdeadbeaf; 1045 pm4[i++] = 0; 1046 pm4[i++] = 0xfffffffc & bo_mc; 1047 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1048 pm4[i++] = 
sdma_write_length; 1049 } 1050 } 1051 1052 amdgpu_test_exec_cs_helper(context_handle, 1053 ip_type, 0, 1054 i, pm4, 1055 1, resources, 1056 ib_info, ibs_request); 1057 1058 /* verify if SDMA test result meets with expected */ 1059 i = 0; 1060 while(i < (sdma_write_length / 4)) { 1061 CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf); 1062 } 1063 1064 r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc, 1065 sdma_write_length); 1066 CU_ASSERT_EQUAL(r, 0); 1067 loop++; 1068 } 1069 /* clean resources */ 1070 free(resources); 1071 free(ibs_request); 1072 free(ib_info); 1073 free(pm4); 1074 1075 /* end of test */ 1076 r = amdgpu_cs_ctx_free(context_handle); 1077 CU_ASSERT_EQUAL(r, 0); 1078} 1079 1080static void amdgpu_command_submission_sdma_const_fill(void) 1081{ 1082 amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA); 1083} 1084 1085static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type) 1086{ 1087 const int sdma_write_length = 1024; 1088 const int pm4_dw = 256; 1089 amdgpu_context_handle context_handle; 1090 amdgpu_bo_handle bo1, bo2; 1091 amdgpu_bo_handle *resources; 1092 uint32_t *pm4; 1093 struct amdgpu_cs_ib_info *ib_info; 1094 struct amdgpu_cs_request *ibs_request; 1095 uint64_t bo1_mc, bo2_mc; 1096 volatile unsigned char *bo1_cpu, *bo2_cpu; 1097 int i, j, r, loop1, loop2; 1098 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 1099 amdgpu_va_handle bo1_va_handle, bo2_va_handle; 1100 1101 pm4 = calloc(pm4_dw, sizeof(*pm4)); 1102 CU_ASSERT_NOT_EQUAL(pm4, NULL); 1103 1104 ib_info = calloc(1, sizeof(*ib_info)); 1105 CU_ASSERT_NOT_EQUAL(ib_info, NULL); 1106 1107 ibs_request = calloc(1, sizeof(*ibs_request)); 1108 CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 1109 1110 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 1111 CU_ASSERT_EQUAL(r, 0); 1112 1113 /* prepare resource */ 1114 resources = calloc(2, sizeof(amdgpu_bo_handle)); 1115 CU_ASSERT_NOT_EQUAL(resources, NULL); 1116 1117 loop1 = loop2 = 0; 1118 /* run 9 circle to test all 
mapping combination */ 1119 while(loop1 < 2) { 1120 while(loop2 < 2) { 1121 /* allocate UC bo1for sDMA use */ 1122 r = amdgpu_bo_alloc_and_map(device_handle, 1123 sdma_write_length, 4096, 1124 AMDGPU_GEM_DOMAIN_GTT, 1125 gtt_flags[loop1], &bo1, 1126 (void**)&bo1_cpu, &bo1_mc, 1127 &bo1_va_handle); 1128 CU_ASSERT_EQUAL(r, 0); 1129 1130 /* set bo1 */ 1131 memset((void*)bo1_cpu, 0xaa, sdma_write_length); 1132 1133 /* allocate UC bo2 for sDMA use */ 1134 r = amdgpu_bo_alloc_and_map(device_handle, 1135 sdma_write_length, 4096, 1136 AMDGPU_GEM_DOMAIN_GTT, 1137 gtt_flags[loop2], &bo2, 1138 (void**)&bo2_cpu, &bo2_mc, 1139 &bo2_va_handle); 1140 CU_ASSERT_EQUAL(r, 0); 1141 1142 /* clear bo2 */ 1143 memset((void*)bo2_cpu, 0, sdma_write_length); 1144 1145 resources[0] = bo1; 1146 resources[1] = bo2; 1147 1148 /* fulfill PM4: test DMA copy linear */ 1149 i = j = 0; 1150 if (ip_type == AMDGPU_HW_IP_DMA) { 1151 if (family_id == AMDGPU_FAMILY_SI) { 1152 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0, 1153 sdma_write_length); 1154 pm4[i++] = 0xffffffff & bo2_mc; 1155 pm4[i++] = 0xffffffff & bo1_mc; 1156 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 1157 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 1158 } else { 1159 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0); 1160 if (family_id >= AMDGPU_FAMILY_AI) 1161 pm4[i++] = sdma_write_length - 1; 1162 else 1163 pm4[i++] = sdma_write_length; 1164 pm4[i++] = 0; 1165 pm4[i++] = 0xffffffff & bo1_mc; 1166 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 1167 pm4[i++] = 0xffffffff & bo2_mc; 1168 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 1169 } 1170 1171 } else if ((ip_type == AMDGPU_HW_IP_GFX) || 1172 (ip_type == AMDGPU_HW_IP_COMPUTE)) { 1173 if (family_id == AMDGPU_FAMILY_SI) { 1174 pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4); 1175 pm4[i++] = 0xfffffffc & bo1_mc; 1176 pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) | 1177 PACKET3_DMA_DATA_SI_DST_SEL(0) | 1178 PACKET3_DMA_DATA_SI_SRC_SEL(0) | 1179 
PACKET3_DMA_DATA_SI_CP_SYNC | 1180 (0xffff00000000 & bo1_mc) >> 32; 1181 pm4[i++] = 0xfffffffc & bo2_mc; 1182 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 1183 pm4[i++] = sdma_write_length; 1184 } else { 1185 pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5); 1186 pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) | 1187 PACKET3_DMA_DATA_DST_SEL(0) | 1188 PACKET3_DMA_DATA_SRC_SEL(0) | 1189 PACKET3_DMA_DATA_CP_SYNC; 1190 pm4[i++] = 0xfffffffc & bo1_mc; 1191 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 1192 pm4[i++] = 0xfffffffc & bo2_mc; 1193 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 1194 pm4[i++] = sdma_write_length; 1195 } 1196 } 1197 1198 amdgpu_test_exec_cs_helper(context_handle, 1199 ip_type, 0, 1200 i, pm4, 1201 2, resources, 1202 ib_info, ibs_request); 1203 1204 /* verify if SDMA test result meets with expected */ 1205 i = 0; 1206 while(i < sdma_write_length) { 1207 CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa); 1208 } 1209 r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc, 1210 sdma_write_length); 1211 CU_ASSERT_EQUAL(r, 0); 1212 r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc, 1213 sdma_write_length); 1214 CU_ASSERT_EQUAL(r, 0); 1215 loop2++; 1216 } 1217 loop1++; 1218 } 1219 /* clean resources */ 1220 free(resources); 1221 free(ibs_request); 1222 free(ib_info); 1223 free(pm4); 1224 1225 /* end of test */ 1226 r = amdgpu_cs_ctx_free(context_handle); 1227 CU_ASSERT_EQUAL(r, 0); 1228} 1229 1230static void amdgpu_command_submission_sdma_copy_linear(void) 1231{ 1232 amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA); 1233} 1234 1235static void amdgpu_command_submission_sdma(void) 1236{ 1237 amdgpu_command_submission_sdma_write_linear(); 1238 amdgpu_command_submission_sdma_const_fill(); 1239 amdgpu_command_submission_sdma_copy_linear(); 1240} 1241 1242static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all) 1243{ 1244 amdgpu_context_handle context_handle; 1245 amdgpu_bo_handle ib_result_handle, ib_result_ce_handle; 1246 void *ib_result_cpu, 
*ib_result_ce_cpu; 1247 uint64_t ib_result_mc_address, ib_result_ce_mc_address; 1248 struct amdgpu_cs_request ibs_request[2] = {0}; 1249 struct amdgpu_cs_ib_info ib_info[2]; 1250 struct amdgpu_cs_fence fence_status[2] = {0}; 1251 uint32_t *ptr; 1252 uint32_t expired; 1253 amdgpu_bo_list_handle bo_list; 1254 amdgpu_va_handle va_handle, va_handle_ce; 1255 int r; 1256 int i = 0, ib_cs_num = 2; 1257 1258 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 1259 CU_ASSERT_EQUAL(r, 0); 1260 1261 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 1262 AMDGPU_GEM_DOMAIN_GTT, 0, 1263 &ib_result_handle, &ib_result_cpu, 1264 &ib_result_mc_address, &va_handle); 1265 CU_ASSERT_EQUAL(r, 0); 1266 1267 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 1268 AMDGPU_GEM_DOMAIN_GTT, 0, 1269 &ib_result_ce_handle, &ib_result_ce_cpu, 1270 &ib_result_ce_mc_address, &va_handle_ce); 1271 CU_ASSERT_EQUAL(r, 0); 1272 1273 r = amdgpu_get_bo_list(device_handle, ib_result_handle, 1274 ib_result_ce_handle, &bo_list); 1275 CU_ASSERT_EQUAL(r, 0); 1276 1277 memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info)); 1278 1279 /* IT_SET_CE_DE_COUNTERS */ 1280 ptr = ib_result_ce_cpu; 1281 if (family_id != AMDGPU_FAMILY_SI) { 1282 ptr[i++] = 0xc0008900; 1283 ptr[i++] = 0; 1284 } 1285 ptr[i++] = 0xc0008400; 1286 ptr[i++] = 1; 1287 ib_info[0].ib_mc_address = ib_result_ce_mc_address; 1288 ib_info[0].size = i; 1289 ib_info[0].flags = AMDGPU_IB_FLAG_CE; 1290 1291 /* IT_WAIT_ON_CE_COUNTER */ 1292 ptr = ib_result_cpu; 1293 ptr[0] = 0xc0008600; 1294 ptr[1] = 0x00000001; 1295 ib_info[1].ib_mc_address = ib_result_mc_address; 1296 ib_info[1].size = 2; 1297 1298 for (i = 0; i < ib_cs_num; i++) { 1299 ibs_request[i].ip_type = AMDGPU_HW_IP_GFX; 1300 ibs_request[i].number_of_ibs = 2; 1301 ibs_request[i].ibs = ib_info; 1302 ibs_request[i].resources = bo_list; 1303 ibs_request[i].fence_info.handle = NULL; 1304 } 1305 1306 r = amdgpu_cs_submit(context_handle, 0,ibs_request, ib_cs_num); 1307 1308 
CU_ASSERT_EQUAL(r, 0); 1309 1310 for (i = 0; i < ib_cs_num; i++) { 1311 fence_status[i].context = context_handle; 1312 fence_status[i].ip_type = AMDGPU_HW_IP_GFX; 1313 fence_status[i].fence = ibs_request[i].seq_no; 1314 } 1315 1316 r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all, 1317 AMDGPU_TIMEOUT_INFINITE, 1318 &expired, NULL); 1319 CU_ASSERT_EQUAL(r, 0); 1320 1321 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 1322 ib_result_mc_address, 4096); 1323 CU_ASSERT_EQUAL(r, 0); 1324 1325 r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce, 1326 ib_result_ce_mc_address, 4096); 1327 CU_ASSERT_EQUAL(r, 0); 1328 1329 r = amdgpu_bo_list_destroy(bo_list); 1330 CU_ASSERT_EQUAL(r, 0); 1331 1332 r = amdgpu_cs_ctx_free(context_handle); 1333 CU_ASSERT_EQUAL(r, 0); 1334} 1335 1336static void amdgpu_command_submission_multi_fence(void) 1337{ 1338 amdgpu_command_submission_multi_fence_wait_all(true); 1339 amdgpu_command_submission_multi_fence_wait_all(false); 1340} 1341 1342static void amdgpu_userptr_test(void) 1343{ 1344 int i, r, j; 1345 uint32_t *pm4 = NULL; 1346 uint64_t bo_mc; 1347 void *ptr = NULL; 1348 int pm4_dw = 256; 1349 int sdma_write_length = 4; 1350 amdgpu_bo_handle handle; 1351 amdgpu_context_handle context_handle; 1352 struct amdgpu_cs_ib_info *ib_info; 1353 struct amdgpu_cs_request *ibs_request; 1354 amdgpu_bo_handle buf_handle; 1355 amdgpu_va_handle va_handle; 1356 1357 pm4 = calloc(pm4_dw, sizeof(*pm4)); 1358 CU_ASSERT_NOT_EQUAL(pm4, NULL); 1359 1360 ib_info = calloc(1, sizeof(*ib_info)); 1361 CU_ASSERT_NOT_EQUAL(ib_info, NULL); 1362 1363 ibs_request = calloc(1, sizeof(*ibs_request)); 1364 CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 1365 1366 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 1367 CU_ASSERT_EQUAL(r, 0); 1368 1369 posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE); 1370 CU_ASSERT_NOT_EQUAL(ptr, NULL); 1371 memset(ptr, 0, BUFFER_SIZE); 1372 1373 r = amdgpu_create_bo_from_user_mem(device_handle, 1374 ptr, 
BUFFER_SIZE, &buf_handle); 1375 CU_ASSERT_EQUAL(r, 0); 1376 1377 r = amdgpu_va_range_alloc(device_handle, 1378 amdgpu_gpu_va_range_general, 1379 BUFFER_SIZE, 1, 0, &bo_mc, 1380 &va_handle, 0); 1381 CU_ASSERT_EQUAL(r, 0); 1382 1383 r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP); 1384 CU_ASSERT_EQUAL(r, 0); 1385 1386 handle = buf_handle; 1387 1388 j = i = 0; 1389 1390 if (family_id == AMDGPU_FAMILY_SI) 1391 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0, 1392 sdma_write_length); 1393 else 1394 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE, 1395 SDMA_WRITE_SUB_OPCODE_LINEAR, 0); 1396 pm4[i++] = 0xffffffff & bo_mc; 1397 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1398 if (family_id >= AMDGPU_FAMILY_AI) 1399 pm4[i++] = sdma_write_length - 1; 1400 else if (family_id != AMDGPU_FAMILY_SI) 1401 pm4[i++] = sdma_write_length; 1402 1403 while (j++ < sdma_write_length) 1404 pm4[i++] = 0xdeadbeaf; 1405 1406 amdgpu_test_exec_cs_helper(context_handle, 1407 AMDGPU_HW_IP_DMA, 0, 1408 i, pm4, 1409 1, &handle, 1410 ib_info, ibs_request); 1411 i = 0; 1412 while (i < sdma_write_length) { 1413 CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf); 1414 } 1415 free(ibs_request); 1416 free(ib_info); 1417 free(pm4); 1418 1419 r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP); 1420 CU_ASSERT_EQUAL(r, 0); 1421 r = amdgpu_va_range_free(va_handle); 1422 CU_ASSERT_EQUAL(r, 0); 1423 r = amdgpu_bo_free(buf_handle); 1424 CU_ASSERT_EQUAL(r, 0); 1425 free(ptr); 1426 1427 r = amdgpu_cs_ctx_free(context_handle); 1428 CU_ASSERT_EQUAL(r, 0); 1429} 1430