15324fb0dSmrg/* 25324fb0dSmrg * Copyright 2017 Advanced Micro Devices, Inc. 35324fb0dSmrg * 45324fb0dSmrg * Permission is hereby granted, free of charge, to any person obtaining a 55324fb0dSmrg * copy of this software and associated documentation files (the "Software"), 65324fb0dSmrg * to deal in the Software without restriction, including without limitation 75324fb0dSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 85324fb0dSmrg * and/or sell copies of the Software, and to permit persons to whom the 95324fb0dSmrg * Software is furnished to do so, subject to the following conditions: 105324fb0dSmrg * 115324fb0dSmrg * The above copyright notice and this permission notice shall be included in 125324fb0dSmrg * all copies or substantial portions of the Software. 135324fb0dSmrg * 145324fb0dSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 155324fb0dSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 165324fb0dSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 175324fb0dSmrg * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 185324fb0dSmrg * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 195324fb0dSmrg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 205324fb0dSmrg * OTHER DEALINGS IN THE SOFTWARE. 215324fb0dSmrg * 225324fb0dSmrg*/ 235324fb0dSmrg 245324fb0dSmrg#include "CUnit/Basic.h" 255324fb0dSmrg#include "xf86drm.h" 265324fb0dSmrg 275324fb0dSmrg#include "amdgpu_test.h" 285324fb0dSmrg#include "amdgpu_drm.h" 295324fb0dSmrg#include "amdgpu_internal.h" 305324fb0dSmrg#include <pthread.h> 315324fb0dSmrg 325324fb0dSmrgstatic amdgpu_device_handle device_handle; 335324fb0dSmrgstatic uint32_t major_version; 345324fb0dSmrgstatic uint32_t minor_version; 355324fb0dSmrg 364babd585Smrgstatic uint32_t family_id; 374babd585Smrgstatic uint32_t chip_id; 384babd585Smrgstatic uint32_t chip_rev; 394babd585Smrg 405324fb0dSmrgstatic void amdgpu_syncobj_timeline_test(void); 415324fb0dSmrg 425324fb0dSmrgCU_BOOL suite_syncobj_timeline_tests_enable(void) 435324fb0dSmrg{ 445324fb0dSmrg int r; 455324fb0dSmrg uint64_t cap = 0; 465324fb0dSmrg 475324fb0dSmrg r = drmGetCap(drm_amdgpu[0], DRM_CAP_SYNCOBJ_TIMELINE, &cap); 485324fb0dSmrg if (r || cap == 0) 495324fb0dSmrg return CU_FALSE; 505324fb0dSmrg 515324fb0dSmrg return CU_TRUE; 525324fb0dSmrg} 535324fb0dSmrg 545324fb0dSmrgint suite_syncobj_timeline_tests_init(void) 555324fb0dSmrg{ 565324fb0dSmrg int r; 575324fb0dSmrg 585324fb0dSmrg r = amdgpu_device_initialize(drm_amdgpu[0], &major_version, 595324fb0dSmrg &minor_version, &device_handle); 605324fb0dSmrg 615324fb0dSmrg if (r) { 625324fb0dSmrg if ((r == -EACCES) && (errno == EACCES)) 635324fb0dSmrg printf("\n\nError:%s. " 645324fb0dSmrg "Hint:Try to run this test program as root.", 655324fb0dSmrg strerror(errno)); 665324fb0dSmrg return CUE_SINIT_FAILED; 675324fb0dSmrg } 685324fb0dSmrg 695324fb0dSmrg return CUE_SUCCESS; 705324fb0dSmrg} 715324fb0dSmrg 725324fb0dSmrgint suite_syncobj_timeline_tests_clean(void) 735324fb0dSmrg{ 745324fb0dSmrg int r = amdgpu_device_deinitialize(device_handle); 755324fb0dSmrg 765324fb0dSmrg if (r == 0) 775324fb0dSmrg return CUE_SUCCESS; 785324fb0dSmrg else 795324fb0dSmrg return CUE_SCLEAN_FAILED; 805324fb0dSmrg} 815324fb0dSmrg 825324fb0dSmrg 835324fb0dSmrgCU_TestInfo syncobj_timeline_tests[] = { 845324fb0dSmrg { "syncobj timeline test", amdgpu_syncobj_timeline_test }, 855324fb0dSmrg CU_TEST_INFO_NULL, 865324fb0dSmrg}; 875324fb0dSmrg 885324fb0dSmrg#define GFX_COMPUTE_NOP 0xffff1000 895324fb0dSmrg#define SDMA_NOP 0x0 905324fb0dSmrgstatic int syncobj_command_submission_helper(uint32_t syncobj_handle, bool 915324fb0dSmrg wait_or_signal, uint64_t point) 925324fb0dSmrg{ 935324fb0dSmrg amdgpu_context_handle context_handle; 945324fb0dSmrg amdgpu_bo_handle ib_result_handle; 955324fb0dSmrg void *ib_result_cpu; 965324fb0dSmrg uint64_t ib_result_mc_address; 975324fb0dSmrg struct drm_amdgpu_cs_chunk chunks[2]; 985324fb0dSmrg struct drm_amdgpu_cs_chunk_data chunk_data; 995324fb0dSmrg struct drm_amdgpu_cs_chunk_syncobj syncobj_data; 1005324fb0dSmrg struct amdgpu_cs_fence fence_status; 1015324fb0dSmrg amdgpu_bo_list_handle bo_list; 1025324fb0dSmrg amdgpu_va_handle va_handle; 1039bd392adSmrg uint32_t expired; 1045324fb0dSmrg int i, r; 1055324fb0dSmrg uint64_t seq_no; 1064babd585Smrg static uint32_t *ptr; 1074babd585Smrg struct amdgpu_gpu_info gpu_info = {0}; 1084babd585Smrg unsigned gc_ip_type; 1094babd585Smrg 1104babd585Smrg r = amdgpu_query_gpu_info(device_handle, &gpu_info); 1114babd585Smrg CU_ASSERT_EQUAL(r, 0); 1124babd585Smrg 1134babd585Smrg family_id = device_handle->info.family_id; 1144babd585Smrg chip_id = device_handle->info.chip_external_rev; 1154babd585Smrg chip_rev = device_handle->info.chip_rev; 1164babd585Smrg 1174babd585Smrg gc_ip_type = (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) ? 1184babd585Smrg AMDGPU_HW_IP_COMPUTE : AMDGPU_HW_IP_GFX; 1195324fb0dSmrg 1205324fb0dSmrg r = amdgpu_cs_ctx_create(device_handle, &context_handle); 1215324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 1225324fb0dSmrg 1235324fb0dSmrg r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 1245324fb0dSmrg AMDGPU_GEM_DOMAIN_GTT, 0, 1255324fb0dSmrg &ib_result_handle, &ib_result_cpu, 1265324fb0dSmrg &ib_result_mc_address, &va_handle); 1275324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 1285324fb0dSmrg 1295324fb0dSmrg r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, 1305324fb0dSmrg &bo_list); 1315324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 1325324fb0dSmrg 1335324fb0dSmrg ptr = ib_result_cpu; 1345324fb0dSmrg 1355324fb0dSmrg for (i = 0; i < 16; ++i) 1365324fb0dSmrg ptr[i] = wait_or_signal ? GFX_COMPUTE_NOP: SDMA_NOP; 1375324fb0dSmrg 1385324fb0dSmrg chunks[0].chunk_id = AMDGPU_CHUNK_ID_IB; 1395324fb0dSmrg chunks[0].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4; 1405324fb0dSmrg chunks[0].chunk_data = (uint64_t)(uintptr_t)&chunk_data; 1415324fb0dSmrg chunk_data.ib_data._pad = 0; 1425324fb0dSmrg chunk_data.ib_data.va_start = ib_result_mc_address; 1435324fb0dSmrg chunk_data.ib_data.ib_bytes = 16 * 4; 1444babd585Smrg chunk_data.ib_data.ip_type = wait_or_signal ? gc_ip_type : 1455324fb0dSmrg AMDGPU_HW_IP_DMA; 1465324fb0dSmrg chunk_data.ib_data.ip_instance = 0; 1475324fb0dSmrg chunk_data.ib_data.ring = 0; 1484babd585Smrg chunk_data.ib_data.flags = AMDGPU_IB_FLAG_EMIT_MEM_SYNC; 1495324fb0dSmrg 1505324fb0dSmrg chunks[1].chunk_id = wait_or_signal ? 1515324fb0dSmrg AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT : 1525324fb0dSmrg AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL; 1535324fb0dSmrg chunks[1].length_dw = sizeof(struct drm_amdgpu_cs_chunk_syncobj) / 4; 1545324fb0dSmrg chunks[1].chunk_data = (uint64_t)(uintptr_t)&syncobj_data; 1555324fb0dSmrg syncobj_data.handle = syncobj_handle; 1565324fb0dSmrg syncobj_data.point = point; 1575324fb0dSmrg syncobj_data.flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT; 1585324fb0dSmrg 1595324fb0dSmrg r = amdgpu_cs_submit_raw(device_handle, 1605324fb0dSmrg context_handle, 1615324fb0dSmrg bo_list, 1625324fb0dSmrg 2, 1635324fb0dSmrg chunks, 1645324fb0dSmrg &seq_no); 1655324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 1665324fb0dSmrg 1675324fb0dSmrg 1685324fb0dSmrg memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence)); 1695324fb0dSmrg fence_status.context = context_handle; 1704babd585Smrg fence_status.ip_type = wait_or_signal ? gc_ip_type : 1715324fb0dSmrg AMDGPU_HW_IP_DMA; 1725324fb0dSmrg fence_status.ip_instance = 0; 1735324fb0dSmrg fence_status.ring = 0; 1745324fb0dSmrg fence_status.fence = seq_no; 1755324fb0dSmrg 1765324fb0dSmrg r = amdgpu_cs_query_fence_status(&fence_status, 1775324fb0dSmrg AMDGPU_TIMEOUT_INFINITE,0, &expired); 1785324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 1795324fb0dSmrg 1805324fb0dSmrg r = amdgpu_bo_list_destroy(bo_list); 1815324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 1825324fb0dSmrg 1835324fb0dSmrg r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 1845324fb0dSmrg ib_result_mc_address, 4096); 1855324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 1865324fb0dSmrg 1875324fb0dSmrg r = amdgpu_cs_ctx_free(context_handle); 1885324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 1895324fb0dSmrg 1905324fb0dSmrg return r; 1915324fb0dSmrg} 1925324fb0dSmrg 1935324fb0dSmrgstruct syncobj_point { 1945324fb0dSmrg uint32_t syncobj_handle; 1955324fb0dSmrg uint64_t point; 1965324fb0dSmrg}; 1975324fb0dSmrg 1985324fb0dSmrgstatic void *syncobj_wait(void *data) 1995324fb0dSmrg{ 2005324fb0dSmrg struct syncobj_point *sp = (struct syncobj_point *)data; 2015324fb0dSmrg int r; 2025324fb0dSmrg 2035324fb0dSmrg r = syncobj_command_submission_helper(sp->syncobj_handle, true, 2045324fb0dSmrg sp->point); 2055324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 2065324fb0dSmrg 2075324fb0dSmrg return (void *)(long)r; 2085324fb0dSmrg} 2095324fb0dSmrg 2105324fb0dSmrgstatic void *syncobj_signal(void *data) 2115324fb0dSmrg{ 2125324fb0dSmrg struct syncobj_point *sp = (struct syncobj_point *)data; 2135324fb0dSmrg int r; 2145324fb0dSmrg 2155324fb0dSmrg r = syncobj_command_submission_helper(sp->syncobj_handle, false, 2165324fb0dSmrg sp->point); 2175324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 2185324fb0dSmrg 2195324fb0dSmrg return (void *)(long)r; 2205324fb0dSmrg} 2215324fb0dSmrg 2225324fb0dSmrgstatic void amdgpu_syncobj_timeline_test(void) 2235324fb0dSmrg{ 2245324fb0dSmrg static pthread_t wait_thread; 2255324fb0dSmrg static pthread_t signal_thread; 2265324fb0dSmrg static pthread_t c_thread; 2275324fb0dSmrg struct syncobj_point sp1, sp2, sp3; 2285324fb0dSmrg uint32_t syncobj_handle; 2295324fb0dSmrg uint64_t payload; 2305324fb0dSmrg uint64_t wait_point, signal_point; 2315324fb0dSmrg uint64_t timeout; 2325324fb0dSmrg struct timespec tp; 2335324fb0dSmrg int r, sync_fd; 2345324fb0dSmrg void *tmp; 2355324fb0dSmrg 2365324fb0dSmrg r = amdgpu_cs_create_syncobj2(device_handle, 0, &syncobj_handle); 2375324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 2385324fb0dSmrg 2395324fb0dSmrg // wait on point 5 2405324fb0dSmrg sp1.syncobj_handle = syncobj_handle; 2415324fb0dSmrg sp1.point = 5; 2425324fb0dSmrg r = pthread_create(&wait_thread, NULL, syncobj_wait, &sp1); 2435324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 2445324fb0dSmrg 2455324fb0dSmrg // signal on point 10 2465324fb0dSmrg sp2.syncobj_handle = syncobj_handle; 2475324fb0dSmrg sp2.point = 10; 2485324fb0dSmrg r = pthread_create(&signal_thread, NULL, syncobj_signal, &sp2); 2495324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 2505324fb0dSmrg 2515324fb0dSmrg r = pthread_join(wait_thread, &tmp); 2525324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 2535324fb0dSmrg CU_ASSERT_EQUAL(tmp, 0); 2545324fb0dSmrg 2555324fb0dSmrg r = pthread_join(signal_thread, &tmp); 2565324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 2575324fb0dSmrg CU_ASSERT_EQUAL(tmp, 0); 2585324fb0dSmrg 2595324fb0dSmrg //query timeline payload 2605324fb0dSmrg r = amdgpu_cs_syncobj_query(device_handle, &syncobj_handle, 2615324fb0dSmrg &payload, 1); 2625324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 2635324fb0dSmrg CU_ASSERT_EQUAL(payload, 10); 2645324fb0dSmrg 2655324fb0dSmrg //signal on point 16 2665324fb0dSmrg sp3.syncobj_handle = syncobj_handle; 2675324fb0dSmrg sp3.point = 16; 2685324fb0dSmrg r = pthread_create(&c_thread, NULL, syncobj_signal, &sp3); 2695324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 2705324fb0dSmrg //CPU wait on point 16 2715324fb0dSmrg wait_point = 16; 2725324fb0dSmrg timeout = 0; 2735324fb0dSmrg clock_gettime(CLOCK_MONOTONIC, &tp); 2745324fb0dSmrg timeout = tp.tv_sec * 1000000000ULL + tp.tv_nsec; 2755324fb0dSmrg timeout += 0x10000000000; //10s 2765324fb0dSmrg r = amdgpu_cs_syncobj_timeline_wait(device_handle, &syncobj_handle, 2775324fb0dSmrg &wait_point, 1, timeout, 2785324fb0dSmrg DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL | 2795324fb0dSmrg DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, 2805324fb0dSmrg NULL); 2815324fb0dSmrg 2825324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 2835324fb0dSmrg r = pthread_join(c_thread, &tmp); 2845324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 2855324fb0dSmrg CU_ASSERT_EQUAL(tmp, 0); 2865324fb0dSmrg 2875324fb0dSmrg // export point 16 and import to point 18 2885324fb0dSmrg r = amdgpu_cs_syncobj_export_sync_file2(device_handle, syncobj_handle, 2895324fb0dSmrg 16, 2905324fb0dSmrg DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, 2915324fb0dSmrg &sync_fd); 2925324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 2935324fb0dSmrg r = amdgpu_cs_syncobj_import_sync_file2(device_handle, syncobj_handle, 2945324fb0dSmrg 18, sync_fd); 2955324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 2965324fb0dSmrg r = amdgpu_cs_syncobj_query(device_handle, &syncobj_handle, 2975324fb0dSmrg &payload, 1); 2985324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 2995324fb0dSmrg CU_ASSERT_EQUAL(payload, 18); 3005324fb0dSmrg 3015324fb0dSmrg // CPU signal on point 20 3025324fb0dSmrg signal_point = 20; 3035324fb0dSmrg r = amdgpu_cs_syncobj_timeline_signal(device_handle, &syncobj_handle, 3045324fb0dSmrg &signal_point, 1); 3055324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 3065324fb0dSmrg r = amdgpu_cs_syncobj_query(device_handle, &syncobj_handle, 3075324fb0dSmrg &payload, 1); 3085324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 3095324fb0dSmrg CU_ASSERT_EQUAL(payload, 20); 3105324fb0dSmrg 3115324fb0dSmrg r = amdgpu_cs_destroy_syncobj(device_handle, syncobj_handle); 3125324fb0dSmrg CU_ASSERT_EQUAL(r, 0); 3135324fb0dSmrg 3145324fb0dSmrg} 315