15324fb0dSmrg/*
25324fb0dSmrg * Copyright 2017 Advanced Micro Devices, Inc.
35324fb0dSmrg *
45324fb0dSmrg * Permission is hereby granted, free of charge, to any person obtaining a
55324fb0dSmrg * copy of this software and associated documentation files (the "Software"),
65324fb0dSmrg * to deal in the Software without restriction, including without limitation
75324fb0dSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
85324fb0dSmrg * and/or sell copies of the Software, and to permit persons to whom the
95324fb0dSmrg * Software is furnished to do so, subject to the following conditions:
105324fb0dSmrg *
115324fb0dSmrg * The above copyright notice and this permission notice shall be included in
125324fb0dSmrg * all copies or substantial portions of the Software.
135324fb0dSmrg *
145324fb0dSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
155324fb0dSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
165324fb0dSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
175324fb0dSmrg * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
185324fb0dSmrg * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
195324fb0dSmrg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
205324fb0dSmrg * OTHER DEALINGS IN THE SOFTWARE.
215324fb0dSmrg *
225324fb0dSmrg*/
235324fb0dSmrg
#include "CUnit/Basic.h"
#include "xf86drm.h"

#include "amdgpu_test.h"
#include "amdgpu_drm.h"
#include "amdgpu_internal.h"
#include <pthread.h>
#include <unistd.h>
315324fb0dSmrg
325324fb0dSmrgstatic  amdgpu_device_handle device_handle;
335324fb0dSmrgstatic  uint32_t  major_version;
345324fb0dSmrgstatic  uint32_t  minor_version;
355324fb0dSmrg
364babd585Smrgstatic  uint32_t  family_id;
374babd585Smrgstatic  uint32_t  chip_id;
384babd585Smrgstatic  uint32_t  chip_rev;
394babd585Smrg
405324fb0dSmrgstatic void amdgpu_syncobj_timeline_test(void);
415324fb0dSmrg
425324fb0dSmrgCU_BOOL suite_syncobj_timeline_tests_enable(void)
435324fb0dSmrg{
445324fb0dSmrg	int r;
455324fb0dSmrg	uint64_t cap = 0;
465324fb0dSmrg
475324fb0dSmrg	r = drmGetCap(drm_amdgpu[0], DRM_CAP_SYNCOBJ_TIMELINE, &cap);
485324fb0dSmrg	if (r || cap == 0)
495324fb0dSmrg		return CU_FALSE;
505324fb0dSmrg
515324fb0dSmrg	return CU_TRUE;
525324fb0dSmrg}
535324fb0dSmrg
545324fb0dSmrgint suite_syncobj_timeline_tests_init(void)
555324fb0dSmrg{
565324fb0dSmrg	int r;
575324fb0dSmrg
585324fb0dSmrg	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
595324fb0dSmrg				   &minor_version, &device_handle);
605324fb0dSmrg
615324fb0dSmrg	if (r) {
625324fb0dSmrg		if ((r == -EACCES) && (errno == EACCES))
635324fb0dSmrg			printf("\n\nError:%s. "
645324fb0dSmrg				"Hint:Try to run this test program as root.",
655324fb0dSmrg				strerror(errno));
665324fb0dSmrg		return CUE_SINIT_FAILED;
675324fb0dSmrg	}
685324fb0dSmrg
695324fb0dSmrg	return CUE_SUCCESS;
705324fb0dSmrg}
715324fb0dSmrg
725324fb0dSmrgint suite_syncobj_timeline_tests_clean(void)
735324fb0dSmrg{
745324fb0dSmrg	int r = amdgpu_device_deinitialize(device_handle);
755324fb0dSmrg
765324fb0dSmrg	if (r == 0)
775324fb0dSmrg		return CUE_SUCCESS;
785324fb0dSmrg	else
795324fb0dSmrg		return CUE_SCLEAN_FAILED;
805324fb0dSmrg}
815324fb0dSmrg
825324fb0dSmrg
835324fb0dSmrgCU_TestInfo syncobj_timeline_tests[] = {
845324fb0dSmrg	{ "syncobj timeline test",  amdgpu_syncobj_timeline_test },
855324fb0dSmrg	CU_TEST_INFO_NULL,
865324fb0dSmrg};
875324fb0dSmrg
885324fb0dSmrg#define GFX_COMPUTE_NOP  0xffff1000
895324fb0dSmrg#define SDMA_NOP  0x0
905324fb0dSmrgstatic int syncobj_command_submission_helper(uint32_t syncobj_handle, bool
915324fb0dSmrg					     wait_or_signal, uint64_t point)
925324fb0dSmrg{
935324fb0dSmrg	amdgpu_context_handle context_handle;
945324fb0dSmrg	amdgpu_bo_handle ib_result_handle;
955324fb0dSmrg	void *ib_result_cpu;
965324fb0dSmrg	uint64_t ib_result_mc_address;
975324fb0dSmrg	struct drm_amdgpu_cs_chunk chunks[2];
985324fb0dSmrg	struct drm_amdgpu_cs_chunk_data chunk_data;
995324fb0dSmrg	struct drm_amdgpu_cs_chunk_syncobj syncobj_data;
1005324fb0dSmrg	struct amdgpu_cs_fence fence_status;
1015324fb0dSmrg	amdgpu_bo_list_handle bo_list;
1025324fb0dSmrg	amdgpu_va_handle va_handle;
1039bd392adSmrg	uint32_t expired;
1045324fb0dSmrg	int i, r;
1055324fb0dSmrg	uint64_t seq_no;
1064babd585Smrg	static uint32_t *ptr;
1074babd585Smrg	struct amdgpu_gpu_info gpu_info = {0};
1084babd585Smrg	unsigned gc_ip_type;
1094babd585Smrg
1104babd585Smrg	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
1114babd585Smrg	CU_ASSERT_EQUAL(r, 0);
1124babd585Smrg
1134babd585Smrg	family_id = device_handle->info.family_id;
1144babd585Smrg	chip_id = device_handle->info.chip_external_rev;
1154babd585Smrg	chip_rev = device_handle->info.chip_rev;
1164babd585Smrg
1174babd585Smrg	gc_ip_type = (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) ?
1184babd585Smrg			AMDGPU_HW_IP_COMPUTE : AMDGPU_HW_IP_GFX;
1195324fb0dSmrg
1205324fb0dSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1215324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
1225324fb0dSmrg
1235324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1245324fb0dSmrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
1255324fb0dSmrg				    &ib_result_handle, &ib_result_cpu,
1265324fb0dSmrg				    &ib_result_mc_address, &va_handle);
1275324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
1285324fb0dSmrg
1295324fb0dSmrg	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
1305324fb0dSmrg			       &bo_list);
1315324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
1325324fb0dSmrg
1335324fb0dSmrg	ptr = ib_result_cpu;
1345324fb0dSmrg
1355324fb0dSmrg	for (i = 0; i < 16; ++i)
1365324fb0dSmrg		ptr[i] = wait_or_signal ? GFX_COMPUTE_NOP: SDMA_NOP;
1375324fb0dSmrg
1385324fb0dSmrg	chunks[0].chunk_id = AMDGPU_CHUNK_ID_IB;
1395324fb0dSmrg	chunks[0].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
1405324fb0dSmrg	chunks[0].chunk_data = (uint64_t)(uintptr_t)&chunk_data;
1415324fb0dSmrg	chunk_data.ib_data._pad = 0;
1425324fb0dSmrg	chunk_data.ib_data.va_start = ib_result_mc_address;
1435324fb0dSmrg	chunk_data.ib_data.ib_bytes = 16 * 4;
1444babd585Smrg	chunk_data.ib_data.ip_type = wait_or_signal ? gc_ip_type :
1455324fb0dSmrg		AMDGPU_HW_IP_DMA;
1465324fb0dSmrg	chunk_data.ib_data.ip_instance = 0;
1475324fb0dSmrg	chunk_data.ib_data.ring = 0;
1484babd585Smrg	chunk_data.ib_data.flags = AMDGPU_IB_FLAG_EMIT_MEM_SYNC;
1495324fb0dSmrg
1505324fb0dSmrg	chunks[1].chunk_id = wait_or_signal ?
1515324fb0dSmrg		AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT :
1525324fb0dSmrg		AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL;
1535324fb0dSmrg	chunks[1].length_dw = sizeof(struct drm_amdgpu_cs_chunk_syncobj) / 4;
1545324fb0dSmrg	chunks[1].chunk_data = (uint64_t)(uintptr_t)&syncobj_data;
1555324fb0dSmrg	syncobj_data.handle = syncobj_handle;
1565324fb0dSmrg	syncobj_data.point = point;
1575324fb0dSmrg	syncobj_data.flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
1585324fb0dSmrg
1595324fb0dSmrg	r = amdgpu_cs_submit_raw(device_handle,
1605324fb0dSmrg				 context_handle,
1615324fb0dSmrg				 bo_list,
1625324fb0dSmrg				 2,
1635324fb0dSmrg				 chunks,
1645324fb0dSmrg				 &seq_no);
1655324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
1665324fb0dSmrg
1675324fb0dSmrg
1685324fb0dSmrg	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
1695324fb0dSmrg	fence_status.context = context_handle;
1704babd585Smrg	fence_status.ip_type = wait_or_signal ? gc_ip_type :
1715324fb0dSmrg		AMDGPU_HW_IP_DMA;
1725324fb0dSmrg	fence_status.ip_instance = 0;
1735324fb0dSmrg	fence_status.ring = 0;
1745324fb0dSmrg	fence_status.fence = seq_no;
1755324fb0dSmrg
1765324fb0dSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
1775324fb0dSmrg			AMDGPU_TIMEOUT_INFINITE,0, &expired);
1785324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
1795324fb0dSmrg
1805324fb0dSmrg	r = amdgpu_bo_list_destroy(bo_list);
1815324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
1825324fb0dSmrg
1835324fb0dSmrg	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1845324fb0dSmrg				     ib_result_mc_address, 4096);
1855324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
1865324fb0dSmrg
1875324fb0dSmrg	r = amdgpu_cs_ctx_free(context_handle);
1885324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
1895324fb0dSmrg
1905324fb0dSmrg	return r;
1915324fb0dSmrg}
1925324fb0dSmrg
/* Argument bundle handed to the syncobj_wait / syncobj_signal threads. */
struct syncobj_point {
	uint32_t syncobj_handle;	/* timeline syncobj to operate on */
	uint64_t point;			/* timeline point to wait for / signal */
};
1975324fb0dSmrg
1985324fb0dSmrgstatic void *syncobj_wait(void *data)
1995324fb0dSmrg{
2005324fb0dSmrg	struct syncobj_point *sp = (struct syncobj_point *)data;
2015324fb0dSmrg	int r;
2025324fb0dSmrg
2035324fb0dSmrg	r = syncobj_command_submission_helper(sp->syncobj_handle, true,
2045324fb0dSmrg					      sp->point);
2055324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
2065324fb0dSmrg
2075324fb0dSmrg	return (void *)(long)r;
2085324fb0dSmrg}
2095324fb0dSmrg
2105324fb0dSmrgstatic void *syncobj_signal(void *data)
2115324fb0dSmrg{
2125324fb0dSmrg	struct syncobj_point *sp = (struct syncobj_point *)data;
2135324fb0dSmrg	int r;
2145324fb0dSmrg
2155324fb0dSmrg	r = syncobj_command_submission_helper(sp->syncobj_handle, false,
2165324fb0dSmrg					      sp->point);
2175324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
2185324fb0dSmrg
2195324fb0dSmrg	return (void *)(long)r;
2205324fb0dSmrg}
2215324fb0dSmrg
2225324fb0dSmrgstatic void amdgpu_syncobj_timeline_test(void)
2235324fb0dSmrg{
2245324fb0dSmrg	static pthread_t wait_thread;
2255324fb0dSmrg	static pthread_t signal_thread;
2265324fb0dSmrg	static pthread_t c_thread;
2275324fb0dSmrg	struct syncobj_point sp1, sp2, sp3;
2285324fb0dSmrg	uint32_t syncobj_handle;
2295324fb0dSmrg	uint64_t payload;
2305324fb0dSmrg	uint64_t wait_point, signal_point;
2315324fb0dSmrg	uint64_t timeout;
2325324fb0dSmrg	struct timespec tp;
2335324fb0dSmrg	int r, sync_fd;
2345324fb0dSmrg	void *tmp;
2355324fb0dSmrg
2365324fb0dSmrg	r =  amdgpu_cs_create_syncobj2(device_handle, 0, &syncobj_handle);
2375324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
2385324fb0dSmrg
2395324fb0dSmrg	// wait on point 5
2405324fb0dSmrg	sp1.syncobj_handle = syncobj_handle;
2415324fb0dSmrg	sp1.point = 5;
2425324fb0dSmrg	r = pthread_create(&wait_thread, NULL, syncobj_wait, &sp1);
2435324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
2445324fb0dSmrg
2455324fb0dSmrg	// signal on point 10
2465324fb0dSmrg	sp2.syncobj_handle = syncobj_handle;
2475324fb0dSmrg	sp2.point = 10;
2485324fb0dSmrg	r = pthread_create(&signal_thread, NULL, syncobj_signal, &sp2);
2495324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
2505324fb0dSmrg
2515324fb0dSmrg	r = pthread_join(wait_thread, &tmp);
2525324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
2535324fb0dSmrg	CU_ASSERT_EQUAL(tmp, 0);
2545324fb0dSmrg
2555324fb0dSmrg	r = pthread_join(signal_thread, &tmp);
2565324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
2575324fb0dSmrg	CU_ASSERT_EQUAL(tmp, 0);
2585324fb0dSmrg
2595324fb0dSmrg	//query timeline payload
2605324fb0dSmrg	r = amdgpu_cs_syncobj_query(device_handle, &syncobj_handle,
2615324fb0dSmrg				    &payload, 1);
2625324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
2635324fb0dSmrg	CU_ASSERT_EQUAL(payload, 10);
2645324fb0dSmrg
2655324fb0dSmrg	//signal on point 16
2665324fb0dSmrg	sp3.syncobj_handle = syncobj_handle;
2675324fb0dSmrg	sp3.point = 16;
2685324fb0dSmrg	r = pthread_create(&c_thread, NULL, syncobj_signal, &sp3);
2695324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
2705324fb0dSmrg	//CPU wait on point 16
2715324fb0dSmrg	wait_point = 16;
2725324fb0dSmrg	timeout = 0;
2735324fb0dSmrg	clock_gettime(CLOCK_MONOTONIC, &tp);
2745324fb0dSmrg	timeout = tp.tv_sec * 1000000000ULL + tp.tv_nsec;
2755324fb0dSmrg	timeout += 0x10000000000; //10s
2765324fb0dSmrg	r = amdgpu_cs_syncobj_timeline_wait(device_handle, &syncobj_handle,
2775324fb0dSmrg					    &wait_point, 1, timeout,
2785324fb0dSmrg					    DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
2795324fb0dSmrg					    DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
2805324fb0dSmrg					    NULL);
2815324fb0dSmrg
2825324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
2835324fb0dSmrg	r = pthread_join(c_thread, &tmp);
2845324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
2855324fb0dSmrg	CU_ASSERT_EQUAL(tmp, 0);
2865324fb0dSmrg
2875324fb0dSmrg	// export point 16 and import to point 18
2885324fb0dSmrg	r = amdgpu_cs_syncobj_export_sync_file2(device_handle, syncobj_handle,
2895324fb0dSmrg						16,
2905324fb0dSmrg						DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
2915324fb0dSmrg						&sync_fd);
2925324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
2935324fb0dSmrg	r = amdgpu_cs_syncobj_import_sync_file2(device_handle, syncobj_handle,
2945324fb0dSmrg						18, sync_fd);
2955324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
2965324fb0dSmrg	r = amdgpu_cs_syncobj_query(device_handle, &syncobj_handle,
2975324fb0dSmrg				    &payload, 1);
2985324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
2995324fb0dSmrg	CU_ASSERT_EQUAL(payload, 18);
3005324fb0dSmrg
3015324fb0dSmrg	// CPU signal on point 20
3025324fb0dSmrg	signal_point = 20;
3035324fb0dSmrg	r = amdgpu_cs_syncobj_timeline_signal(device_handle, &syncobj_handle,
3045324fb0dSmrg					      &signal_point, 1);
3055324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
3065324fb0dSmrg	r = amdgpu_cs_syncobj_query(device_handle, &syncobj_handle,
3075324fb0dSmrg				    &payload, 1);
3085324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
3095324fb0dSmrg	CU_ASSERT_EQUAL(payload, 20);
3105324fb0dSmrg
3115324fb0dSmrg	r = amdgpu_cs_destroy_syncobj(device_handle, syncobj_handle);
3125324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
3135324fb0dSmrg
3145324fb0dSmrg}
315