basic_tests.c revision d8807b2f
1/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22*/
23
24#ifdef HAVE_CONFIG_H
25#include "config.h"
26#endif
27
28#include <stdio.h>
29#include <stdlib.h>
30#include <unistd.h>
31#ifdef HAVE_ALLOCA_H
32# include <alloca.h>
33#endif
34
35#include "CUnit/Basic.h"
36
37#include "amdgpu_test.h"
38#include "amdgpu_drm.h"
39
40static  amdgpu_device_handle device_handle;
41static  uint32_t  major_version;
42static  uint32_t  minor_version;
43static  uint32_t  family_id;
44
45static void amdgpu_query_info_test(void);
46static void amdgpu_memory_alloc(void);
47static void amdgpu_command_submission_gfx(void);
48static void amdgpu_command_submission_compute(void);
49static void amdgpu_command_submission_multi_fence(void);
50static void amdgpu_command_submission_sdma(void);
51static void amdgpu_userptr_test(void);
52static void amdgpu_semaphore_test(void);
53
54static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
55static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
56static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
57
/*
 * CUnit registration table for the basic amdgpu tests; consumed by the
 * suite definition in the test harness (see amdgpu_test.h).
 */
CU_TestInfo basic_tests[] = {
	{ "Query Info Test",  amdgpu_query_info_test },
	{ "Memory alloc Test",  amdgpu_memory_alloc },
	{ "Userptr Test",  amdgpu_userptr_test },
	{ "Command submission Test (GFX)",  amdgpu_command_submission_gfx },
	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
	{ "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
	{ "SW semaphore Test",  amdgpu_semaphore_test },
	CU_TEST_INFO_NULL,
};
69#define BUFFER_SIZE (8 * 1024)
70#define SDMA_PKT_HEADER_op_offset 0
71#define SDMA_PKT_HEADER_op_mask   0x000000FF
72#define SDMA_PKT_HEADER_op_shift  0
73#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
74#define SDMA_OPCODE_CONSTANT_FILL  11
75#       define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)           ((x) << 14)
76	/* 0 = byte fill
77	 * 2 = DW fill
78	 */
79#define SDMA_PACKET(op, sub_op, e)	((((e) & 0xFFFF) << 16) |	\
80					(((sub_op) & 0xFF) << 8) |	\
81					(((op) & 0xFF) << 0))
82#define	SDMA_OPCODE_WRITE				  2
83#       define SDMA_WRITE_SUB_OPCODE_LINEAR               0
84#       define SDMA_WRTIE_SUB_OPCODE_TILED                1
85
86#define	SDMA_OPCODE_COPY				  1
87#       define SDMA_COPY_SUB_OPCODE_LINEAR                0
88
89#define GFX_COMPUTE_NOP  0xffff1000
90#define SDMA_NOP  0x0
91
92/* PM4 */
93#define	PACKET_TYPE0	0
94#define	PACKET_TYPE1	1
95#define	PACKET_TYPE2	2
96#define	PACKET_TYPE3	3
97
98#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
99#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
100#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
101#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
102#define PACKET0(reg, n)	((PACKET_TYPE0 << 30) |				\
103			 ((reg) & 0xFFFF) |			\
104			 ((n) & 0x3FFF) << 16)
105#define CP_PACKET2			0x80000000
106#define		PACKET2_PAD_SHIFT		0
107#define		PACKET2_PAD_MASK		(0x3fffffff << 0)
108
109#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
110
111#define PACKET3(op, n)	((PACKET_TYPE3 << 30) |				\
112			 (((op) & 0xFF) << 8) |				\
113			 ((n) & 0x3FFF) << 16)
114
115/* Packet 3 types */
116#define	PACKET3_NOP					0x10
117
118#define	PACKET3_WRITE_DATA				0x37
119#define		WRITE_DATA_DST_SEL(x)                   ((x) << 8)
120		/* 0 - register
121		 * 1 - memory (sync - via GRBM)
122		 * 2 - gl2
123		 * 3 - gds
124		 * 4 - reserved
125		 * 5 - memory (async - direct)
126		 */
127#define		WR_ONE_ADDR                             (1 << 16)
128#define		WR_CONFIRM                              (1 << 20)
129#define		WRITE_DATA_CACHE_POLICY(x)              ((x) << 25)
130		/* 0 - LRU
131		 * 1 - Stream
132		 */
133#define		WRITE_DATA_ENGINE_SEL(x)                ((x) << 30)
134		/* 0 - me
135		 * 1 - pfp
136		 * 2 - ce
137		 */
138
139#define	PACKET3_DMA_DATA				0x50
140/* 1. header
141 * 2. CONTROL
142 * 3. SRC_ADDR_LO or DATA [31:0]
143 * 4. SRC_ADDR_HI [31:0]
144 * 5. DST_ADDR_LO [31:0]
145 * 6. DST_ADDR_HI [7:0]
146 * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
147 */
148/* CONTROL */
149#              define PACKET3_DMA_DATA_ENGINE(x)     ((x) << 0)
150		/* 0 - ME
151		 * 1 - PFP
152		 */
153#              define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
154		/* 0 - LRU
155		 * 1 - Stream
156		 * 2 - Bypass
157		 */
158#              define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15)
159#              define PACKET3_DMA_DATA_DST_SEL(x)  ((x) << 20)
160		/* 0 - DST_ADDR using DAS
161		 * 1 - GDS
162		 * 3 - DST_ADDR using L2
163		 */
164#              define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
165		/* 0 - LRU
166		 * 1 - Stream
167		 * 2 - Bypass
168		 */
169#              define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27)
170#              define PACKET3_DMA_DATA_SRC_SEL(x)  ((x) << 29)
171		/* 0 - SRC_ADDR using SAS
172		 * 1 - GDS
173		 * 2 - DATA
174		 * 3 - SRC_ADDR using L2
175		 */
176#              define PACKET3_DMA_DATA_CP_SYNC     (1 << 31)
177/* COMMAND */
178#              define PACKET3_DMA_DATA_DIS_WC      (1 << 21)
179#              define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22)
180		/* 0 - none
181		 * 1 - 8 in 16
182		 * 2 - 8 in 32
183		 * 3 - 8 in 64
184		 */
185#              define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24)
186		/* 0 - none
187		 * 1 - 8 in 16
188		 * 2 - 8 in 32
189		 * 3 - 8 in 64
190		 */
191#              define PACKET3_DMA_DATA_CMD_SAS     (1 << 26)
192		/* 0 - memory
193		 * 1 - register
194		 */
195#              define PACKET3_DMA_DATA_CMD_DAS     (1 << 27)
196		/* 0 - memory
197		 * 1 - register
198		 */
199#              define PACKET3_DMA_DATA_CMD_SAIC    (1 << 28)
200#              define PACKET3_DMA_DATA_CMD_DAIC    (1 << 29)
201#              define PACKET3_DMA_DATA_CMD_RAW_WAIT  (1 << 30)
202
203#define SDMA_PACKET_SI(op, b, t, s, cnt)	((((op) & 0xF) << 28) |	\
204						(((b) & 0x1) << 26) |		\
205						(((t) & 0x1) << 23) |		\
206						(((s) & 0x1) << 22) |		\
207						(((cnt) & 0xFFFFF) << 0))
208#define	SDMA_OPCODE_COPY_SI	3
209#define SDMA_OPCODE_CONSTANT_FILL_SI	13
210#define SDMA_NOP_SI  0xf
211#define GFX_COMPUTE_NOP_SI 0x80000000
212#define	PACKET3_DMA_DATA_SI	0x41
213#              define PACKET3_DMA_DATA_SI_ENGINE(x)     ((x) << 27)
214		/* 0 - ME
215		 * 1 - PFP
216		 */
217#              define PACKET3_DMA_DATA_SI_DST_SEL(x)  ((x) << 20)
218		/* 0 - DST_ADDR using DAS
219		 * 1 - GDS
220		 * 3 - DST_ADDR using L2
221		 */
222#              define PACKET3_DMA_DATA_SI_SRC_SEL(x)  ((x) << 29)
223		/* 0 - SRC_ADDR using SAS
224		 * 1 - GDS
225		 * 2 - DATA
226		 * 3 - SRC_ADDR using L2
227		 */
228#              define PACKET3_DMA_DATA_SI_CP_SYNC     (1 << 31)
229
230int suite_basic_tests_init(void)
231{
232	struct amdgpu_gpu_info gpu_info = {0};
233	int r;
234
235	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
236				   &minor_version, &device_handle);
237
238	if (r) {
239		if ((r == -EACCES) && (errno == EACCES))
240			printf("\n\nError:%s. "
241				"Hint:Try to run this test program as root.",
242				strerror(errno));
243		return CUE_SINIT_FAILED;
244	}
245
246	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
247	if (r)
248		return CUE_SINIT_FAILED;
249
250	family_id = gpu_info.family_id;
251
252	return CUE_SUCCESS;
253}
254
255int suite_basic_tests_clean(void)
256{
257	int r = amdgpu_device_deinitialize(device_handle);
258
259	if (r == 0)
260		return CUE_SUCCESS;
261	else
262		return CUE_SCLEAN_FAILED;
263}
264
265static void amdgpu_query_info_test(void)
266{
267	struct amdgpu_gpu_info gpu_info = {0};
268	uint32_t version, feature;
269	int r;
270
271	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
272	CU_ASSERT_EQUAL(r, 0);
273
274	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
275					  0, &version, &feature);
276	CU_ASSERT_EQUAL(r, 0);
277}
278
279static void amdgpu_memory_alloc(void)
280{
281	amdgpu_bo_handle bo;
282	amdgpu_va_handle va_handle;
283	uint64_t bo_mc;
284	int r;
285
286	/* Test visible VRAM */
287	bo = gpu_mem_alloc(device_handle,
288			4096, 4096,
289			AMDGPU_GEM_DOMAIN_VRAM,
290			AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
291			&bo_mc, &va_handle);
292
293	r = gpu_mem_free(bo, va_handle, bo_mc, 4096);
294	CU_ASSERT_EQUAL(r, 0);
295
296	/* Test invisible VRAM */
297	bo = gpu_mem_alloc(device_handle,
298			4096, 4096,
299			AMDGPU_GEM_DOMAIN_VRAM,
300			AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
301			&bo_mc, &va_handle);
302
303	r = gpu_mem_free(bo, va_handle, bo_mc, 4096);
304	CU_ASSERT_EQUAL(r, 0);
305
306	/* Test GART Cacheable */
307	bo = gpu_mem_alloc(device_handle,
308			4096, 4096,
309			AMDGPU_GEM_DOMAIN_GTT,
310			0, &bo_mc, &va_handle);
311
312	r = gpu_mem_free(bo, va_handle, bo_mc, 4096);
313	CU_ASSERT_EQUAL(r, 0);
314
315	/* Test GART USWC */
316	bo = gpu_mem_alloc(device_handle,
317			4096, 4096,
318			AMDGPU_GEM_DOMAIN_GTT,
319			AMDGPU_GEM_CREATE_CPU_GTT_USWC,
320			&bo_mc, &va_handle);
321
322	r = gpu_mem_free(bo, va_handle, bo_mc, 4096);
323	CU_ASSERT_EQUAL(r, 0);
324}
325
/*
 * Submit one GFX command stream built from two IBs held in two separate
 * BOs: a Constant Engine (CE) IB and a Drawing Engine IB that waits on
 * the CE counter. Waits for the fence, then frees all resources.
 */
static void amdgpu_command_submission_gfx_separate_ibs(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* One GTT BO for the DE IB and one for the CE IB. */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	/* Both BOs must be in the resource list so the kernel keeps them
	 * resident during execution. */
	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		/* Counter-init packet is only emitted on non-SI families. */
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	/* NOTE(review): 0xc0008400 looks like IT_INCREMENT_CE_COUNTER —
	 * confirm against the PM4 packet spec. */
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	/* Single request carrying both IBs (CE first, then DE). */
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);

	CU_ASSERT_EQUAL(r, 0);

	/* Block until the submission retires before freeing the IB BOs. */
	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);

}
416
/*
 * Same CE/DE two-IB submission as the "separate IBs" test, except both
 * IBs live in a single shared BO: the CE IB starts at offset 0 and the
 * DE IB at byte offset 16 of the same mapping.
 */
static void amdgpu_command_submission_gfx_shared_ib(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* Single BO shared by both IBs. */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
			       &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		/* Counter-init packet is only emitted on non-SI families. */
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* DE IB: WAIT_ON_CE_COUNTER at dword 4 (byte offset 16) of the
	 * same BO, matching the +16 MC address below. */
	ptr = (uint32_t *)ib_result_cpu + 4;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);

	CU_ASSERT_EQUAL(r, 0);

	/* Wait for completion before tearing down. */
	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
495
/* CP WRITE_DATA test on the GFX ring (shared helper). */
static void amdgpu_command_submission_gfx_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
}
500
/* CP DMA constant-fill test on the GFX ring (shared helper). */
static void amdgpu_command_submission_gfx_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
}
505
/* CP DMA linear-copy test on the GFX ring (shared helper). */
static void amdgpu_command_submission_gfx_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
}
510
/* Top-level GFX command-submission test: runs every GFX sub-test. */
static void amdgpu_command_submission_gfx(void)
{
	/* write data using the CP */
	amdgpu_command_submission_gfx_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_gfx_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_gfx_cp_copy_data();
	/* separate IB buffers for multi-IB submission */
	amdgpu_command_submission_gfx_separate_ibs();
	/* shared IB buffer for multi-IB submission */
	amdgpu_command_submission_gfx_shared_ib();
}
524
/*
 * Software-semaphore test. Two scenarios:
 *   1. signal on SDMA, wait on GFX, within the same context;
 *   2. signal and wait on GFX across two different contexts.
 * Each scenario submits a single NOP IB on each side and verifies the
 * waiting submission's fence expires within 500 ms.
 */
static void amdgpu_semaphore_test(void)
{
	amdgpu_context_handle context_handle[2];
	amdgpu_semaphore_handle sem;
	amdgpu_bo_handle ib_result_handle[2];
	void *ib_result_cpu[2];
	uint64_t ib_result_mc_address[2];
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2] = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	uint32_t sdma_nop, gfx_nop;
	amdgpu_bo_list_handle bo_list[2];
	amdgpu_va_handle va_handle[2];
	int r, i;

	/* NOP encodings differ between SI and later families. */
	if (family_id == AMDGPU_FAMILY_SI) {
		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
		gfx_nop = GFX_COMPUTE_NOP_SI;
	} else {
		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
		gfx_nop = GFX_COMPUTE_NOP;
	}

	r = amdgpu_cs_create_semaphore(&sem);
	CU_ASSERT_EQUAL(r, 0);
	/* Two contexts, each with its own single-NOP IB and BO list. */
	for (i = 0; i < 2; i++) {
		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle[i], &ib_result_cpu[i],
					    &ib_result_mc_address[i], &va_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
				       NULL, &bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	/* 1. same context different engine */
	ptr = ib_result_cpu[0];
	ptr[0] = sdma_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	/* Signal after the SDMA submission ... */
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	/* ... and make the upcoming GFX submission wait on it. */
	r = amdgpu_cs_wait_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	/* GFX fence must expire within the 500 ms timeout. */
	fence_status.context = context_handle[0];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* 2. same engine different context */
	ptr = ib_result_cpu[0];
	ptr[0] = gfx_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	/* Signal from context 0, wait in context 1 — both on GFX. */
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[1], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request[1], 1);

	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[1];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* Tear down everything created in the setup loop. */
	for (i = 0; i < 2; i++) {
		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
					     ib_result_mc_address[i], 4096);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_cs_ctx_free(context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_destroy_semaphore(sem);
	CU_ASSERT_EQUAL(r, 0);
}
664
/*
 * Submit a 16-dword NOP IB on every available compute ring and wait for
 * each fence. Ring availability is read from the HW IP info bitmask.
 */
static void amdgpu_command_submission_compute_nop(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request;
	struct amdgpu_cs_ib_info ib_info;
	struct amdgpu_cs_fence fence_status;
	uint32_t *ptr;
	uint32_t expired;
	int i, r, instance;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip info;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* Iterate over the set bits of available_rings, one ring per bit. */
	for (instance = 0; (1 << instance) & info.available_rings; instance++) {
		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle, &ib_result_cpu,
					    &ib_result_mc_address, &va_handle);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
				       &bo_list);
		CU_ASSERT_EQUAL(r, 0);

		/* PACKET3_NOP with count 14: header + 15 payload dwords,
		 * i.e. a 16-dword IB matching ib_info.size below. */
		ptr = ib_result_cpu;
		memset(ptr, 0, 16);
		ptr[0]=PACKET3(PACKET3_NOP, 14);

		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
		ib_info.ib_mc_address = ib_result_mc_address;
		ib_info.size = 16;

		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
		ibs_request.ring = instance;
		ibs_request.number_of_ibs = 1;
		ibs_request.ibs = &ib_info;
		ibs_request.resources = bo_list;
		ibs_request.fence_info.handle = NULL;

		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
		r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
		CU_ASSERT_EQUAL(r, 0);

		/* Wait for this ring's submission before moving on. */
		fence_status.context = context_handle;
		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
		fence_status.ip_instance = 0;
		fence_status.ring = instance;
		fence_status.fence = ibs_request.seq_no;

		r = amdgpu_cs_query_fence_status(&fence_status,
						 AMDGPU_TIMEOUT_INFINITE,
						 0, &expired);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
					     ib_result_mc_address, 4096);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
740
/* CP WRITE_DATA test on the compute ring (shared helper). */
static void amdgpu_command_submission_compute_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
}
745
/* CP DMA constant-fill test on the compute ring (shared helper). */
static void amdgpu_command_submission_compute_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
}
750
/* CP DMA linear-copy test on the compute ring (shared helper). */
static void amdgpu_command_submission_compute_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
}
755
/* Top-level compute command-submission test: runs every compute sub-test. */
static void amdgpu_command_submission_compute(void)
{
	/* write data using the CP */
	amdgpu_command_submission_compute_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_compute_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_compute_cp_copy_data();
	/* nop test */
	amdgpu_command_submission_compute_nop();
}
767
/*
 * Allocate an IB, copy the caller-supplied PM4 stream into it, submit it
 * on the given IP/ring together with the caller's resource BOs, wait for
 * the fence, and free the IB again.
 *
 * The caller creates and releases pm4_src, resources, ib_info and
 * ibs_request; this helper owns only the IB BO and the temporary BO list
 * it builds (caller's resources + the IB itself).
 */
static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
				       unsigned ip_type,
				       int instance, int pm4_dw, uint32_t *pm4_src,
				       int res_cnt, amdgpu_bo_handle *resources,
				       struct amdgpu_cs_ib_info *ib_info,
				       struct amdgpu_cs_request *ibs_request)
{
	int r;
	uint32_t expired;
	uint32_t *ring_ptr;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_fence fence_status = {0};
	/* Stack-allocated list: caller's BOs plus one slot for the IB. */
	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
	amdgpu_va_handle va_handle;

	/* prepare CS */
	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
	CU_ASSERT_NOT_EQUAL(resources, NULL);
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
	/* 4096-byte IB holds at most 1024 dwords. */
	CU_ASSERT_TRUE(pm4_dw <= 1024);

	/* allocate IB */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* copy PM4 packet to ring from caller */
	ring_ptr = ib_result_cpu;
	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));

	ib_info->ib_mc_address = ib_result_mc_address;
	ib_info->size = pm4_dw;

	ibs_request->ip_type = ip_type;
	ibs_request->ring = instance;
	ibs_request->number_of_ibs = 1;
	ibs_request->ibs = ib_info;
	ibs_request->fence_info.handle = NULL;

	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
	all_res[res_cnt] = ib_result_handle;

	r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res,
				  NULL, &ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	/* The list can be destroyed right after submit; the kernel holds
	 * its own references for the in-flight submission. */
	r = amdgpu_bo_list_destroy(ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ibs_request->ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request->seq_no;

	/* wait for IB accomplished */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);
}
850
/*
 * Write-linear test shared by the SDMA, GFX and compute paths: build a
 * write packet that fills a 128-dword GTT buffer with 0xdeadbeaf,
 * submit it on the given IP, then read the buffer back and verify.
 * Runs twice: once with cacheable GTT and once with USWC GTT.
 */
static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
{
	const int sdma_write_length = 128;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	int i, j, r, loop;
	/* Iteration 0: cacheable GTT; iteration 1: write-combined GTT. */
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	loop = 0;
	while(loop < 2) {
		/* allocate UC bo for sDMA use */
		r = amdgpu_bo_alloc_and_map(device_handle,
					    sdma_write_length * sizeof(uint32_t),
					    4096, AMDGPU_GEM_DOMAIN_GTT,
					    gtt_flags[loop], &bo, (void**)&bo_cpu,
					    &bo_mc, &va_handle);
		CU_ASSERT_EQUAL(r, 0);

		/* clear bo */
		memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));


		resources[0] = bo;

		/* fulfill PM4: test DMA write-linear */
		i = j = 0;
		if (ip_type == AMDGPU_HW_IP_DMA) {
			/* SDMA packet: header, destination address lo/hi,
			 * count (families other than SI), then the data. */
			if (family_id == AMDGPU_FAMILY_SI)
				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
							  sdma_write_length);
			else
				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
						       SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
			pm4[i++] = 0xffffffff & bo_mc;
			pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
			/* AI and newer encode the dword count as length-1. */
			if (family_id >= AMDGPU_FAMILY_AI)
				pm4[i++] = sdma_write_length - 1;
			else if (family_id != AMDGPU_FAMILY_SI)
				pm4[i++] = sdma_write_length;
			while(j++ < sdma_write_length)
				pm4[i++] = 0xdeadbeaf;
		} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
			   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
			/* CP WRITE_DATA to memory (dst_sel 5 = async),
			 * with write confirmation. */
			pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
			pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
			pm4[i++] = 0xfffffffc & bo_mc;
			pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
			while(j++ < sdma_write_length)
				pm4[i++] = 0xdeadbeaf;
		}

		amdgpu_test_exec_cs_helper(context_handle,
					   ip_type, 0,
					   i, pm4,
					   1, resources,
					   ib_info, ibs_request);

		/* verify if SDMA test result meets with expected */
		i = 0;
		while(i < sdma_write_length) {
			CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
		}

		r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
					     sdma_write_length * sizeof(uint32_t));
		CU_ASSERT_EQUAL(r, 0);
		loop++;
	}
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
953
/* SDMA write-linear test (shared helper on the DMA ring). */
static void amdgpu_command_submission_sdma_write_linear(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
}
958
959static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
960{
961	const int sdma_write_length = 1024 * 1024;
962	const int pm4_dw = 256;
963	amdgpu_context_handle context_handle;
964	amdgpu_bo_handle bo;
965	amdgpu_bo_handle *resources;
966	uint32_t *pm4;
967	struct amdgpu_cs_ib_info *ib_info;
968	struct amdgpu_cs_request *ibs_request;
969	uint64_t bo_mc;
970	volatile uint32_t *bo_cpu;
971	int i, j, r, loop;
972	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
973	amdgpu_va_handle va_handle;
974
975	pm4 = calloc(pm4_dw, sizeof(*pm4));
976	CU_ASSERT_NOT_EQUAL(pm4, NULL);
977
978	ib_info = calloc(1, sizeof(*ib_info));
979	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
980
981	ibs_request = calloc(1, sizeof(*ibs_request));
982	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
983
984	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
985	CU_ASSERT_EQUAL(r, 0);
986
987	/* prepare resource */
988	resources = calloc(1, sizeof(amdgpu_bo_handle));
989	CU_ASSERT_NOT_EQUAL(resources, NULL);
990
991	loop = 0;
992	while(loop < 2) {
993		/* allocate UC bo for sDMA use */
994		r = amdgpu_bo_alloc_and_map(device_handle,
995					    sdma_write_length, 4096,
996					    AMDGPU_GEM_DOMAIN_GTT,
997					    gtt_flags[loop], &bo, (void**)&bo_cpu,
998					    &bo_mc, &va_handle);
999		CU_ASSERT_EQUAL(r, 0);
1000
1001		/* clear bo */
1002		memset((void*)bo_cpu, 0, sdma_write_length);
1003
1004		resources[0] = bo;
1005
1006		/* fulfill PM4: test DMA const fill */
1007		i = j = 0;
1008		if (ip_type == AMDGPU_HW_IP_DMA) {
1009			if (family_id == AMDGPU_FAMILY_SI) {
1010				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI, 0, 0, 0,
1011							  sdma_write_length / 4);
1012				pm4[i++] = 0xfffffffc & bo_mc;
1013				pm4[i++] = 0xdeadbeaf;
1014				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
1015			} else {
1016				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
1017						       SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
1018				pm4[i++] = 0xffffffff & bo_mc;
1019				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1020				pm4[i++] = 0xdeadbeaf;
1021				if (family_id >= AMDGPU_FAMILY_AI)
1022					pm4[i++] = sdma_write_length - 1;
1023				else
1024					pm4[i++] = sdma_write_length;
1025			}
1026		} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1027			   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1028			if (family_id == AMDGPU_FAMILY_SI) {
1029				pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
1030				pm4[i++] = 0xdeadbeaf;
1031				pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
1032					PACKET3_DMA_DATA_SI_DST_SEL(0) |
1033					PACKET3_DMA_DATA_SI_SRC_SEL(2) |
1034					PACKET3_DMA_DATA_SI_CP_SYNC;
1035				pm4[i++] = 0xffffffff & bo_mc;
1036				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1037				pm4[i++] = sdma_write_length;
1038			} else {
1039				pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
1040				pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
1041					PACKET3_DMA_DATA_DST_SEL(0) |
1042					PACKET3_DMA_DATA_SRC_SEL(2) |
1043					PACKET3_DMA_DATA_CP_SYNC;
1044				pm4[i++] = 0xdeadbeaf;
1045				pm4[i++] = 0;
1046				pm4[i++] = 0xfffffffc & bo_mc;
1047				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1048				pm4[i++] = sdma_write_length;
1049			}
1050		}
1051
1052		amdgpu_test_exec_cs_helper(context_handle,
1053					   ip_type, 0,
1054					   i, pm4,
1055					   1, resources,
1056					   ib_info, ibs_request);
1057
1058		/* verify if SDMA test result meets with expected */
1059		i = 0;
1060		while(i < (sdma_write_length / 4)) {
1061			CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
1062		}
1063
1064		r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
1065					     sdma_write_length);
1066		CU_ASSERT_EQUAL(r, 0);
1067		loop++;
1068	}
1069	/* clean resources */
1070	free(resources);
1071	free(ibs_request);
1072	free(ib_info);
1073	free(pm4);
1074
1075	/* end of test */
1076	r = amdgpu_cs_ctx_free(context_handle);
1077	CU_ASSERT_EQUAL(r, 0);
1078}
1079
/* CUnit test entry: run the constant-fill helper on the SDMA ring. */
static void amdgpu_command_submission_sdma_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
}
1084
1085static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
1086{
1087	const int sdma_write_length = 1024;
1088	const int pm4_dw = 256;
1089	amdgpu_context_handle context_handle;
1090	amdgpu_bo_handle bo1, bo2;
1091	amdgpu_bo_handle *resources;
1092	uint32_t *pm4;
1093	struct amdgpu_cs_ib_info *ib_info;
1094	struct amdgpu_cs_request *ibs_request;
1095	uint64_t bo1_mc, bo2_mc;
1096	volatile unsigned char *bo1_cpu, *bo2_cpu;
1097	int i, j, r, loop1, loop2;
1098	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1099	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
1100
1101	pm4 = calloc(pm4_dw, sizeof(*pm4));
1102	CU_ASSERT_NOT_EQUAL(pm4, NULL);
1103
1104	ib_info = calloc(1, sizeof(*ib_info));
1105	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1106
1107	ibs_request = calloc(1, sizeof(*ibs_request));
1108	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1109
1110	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1111	CU_ASSERT_EQUAL(r, 0);
1112
1113	/* prepare resource */
1114	resources = calloc(2, sizeof(amdgpu_bo_handle));
1115	CU_ASSERT_NOT_EQUAL(resources, NULL);
1116
1117	loop1 = loop2 = 0;
1118	/* run 9 circle to test all mapping combination */
1119	while(loop1 < 2) {
1120		while(loop2 < 2) {
1121			/* allocate UC bo1for sDMA use */
1122			r = amdgpu_bo_alloc_and_map(device_handle,
1123						    sdma_write_length, 4096,
1124						    AMDGPU_GEM_DOMAIN_GTT,
1125						    gtt_flags[loop1], &bo1,
1126						    (void**)&bo1_cpu, &bo1_mc,
1127						    &bo1_va_handle);
1128			CU_ASSERT_EQUAL(r, 0);
1129
1130			/* set bo1 */
1131			memset((void*)bo1_cpu, 0xaa, sdma_write_length);
1132
1133			/* allocate UC bo2 for sDMA use */
1134			r = amdgpu_bo_alloc_and_map(device_handle,
1135						    sdma_write_length, 4096,
1136						    AMDGPU_GEM_DOMAIN_GTT,
1137						    gtt_flags[loop2], &bo2,
1138						    (void**)&bo2_cpu, &bo2_mc,
1139						    &bo2_va_handle);
1140			CU_ASSERT_EQUAL(r, 0);
1141
1142			/* clear bo2 */
1143			memset((void*)bo2_cpu, 0, sdma_write_length);
1144
1145			resources[0] = bo1;
1146			resources[1] = bo2;
1147
1148			/* fulfill PM4: test DMA copy linear */
1149			i = j = 0;
1150			if (ip_type == AMDGPU_HW_IP_DMA) {
1151				if (family_id == AMDGPU_FAMILY_SI) {
1152					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
1153								  sdma_write_length);
1154					pm4[i++] = 0xffffffff & bo2_mc;
1155					pm4[i++] = 0xffffffff & bo1_mc;
1156					pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1157					pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1158				} else {
1159					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
1160					if (family_id >= AMDGPU_FAMILY_AI)
1161						pm4[i++] = sdma_write_length - 1;
1162					else
1163						pm4[i++] = sdma_write_length;
1164					pm4[i++] = 0;
1165					pm4[i++] = 0xffffffff & bo1_mc;
1166					pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1167					pm4[i++] = 0xffffffff & bo2_mc;
1168					pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1169				}
1170
1171			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1172				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1173				if (family_id == AMDGPU_FAMILY_SI) {
1174					pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
1175					pm4[i++] = 0xfffffffc & bo1_mc;
1176					pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
1177						PACKET3_DMA_DATA_SI_DST_SEL(0) |
1178						PACKET3_DMA_DATA_SI_SRC_SEL(0) |
1179						PACKET3_DMA_DATA_SI_CP_SYNC |
1180						(0xffff00000000 & bo1_mc) >> 32;
1181					pm4[i++] = 0xfffffffc & bo2_mc;
1182					pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1183					pm4[i++] = sdma_write_length;
1184				} else {
1185					pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
1186					pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
1187						PACKET3_DMA_DATA_DST_SEL(0) |
1188						PACKET3_DMA_DATA_SRC_SEL(0) |
1189						PACKET3_DMA_DATA_CP_SYNC;
1190					pm4[i++] = 0xfffffffc & bo1_mc;
1191					pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1192					pm4[i++] = 0xfffffffc & bo2_mc;
1193					pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1194					pm4[i++] = sdma_write_length;
1195				}
1196			}
1197
1198			amdgpu_test_exec_cs_helper(context_handle,
1199						   ip_type, 0,
1200						   i, pm4,
1201						   2, resources,
1202						   ib_info, ibs_request);
1203
1204			/* verify if SDMA test result meets with expected */
1205			i = 0;
1206			while(i < sdma_write_length) {
1207				CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
1208			}
1209			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
1210						     sdma_write_length);
1211			CU_ASSERT_EQUAL(r, 0);
1212			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
1213						     sdma_write_length);
1214			CU_ASSERT_EQUAL(r, 0);
1215			loop2++;
1216		}
1217		loop1++;
1218	}
1219	/* clean resources */
1220	free(resources);
1221	free(ibs_request);
1222	free(ib_info);
1223	free(pm4);
1224
1225	/* end of test */
1226	r = amdgpu_cs_ctx_free(context_handle);
1227	CU_ASSERT_EQUAL(r, 0);
1228}
1229
/* CUnit test entry: run the linear-copy helper on the SDMA ring. */
static void amdgpu_command_submission_sdma_copy_linear(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
}
1234
/* Top-level SDMA test: run write-linear, const-fill and copy-linear in turn. */
static void amdgpu_command_submission_sdma(void)
{
	amdgpu_command_submission_sdma_write_linear();
	amdgpu_command_submission_sdma_const_fill();
	amdgpu_command_submission_sdma_copy_linear();
}
1241
/*
 * Submit two GFX command streams, each consisting of a CE IB and a DE IB,
 * then wait on both resulting fences with amdgpu_cs_wait_fences().
 *
 * wait_all selects the kernel's wait semantics: true waits until all fences
 * signal, false returns when the first one does.  Exercised both ways by
 * amdgpu_command_submission_multi_fence().
 */
static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status[2] = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r;
	int i = 0, ib_cs_num = 2;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* one page for the DE IB ... */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* ... and one page for the CE IB */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		/* SI has no SET_CE_DE_COUNTERS packet, skip it there */
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;	/* IT_INCREMENT_CE_COUNTER */
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	/* both requests share the same CE+DE IB pair and BO list */
	for (i = 0; i < ib_cs_num; i++) {
		ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
		ibs_request[i].number_of_ibs = 2;
		ibs_request[i].ibs = ib_info;
		ibs_request[i].resources = bo_list;
		ibs_request[i].fence_info.handle = NULL;
	}

	r = amdgpu_cs_submit(context_handle, 0,ibs_request, ib_cs_num);

	CU_ASSERT_EQUAL(r, 0);

	/* collect the fence of each submitted request */
	for (i = 0; i < ib_cs_num; i++) {
		fence_status[i].context = context_handle;
		fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
		fence_status[i].fence = ibs_request[i].seq_no;
	}

	r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
				AMDGPU_TIMEOUT_INFINITE,
				&expired, NULL);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
1335
/* CUnit test entry: exercise multi-fence wait with both wait-all semantics. */
static void amdgpu_command_submission_multi_fence(void)
{
	amdgpu_command_submission_multi_fence_wait_all(true);
	amdgpu_command_submission_multi_fence_wait_all(false);
}
1341
1342static void amdgpu_userptr_test(void)
1343{
1344	int i, r, j;
1345	uint32_t *pm4 = NULL;
1346	uint64_t bo_mc;
1347	void *ptr = NULL;
1348	int pm4_dw = 256;
1349	int sdma_write_length = 4;
1350	amdgpu_bo_handle handle;
1351	amdgpu_context_handle context_handle;
1352	struct amdgpu_cs_ib_info *ib_info;
1353	struct amdgpu_cs_request *ibs_request;
1354	amdgpu_bo_handle buf_handle;
1355	amdgpu_va_handle va_handle;
1356
1357	pm4 = calloc(pm4_dw, sizeof(*pm4));
1358	CU_ASSERT_NOT_EQUAL(pm4, NULL);
1359
1360	ib_info = calloc(1, sizeof(*ib_info));
1361	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1362
1363	ibs_request = calloc(1, sizeof(*ibs_request));
1364	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1365
1366	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1367	CU_ASSERT_EQUAL(r, 0);
1368
1369	posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
1370	CU_ASSERT_NOT_EQUAL(ptr, NULL);
1371	memset(ptr, 0, BUFFER_SIZE);
1372
1373	r = amdgpu_create_bo_from_user_mem(device_handle,
1374					   ptr, BUFFER_SIZE, &buf_handle);
1375	CU_ASSERT_EQUAL(r, 0);
1376
1377	r = amdgpu_va_range_alloc(device_handle,
1378				  amdgpu_gpu_va_range_general,
1379				  BUFFER_SIZE, 1, 0, &bo_mc,
1380				  &va_handle, 0);
1381	CU_ASSERT_EQUAL(r, 0);
1382
1383	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
1384	CU_ASSERT_EQUAL(r, 0);
1385
1386	handle = buf_handle;
1387
1388	j = i = 0;
1389
1390	if (family_id == AMDGPU_FAMILY_SI)
1391		pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
1392				sdma_write_length);
1393	else
1394		pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
1395				SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
1396	pm4[i++] = 0xffffffff & bo_mc;
1397	pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1398	if (family_id >= AMDGPU_FAMILY_AI)
1399		pm4[i++] = sdma_write_length - 1;
1400	else if (family_id != AMDGPU_FAMILY_SI)
1401		pm4[i++] = sdma_write_length;
1402
1403	while (j++ < sdma_write_length)
1404		pm4[i++] = 0xdeadbeaf;
1405
1406	amdgpu_test_exec_cs_helper(context_handle,
1407				   AMDGPU_HW_IP_DMA, 0,
1408				   i, pm4,
1409				   1, &handle,
1410				   ib_info, ibs_request);
1411	i = 0;
1412	while (i < sdma_write_length) {
1413		CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
1414	}
1415	free(ibs_request);
1416	free(ib_info);
1417	free(pm4);
1418
1419	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
1420	CU_ASSERT_EQUAL(r, 0);
1421	r = amdgpu_va_range_free(va_handle);
1422	CU_ASSERT_EQUAL(r, 0);
1423	r = amdgpu_bo_free(buf_handle);
1424	CU_ASSERT_EQUAL(r, 0);
1425	free(ptr);
1426
1427	r = amdgpu_cs_ctx_free(context_handle);
1428	CU_ASSERT_EQUAL(r, 0);
1429}
1430