basic_tests.c revision 00a23bda
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdbool.h>
#include <unistd.h>
#ifdef HAVE_ALLOCA_H
# include <alloca.h>
#endif
#include <sys/wait.h>

#include "CUnit/Basic.h"

#include "amdgpu_test.h"
#include "amdgpu_drm.h"

static amdgpu_device_handle device_handle;
static uint32_t major_version;
static uint32_t minor_version;
static uint32_t family_id;

static void amdgpu_query_info_test(void);
static void amdgpu_command_submission_gfx(void);
static void amdgpu_command_submission_compute(void);
static void amdgpu_command_submission_multi_fence(void);
static void amdgpu_command_submission_sdma(void);
static void amdgpu_userptr_test(void);
static void amdgpu_semaphore_test(void);
static void amdgpu_sync_dependency_test(void);
static void amdgpu_bo_eviction_test(void);

static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
				       unsigned ip_type,
				       int instance, int pm4_dw, uint32_t *pm4_src,
				       int res_cnt, amdgpu_bo_handle *resources,
				       struct amdgpu_cs_ib_info *ib_info,
				       struct amdgpu_cs_request *ibs_request);

CU_TestInfo basic_tests[] = {
	{ "Query Info Test", amdgpu_query_info_test },
	{ "Userptr Test", amdgpu_userptr_test },
	{ "bo eviction Test", amdgpu_bo_eviction_test },
	{ "Command submission Test (GFX)", amdgpu_command_submission_gfx },
	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
	{ "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
	{ "SW semaphore Test", amdgpu_semaphore_test },
	{ "Sync dependency Test", amdgpu_sync_dependency_test },
	CU_TEST_INFO_NULL,
};
#define BUFFER_SIZE (8 * 1024)
#define SDMA_PKT_HEADER_op_offset 0
#define SDMA_PKT_HEADER_op_mask   0x000000FF
#define SDMA_PKT_HEADER_op_shift  0
#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
#define SDMA_OPCODE_CONSTANT_FILL  11
#       define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)           ((x) << 14)
	/* 0 = byte fill
	 * 2 = DW fill
	 */
#define SDMA_PACKET(op, sub_op, e)	((((e) & 0xFFFF) << 16) |	\
					(((sub_op) & 0xFF) << 8) |	\
					(((op) & 0xFF) << 0))
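/* Example: SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)
 * packs the opcode into bits [7:0], the sub-opcode into [15:8] and the extra
 * field into [31:16], giving the header dword 0x00000002.
 */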
#define	SDMA_OPCODE_WRITE				  2
#       define SDMA_WRITE_SUB_OPCODE_LINEAR               0
#       define SDMA_WRITE_SUB_OPCODE_TILED                1

#define	SDMA_OPCODE_COPY				  1
#       define SDMA_COPY_SUB_OPCODE_LINEAR                0

#define GFX_COMPUTE_NOP  0xffff1000
#define SDMA_NOP  0x0

/* PM4 */
#define	PACKET_TYPE0	0
#define	PACKET_TYPE1	1
#define	PACKET_TYPE2	2
#define	PACKET_TYPE3	3

#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
#define PACKET0(reg, n)	((PACKET_TYPE0 << 30) |				\
			 ((reg) & 0xFFFF) |			\
			 ((n) & 0x3FFF) << 16)
#define CP_PACKET2			0x80000000
#define		PACKET2_PAD_SHIFT		0
#define		PACKET2_PAD_MASK		(0x3fffffff << 0)

#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))

#define PACKET3(op, n)	((PACKET_TYPE3 << 30) |				\
			 (((op) & 0xFF) << 8) |				\
			 ((n) & 0x3FFF) << 16)
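/* Example: PACKET3(PACKET3_NOP, 0x3fff) == 0xffff1000, i.e. GFX_COMPUTE_NOP
 * above: a type-3 NOP packet with the maximum payload count.
 */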

/* Packet 3 types */
#define	PACKET3_NOP					0x10

#define	PACKET3_WRITE_DATA				0x37
#define		WRITE_DATA_DST_SEL(x)                   ((x) << 8)
		/* 0 - register
		 * 1 - memory (sync - via GRBM)
		 * 2 - gl2
		 * 3 - gds
		 * 4 - reserved
		 * 5 - memory (async - direct)
		 */
#define		WR_ONE_ADDR                             (1 << 16)
#define		WR_CONFIRM                              (1 << 20)
#define		WRITE_DATA_CACHE_POLICY(x)              ((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 */
#define		WRITE_DATA_ENGINE_SEL(x)                ((x) << 30)
		/* 0 - me
		 * 1 - pfp
		 * 2 - ce
		 */

#define	PACKET3_DMA_DATA				0x50
/* 1. header
 * 2. CONTROL
 * 3. SRC_ADDR_LO or DATA [31:0]
 * 4. SRC_ADDR_HI [31:0]
 * 5. DST_ADDR_LO [31:0]
 * 6. DST_ADDR_HI [7:0]
 * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
 */
/* CONTROL */
#              define PACKET3_DMA_DATA_ENGINE(x)     ((x) << 0)
		/* 0 - ME
		 * 1 - PFP
		 */
#              define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#              define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15)
#              define PACKET3_DMA_DATA_DST_SEL(x)  ((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#              define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27)
#              define PACKET3_DMA_DATA_SRC_SEL(x)  ((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_CP_SYNC     (1 << 31)
/* COMMAND */
#              define PACKET3_DMA_DATA_DIS_WC      (1 << 21)
#              define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#              define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#              define PACKET3_DMA_DATA_CMD_SAS     (1 << 26)
		/* 0 - memory
		 * 1 - register
		 */
#              define PACKET3_DMA_DATA_CMD_DAS     (1 << 27)
		/* 0 - memory
		 * 1 - register
		 */
#              define PACKET3_DMA_DATA_CMD_SAIC    (1 << 28)
#              define PACKET3_DMA_DATA_CMD_DAIC    (1 << 29)
#              define PACKET3_DMA_DATA_CMD_RAW_WAIT  (1 << 30)

#define SDMA_PACKET_SI(op, b, t, s, cnt)	((((op) & 0xF) << 28) |	\
						(((b) & 0x1) << 26) |		\
						(((t) & 0x1) << 23) |		\
						(((s) & 0x1) << 22) |		\
						(((cnt) & 0xFFFFF) << 0))
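/* Example: SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0) == 0xf0000000, the SI
 * SDMA NOP used by the semaphore test below.
 */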
#define	SDMA_OPCODE_COPY_SI	3
#define SDMA_OPCODE_CONSTANT_FILL_SI	13
#define SDMA_NOP_SI  0xf
#define GFX_COMPUTE_NOP_SI 0x80000000
#define	PACKET3_DMA_DATA_SI	0x41
#              define PACKET3_DMA_DATA_SI_ENGINE(x)     ((x) << 27)
		/* 0 - ME
		 * 1 - PFP
		 */
#              define PACKET3_DMA_DATA_SI_DST_SEL(x)  ((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_SI_SRC_SEL(x)  ((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_SI_CP_SYNC     (1 << 31)

#define PKT3_CONTEXT_CONTROL                   0x28
#define     CONTEXT_CONTROL_LOAD_ENABLE(x)     (((unsigned)(x) & 0x1) << 31)
#define     CONTEXT_CONTROL_LOAD_CE_RAM(x)     (((unsigned)(x) & 0x1) << 28)
#define     CONTEXT_CONTROL_SHADOW_ENABLE(x)   (((unsigned)(x) & 0x1) << 31)

#define PKT3_CLEAR_STATE                       0x12

#define PKT3_SET_SH_REG                        0x76
#define		PACKET3_SET_SH_REG_START			0x00002c00

#define	PACKET3_DISPATCH_DIRECT				0x15

/* gfx 8 */
#define mmCOMPUTE_PGM_LO                                                        0x2e0c
#define mmCOMPUTE_PGM_RSRC1                                                     0x2e12
#define mmCOMPUTE_TMPRING_SIZE                                                  0x2e18
#define mmCOMPUTE_USER_DATA_0                                                   0x2e40
#define mmCOMPUTE_USER_DATA_1                                                   0x2e41
#define mmCOMPUTE_RESOURCE_LIMITS                                               0x2e15
#define mmCOMPUTE_NUM_THREAD_X                                                  0x2e07

#define SWAP_32(num) ((((num) & 0xff000000) >> 24) | \
		      (((num) & 0x0000ff00) << 8) | \
		      (((num) & 0x00ff0000) >> 8) | \
		      (((num) & 0x000000ff) << 24))
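/* Example: SWAP_32(0x11223344) == 0x44332211, a byte swap of one dword */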

/* Shader code:
 * void main()
 * {
 *	float x = some_input;
 *	for (unsigned i = 0; i < 1000000; i++)
 *		x = sin(x);
 *	u[0] = 42u;
 * }
 */

static uint32_t shader_bin[] = {
	SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
	SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
	SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
	SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
};

#define CODE_OFFSET 512
#define DATA_OFFSET 1024
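/* Both offsets are in dwords: the shader is copied 2048 bytes into the 8 KiB
 * IB used by the sync dependency test, and the word it writes lives at byte
 * offset 4096.
 */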

int suite_basic_tests_init(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	int r;

	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
				   &minor_version, &device_handle);

	if (r) {
		if ((r == -EACCES) && (errno == EACCES))
			printf("\n\nError: %s. "
				"Hint: try running this test program as root.\n",
				strerror(errno));
		return CUE_SINIT_FAILED;
	}

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	if (r)
		return CUE_SINIT_FAILED;

	family_id = gpu_info.family_id;

	return CUE_SUCCESS;
}

int suite_basic_tests_clean(void)
{
	int r = amdgpu_device_deinitialize(device_handle);

	if (r == 0)
		return CUE_SUCCESS;
	else
		return CUE_SCLEAN_FAILED;
}

static void amdgpu_query_info_test(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	uint32_t version, feature;
	int r;

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
					  0, &version, &feature);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_separate_ibs(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_shared_ib(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
			       &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

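	/* IT_WAIT_ON_CE_COUNTER, placed 16 bytes into the same IB buffer */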
	ptr = (uint32_t *)ib_result_cpu + 4;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_bo_eviction_test(void)
{
	const int sdma_write_length = 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo1_mc, bo2_mc;
	volatile unsigned char *bo1_cpu, *bo2_cpu;
	int i, j, r, loop1, loop2;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
	struct amdgpu_heap_info vram_info, gtt_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(4, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
				   0, &vram_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
				   0, &gtt_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	loop1 = loop2 = 0;
	/* run all four GTT-flag combinations for bo1 and bo2 */
	while (loop1 < 2) {
		while (loop2 < 2) {
			/* allocate UC bo1 for SDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop1], &bo1,
						    (void**)&bo1_cpu, &bo1_mc,
						    &bo1_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* set bo1 */
			memset((void*)bo1_cpu, 0xaa, sdma_write_length);

			/* allocate UC bo2 for SDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop2], &bo2,
						    (void**)&bo2_cpu, &bo2_mc,
						    &bo2_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo2 */
			memset((void*)bo2_cpu, 0, sdma_write_length);

			resources[0] = bo1;
			resources[1] = bo2;
			resources[2] = vram_max[loop2];
			resources[3] = gtt_max[loop2];
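			/* the max-allocation VRAM and GTT BOs in the resource
			 * list create memory pressure, so the kernel has to
			 * evict and move bo1/bo2, which is what this test
			 * exercises
			 */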

			/* fill the PM4 packets: test DMA copy linear */
			i = j = 0;
			if (family_id == AMDGPU_FAMILY_SI) {
				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
							  sdma_write_length);
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
			} else {
				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else
					pm4[i++] = sdma_write_length;
				pm4[i++] = 0;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   AMDGPU_HW_IP_DMA, 0,
						   i, pm4,
						   4, resources,
						   ib_info, ibs_request);

			/* verify the SDMA copy: bo2 must now hold bo1's pattern */
			i = 0;
			while (i < sdma_write_length) {
				CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
			}
			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			loop2++;
		}
		loop2 = 0;
		loop1++;
	}
	amdgpu_bo_free(vram_max[0]);
	amdgpu_bo_free(vram_max[1]);
	amdgpu_bo_free(gtt_max[0]);
	amdgpu_bo_free(gtt_max[1]);
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx(void)
{
	/* write data using the CP */
	amdgpu_command_submission_gfx_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_gfx_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_gfx_cp_copy_data();
	/* separate IB buffers for multi-IB submission */
	amdgpu_command_submission_gfx_separate_ibs();
	/* shared IB buffer for multi-IB submission */
	amdgpu_command_submission_gfx_shared_ib();
}

static void amdgpu_semaphore_test(void)
{
	amdgpu_context_handle context_handle[2];
	amdgpu_semaphore_handle sem;
	amdgpu_bo_handle ib_result_handle[2];
	void *ib_result_cpu[2];
	uint64_t ib_result_mc_address[2];
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2] = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	uint32_t sdma_nop, gfx_nop;
	amdgpu_bo_list_handle bo_list[2];
	amdgpu_va_handle va_handle[2];
	int r, i;

	if (family_id == AMDGPU_FAMILY_SI) {
		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
		gfx_nop = GFX_COMPUTE_NOP_SI;
	} else {
		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
		gfx_nop = GFX_COMPUTE_NOP;
	}

	r = amdgpu_cs_create_semaphore(&sem);
	CU_ASSERT_EQUAL(r, 0);
	for (i = 0; i < 2; i++) {
		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle[i], &ib_result_cpu[i],
					    &ib_result_mc_address[i], &va_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
				       NULL, &bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	/* 1. same context different engine */
	ptr = ib_result_cpu[0];
	ptr[0] = sdma_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[0];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* 2. same engine different context */
	ptr = ib_result_cpu[0];
	ptr[0] = gfx_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[1], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[1];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	for (i = 0; i < 2; i++) {
		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
					     ib_result_mc_address[i], 4096);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_cs_ctx_free(context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_destroy_semaphore(sem);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_compute_nop(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request;
	struct amdgpu_cs_ib_info ib_info;
	struct amdgpu_cs_fence fence_status;
	uint32_t *ptr;
	uint32_t expired;
	int r, instance;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip info;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

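	/* walk the compute rings advertised in the available_rings bitmask;
	 * the loop stops at the first cleared bit
	 */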
	for (instance = 0; (1 << instance) & info.available_rings; instance++) {
		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle, &ib_result_cpu,
					    &ib_result_mc_address, &va_handle);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
				       &bo_list);
		CU_ASSERT_EQUAL(r, 0);

		ptr = ib_result_cpu;
		memset(ptr, 0, 16 * sizeof(uint32_t));
		ptr[0] = PACKET3(PACKET3_NOP, 14);

		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
		ib_info.ib_mc_address = ib_result_mc_address;
		ib_info.size = 16;

		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
		ibs_request.ring = instance;
		ibs_request.number_of_ibs = 1;
		ibs_request.ibs = &ib_info;
		ibs_request.resources = bo_list;
		ibs_request.fence_info.handle = NULL;

		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
		r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
		CU_ASSERT_EQUAL(r, 0);

		fence_status.context = context_handle;
		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
		fence_status.ip_instance = 0;
		fence_status.ring = instance;
		fence_status.fence = ibs_request.seq_no;

		r = amdgpu_cs_query_fence_status(&fence_status,
						 AMDGPU_TIMEOUT_INFINITE,
						 0, &expired);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
					     ib_result_mc_address, 4096);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_compute_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute(void)
{
	/* write data using the CP */
	amdgpu_command_submission_compute_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_compute_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_compute_cp_copy_data();
	/* nop test */
	amdgpu_command_submission_compute_nop();
}

/*
 * The caller must create and release pm4_src, resources, ib_info and
 * ibs_request. This helper submits the command stream described in
 * ibs_request and waits for the IB to complete.
 */
static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
				       unsigned ip_type,
				       int instance, int pm4_dw, uint32_t *pm4_src,
				       int res_cnt, amdgpu_bo_handle *resources,
				       struct amdgpu_cs_ib_info *ib_info,
				       struct amdgpu_cs_request *ibs_request)
{
	int r;
	uint32_t expired;
	uint32_t *ring_ptr;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_fence fence_status = {0};
	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
	amdgpu_va_handle va_handle;

	/* prepare CS */
	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
	CU_ASSERT_NOT_EQUAL(resources, NULL);
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
	CU_ASSERT_TRUE(pm4_dw <= 1024);

	/* allocate IB */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* copy PM4 packet to ring from caller */
	ring_ptr = ib_result_cpu;
	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));

	ib_info->ib_mc_address = ib_result_mc_address;
	ib_info->size = pm4_dw;

	ibs_request->ip_type = ip_type;
	ibs_request->ring = instance;
	ibs_request->number_of_ibs = 1;
	ibs_request->ibs = ib_info;
	ibs_request->fence_info.handle = NULL;

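	/* append the IB BO itself so it is resident while the ring runs it */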
	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
	all_res[res_cnt] = ib_result_handle;

	r = amdgpu_bo_list_create(device_handle, res_cnt + 1, all_res,
				  NULL, &ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ibs_request->ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request->seq_no;

	/* wait for IB accomplished */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
{
	const int sdma_write_length = 128;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	int i, j, r, loop, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop = 0;
		while (loop < 2) {
			/* allocate UC bo for SDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length * sizeof(uint32_t),
						    4096, AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop], &bo, (void**)&bo_cpu,
						    &bo_mc, &va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo */
			memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));

			resources[0] = bo;

			/* fill the PM4 packets: test DMA write linear */
			i = j = 0;
			if (ip_type == AMDGPU_HW_IP_DMA) {
				if (family_id == AMDGPU_FAMILY_SI)
					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
								  sdma_write_length);
				else
					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
							       SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
				pm4[i++] = 0xffffffff & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else if (family_id != AMDGPU_FAMILY_SI)
					pm4[i++] = sdma_write_length;
				while (j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
				pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				while (j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   ip_type, ring_id,
						   i, pm4,
						   1, resources,
						   ib_info, ibs_request);

			/* verify the write: every dword must hold the pattern */
			i = 0;
			while (i < sdma_write_length) {
				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
			}

			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
						     sdma_write_length * sizeof(uint32_t));
			CU_ASSERT_EQUAL(r, 0);
			loop++;
		}
	}
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_sdma_write_linear(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
}

static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
{
	const int sdma_write_length = 1024 * 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	int i, j, r, loop, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop = 0;
		while (loop < 2) {
			/* allocate UC bo for SDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop], &bo, (void**)&bo_cpu,
						    &bo_mc, &va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo */
			memset((void*)bo_cpu, 0, sdma_write_length);

			resources[0] = bo;

			/* fill the PM4 packets: test DMA const fill */
			i = j = 0;
			if (ip_type == AMDGPU_HW_IP_DMA) {
				if (family_id == AMDGPU_FAMILY_SI) {
					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
								  0, 0, 0,
								  sdma_write_length / 4);
					pm4[i++] = 0xfffffffc & bo_mc;
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
				} else {
					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
							       SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
					pm4[i++] = 0xffffffff & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = 0xdeadbeaf;
					if (family_id >= AMDGPU_FAMILY_AI)
						pm4[i++] = sdma_write_length - 1;
					else
						pm4[i++] = sdma_write_length;
				}
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				if (family_id == AMDGPU_FAMILY_SI) {
					pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
						   PACKET3_DMA_DATA_SI_DST_SEL(0) |
						   PACKET3_DMA_DATA_SI_SRC_SEL(2) |
						   PACKET3_DMA_DATA_SI_CP_SYNC;
					pm4[i++] = 0xffffffff & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = sdma_write_length;
				} else {
					pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
					pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
						   PACKET3_DMA_DATA_DST_SEL(0) |
						   PACKET3_DMA_DATA_SRC_SEL(2) |
						   PACKET3_DMA_DATA_CP_SYNC;
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = 0;
					pm4[i++] = 0xfffffffc & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = sdma_write_length;
				}
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   ip_type, ring_id,
						   i, pm4,
						   1, resources,
						   ib_info, ibs_request);

			/* verify the fill: every dword must hold the pattern */
			i = 0;
			while (i < (sdma_write_length / 4)) {
				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
			}

			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			loop++;
		}
	}
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_sdma_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
}

static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
{
	const int sdma_write_length = 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo1, bo2;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo1_mc, bo2_mc;
	volatile unsigned char *bo1_cpu, *bo2_cpu;
	int i, j, r, loop1, loop2, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(2, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop1 = loop2 = 0;
		/* run all four GTT-flag combinations for bo1 and bo2 */
		while (loop1 < 2) {
			while (loop2 < 2) {
				/* allocate UC bo1 for SDMA use */
				r = amdgpu_bo_alloc_and_map(device_handle,
							    sdma_write_length, 4096,
							    AMDGPU_GEM_DOMAIN_GTT,
							    gtt_flags[loop1], &bo1,
							    (void**)&bo1_cpu, &bo1_mc,
							    &bo1_va_handle);
				CU_ASSERT_EQUAL(r, 0);

				/* set bo1 */
				memset((void*)bo1_cpu, 0xaa, sdma_write_length);

				/* allocate UC bo2 for SDMA use */
				r = amdgpu_bo_alloc_and_map(device_handle,
							    sdma_write_length, 4096,
							    AMDGPU_GEM_DOMAIN_GTT,
							    gtt_flags[loop2], &bo2,
							    (void**)&bo2_cpu, &bo2_mc,
							    &bo2_va_handle);
				CU_ASSERT_EQUAL(r, 0);

				/* clear bo2 */
				memset((void*)bo2_cpu, 0, sdma_write_length);

				resources[0] = bo1;
				resources[1] = bo2;

				/* fill the PM4 packets: test DMA copy linear */
				i = j = 0;
				if (ip_type == AMDGPU_HW_IP_DMA) {
					if (family_id == AMDGPU_FAMILY_SI) {
						pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
									  0, 0, 0,
									  sdma_write_length);
						pm4[i++] = 0xffffffff & bo2_mc;
						pm4[i++] = 0xffffffff & bo1_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
					} else {
						pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
								       SDMA_COPY_SUB_OPCODE_LINEAR,
								       0);
						if (family_id >= AMDGPU_FAMILY_AI)
							pm4[i++] = sdma_write_length - 1;
						else
							pm4[i++] = sdma_write_length;
						pm4[i++] = 0;
						pm4[i++] = 0xffffffff & bo1_mc;
						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
						pm4[i++] = 0xffffffff & bo2_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
					}
				} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
					   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
					if (family_id == AMDGPU_FAMILY_SI) {
						pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
						pm4[i++] = 0xfffffffc & bo1_mc;
						pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
							   PACKET3_DMA_DATA_SI_DST_SEL(0) |
							   PACKET3_DMA_DATA_SI_SRC_SEL(0) |
							   PACKET3_DMA_DATA_SI_CP_SYNC |
							   (0xffff00000000 & bo1_mc) >> 32;
						pm4[i++] = 0xfffffffc & bo2_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
						pm4[i++] = sdma_write_length;
					} else {
						pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
						pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
							   PACKET3_DMA_DATA_DST_SEL(0) |
							   PACKET3_DMA_DATA_SRC_SEL(0) |
							   PACKET3_DMA_DATA_CP_SYNC;
						pm4[i++] = 0xfffffffc & bo1_mc;
						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
						pm4[i++] = 0xfffffffc & bo2_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
						pm4[i++] = sdma_write_length;
					}
				}

				amdgpu_test_exec_cs_helper(context_handle,
							   ip_type, ring_id,
							   i, pm4,
							   2, resources,
							   ib_info, ibs_request);

				/* verify the copy: bo2 must now hold bo1's pattern */
				i = 0;
				while (i < sdma_write_length) {
					CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
				}
				r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
							     sdma_write_length);
				CU_ASSERT_EQUAL(r, 0);
				r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
							     sdma_write_length);
				CU_ASSERT_EQUAL(r, 0);
				loop2++;
			}
			loop2 = 0;
			loop1++;
		}
	}
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_sdma_copy_linear(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
}

static void amdgpu_command_submission_sdma(void)
{
	amdgpu_command_submission_sdma_write_linear();
	amdgpu_command_submission_sdma_const_fill();
	amdgpu_command_submission_sdma_copy_linear();
}

static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status[2] = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r;
	int i = 0, ib_cs_num = 2;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	for (i = 0; i < ib_cs_num; i++) {
		ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
		ibs_request[i].number_of_ibs = 2;
		ibs_request[i].ibs = ib_info;
		ibs_request[i].resources = bo_list;
		ibs_request[i].fence_info.handle = NULL;
	}

	r = amdgpu_cs_submit(context_handle, 0, ibs_request, ib_cs_num);
	CU_ASSERT_EQUAL(r, 0);

	for (i = 0; i < ib_cs_num; i++) {
		fence_status[i].context = context_handle;
		fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
		fence_status[i].fence = ibs_request[i].seq_no;
	}

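	/* wait_all selects whether amdgpu_cs_wait_fences returns only after
	 * every fence has signaled or as soon as the first one does
	 */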
	r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
				AMDGPU_TIMEOUT_INFINITE,
				&expired, NULL);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_multi_fence(void)
{
	amdgpu_command_submission_multi_fence_wait_all(true);
	amdgpu_command_submission_multi_fence_wait_all(false);
}

static void amdgpu_userptr_test(void)
{
	int i, r, j;
	uint32_t *pm4 = NULL;
	uint64_t bo_mc;
	void *ptr = NULL;
	int pm4_dw = 256;
	int sdma_write_length = 4;
	amdgpu_bo_handle handle;
	amdgpu_context_handle context_handle;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	amdgpu_bo_handle buf_handle;
	amdgpu_va_handle va_handle;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_NOT_EQUAL(ptr, NULL);
	memset(ptr, 0, BUFFER_SIZE);

	r = amdgpu_create_bo_from_user_mem(device_handle,
					   ptr, BUFFER_SIZE, &buf_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_va_range_alloc(device_handle,
				  amdgpu_gpu_va_range_general,
				  BUFFER_SIZE, 1, 0, &bo_mc,
				  &va_handle, 0);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
	CU_ASSERT_EQUAL(r, 0);

	handle = buf_handle;

	j = i = 0;

	if (family_id == AMDGPU_FAMILY_SI)
		pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
				sdma_write_length);
	else
		pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
				SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
	pm4[i++] = 0xffffffff & bo_mc;
	pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
	if (family_id >= AMDGPU_FAMILY_AI)
		pm4[i++] = sdma_write_length - 1;
	else if (family_id != AMDGPU_FAMILY_SI)
		pm4[i++] = sdma_write_length;

	while (j++ < sdma_write_length)
		pm4[i++] = 0xdeadbeaf;

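	/* fork a child that dirties its copy of the PM4 buffer and exits;
	 * with copy-on-write this checks that the fork does not disturb the
	 * userptr pages the GPU is about to write
	 */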
	if (!fork()) {
		pm4[0] = 0x0;
		exit(0);
	}

	amdgpu_test_exec_cs_helper(context_handle,
				   AMDGPU_HW_IP_DMA, 0,
				   i, pm4,
				   1, &handle,
				   ib_info, ibs_request);
	i = 0;
	while (i < sdma_write_length) {
		CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
	}
	free(ibs_request);
	free(ib_info);
	free(pm4);

	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_va_range_free(va_handle);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_free(buf_handle);
	CU_ASSERT_EQUAL(r, 0);
	free(ptr);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);

	wait(NULL);
}

static void amdgpu_sync_dependency_test(void)
{
	amdgpu_context_handle context_handle[2];
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request;
	struct amdgpu_cs_ib_info ib_info;
	struct amdgpu_cs_fence fence_status;
	uint32_t expired;
	int i, j, r;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	static uint32_t *ptr;
	uint64_t seq_no;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
			       &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ptr = ib_result_cpu;
	i = 0;

	memcpy(ptr + CODE_OFFSET, shader_bin, sizeof(shader_bin));

	/* Dispatch minimal init config and verify it's executed */
	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
	ptr[i++] = 0x80000000;
	ptr[i++] = 0x80000000;

	ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
	ptr[i++] = 0x80000000;

	/* Program compute regs */
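	/* COMPUTE_PGM_LO/HI take the shader GPU VA in 256-byte units: the two
	 * writes below program bits [39:8] and [47:40] of the address
	 */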
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
	ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
	ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
	/*
	 * 002c0040         COMPUTE_PGM_RSRC1 <- VGPRS = 0
	 *                                      SGPRS = 1
	 *                                      PRIORITY = 0
	 *                                      FLOAT_MODE = 192 (0xc0)
	 *                                      PRIV = 0
	 *                                      DX10_CLAMP = 1
	 *                                      DEBUG_MODE = 0
	 *                                      IEEE_MODE = 0
	 *                                      BULKY = 0
	 *                                      CDBG_USER = 0
	 */
	ptr[i++] = 0x002c0040;

	/*
	 * 00000010         COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
	 *                                      USER_SGPR = 8
	 *                                      TRAP_PRESENT = 0
	 *                                      TGID_X_EN = 0
	 *                                      TGID_Y_EN = 0
	 *                                      TGID_Z_EN = 0
	 *                                      TG_SIZE_EN = 0
	 *                                      TIDIG_COMP_CNT = 0
	 *                                      EXCP_EN_MSB = 0
	 *                                      LDS_SIZE = 0
	 *                                      EXCP_EN = 0
	 */
	ptr[i++] = 0x00000010;

	/*
	 * 00000100         COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
	 *                                         WAVESIZE = 0
	 */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
	ptr[i++] = 0x00000100;

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
	ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
	ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
	ptr[i++] = 0;

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
	ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
	ptr[i++] = 1;
	ptr[i++] = 1;
	ptr[i++] = 1;

	/* Dispatch */
	ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ptr[i++] = 1;
	ptr[i++] = 1;
	ptr[i++] = 1;
	ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */

	while (i & 7)
		ptr[i++] = 0xffff1000; /* type3 nop packet */

	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
	ib_info.ib_mc_address = ib_result_mc_address;
	ib_info.size = i;

	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = 0;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);
	seq_no = ibs_request.seq_no;

	/* Prepare second command with dependency on the first */
	j = i;
	ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
	ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ptr[i++] = 0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
	ptr[i++] = 99;

	while (i & 7)
		ptr[i++] = 0xffff1000; /* type3 nop packet */

	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
	ib_info.ib_mc_address = ib_result_mc_address + j * 4;
	ib_info.size = i - j;

	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = 0;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	ibs_request.number_of_dependencies = 1;

	ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
	ibs_request.dependencies[0].context = context_handle[1];
	ibs_request.dependencies[0].ip_instance = 0;
	ibs_request.dependencies[0].ring = 0;
	ibs_request.dependencies[0].fence = seq_no;

	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
	fence_status.context = context_handle[0];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
		       AMDGPU_TIMEOUT_INFINITE, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	/* Expect the second command to wait for shader to complete */
	CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 8192);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_ctx_free(context_handle[1]);
	CU_ASSERT_EQUAL(r, 0);

	free(ibs_request.dependencies);
}
1829