basic_tests.c revision b0ab5608
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif
#include <sys/stat.h>
#include <fcntl.h>
#if HAVE_ALLOCA_H
# include <alloca.h>
#endif
#include <sys/wait.h>

#include "CUnit/Basic.h"

#include "amdgpu_test.h"
#include "amdgpu_drm.h"
#include "amdgpu_internal.h"
#include "util_math.h"

static  amdgpu_device_handle device_handle;
static  uint32_t  major_version;
static  uint32_t  minor_version;
static  uint32_t  family_id;
static  uint32_t  chip_id;
static  uint32_t  chip_rev;

static void amdgpu_query_info_test(void);
static void amdgpu_command_submission_gfx(void);
static void amdgpu_command_submission_compute(void);
static void amdgpu_command_submission_multi_fence(void);
static void amdgpu_command_submission_sdma(void);
static void amdgpu_userptr_test(void);
static void amdgpu_semaphore_test(void);
static void amdgpu_sync_dependency_test(void);
static void amdgpu_bo_eviction_test(void);
static void amdgpu_compute_dispatch_test(void);
static void amdgpu_gfx_dispatch_test(void);
static void amdgpu_draw_test(void);
static void amdgpu_gpu_reset_test(void);
static void amdgpu_stable_pstate_test(void);

static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
				       unsigned ip_type,
				       int instance, int pm4_dw, uint32_t *pm4_src,
				       int res_cnt, amdgpu_bo_handle *resources,
				       struct amdgpu_cs_ib_info *ib_info,
				       struct amdgpu_cs_request *ibs_request);

CU_TestInfo basic_tests[] = {
	{ "Query Info Test",  amdgpu_query_info_test },
	{ "Userptr Test",  amdgpu_userptr_test },
	{ "bo eviction Test",  amdgpu_bo_eviction_test },
	{ "Command submission Test (GFX)",  amdgpu_command_submission_gfx },
	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
	{ "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
	{ "SW semaphore Test",  amdgpu_semaphore_test },
	{ "Sync dependency Test",  amdgpu_sync_dependency_test },
	{ "Dispatch Test (Compute)",  amdgpu_compute_dispatch_test },
	{ "Dispatch Test (GFX)",  amdgpu_gfx_dispatch_test },
	{ "Draw Test",  amdgpu_draw_test },
	{ "GPU reset Test", amdgpu_gpu_reset_test },
	{ "Stable pstate Test", amdgpu_stable_pstate_test },
	CU_TEST_INFO_NULL,
};
#define BUFFER_SIZE (MAX2(8 * 1024, getpagesize()))
#define SDMA_PKT_HEADER_op_offset 0
#define SDMA_PKT_HEADER_op_mask   0x000000FF
#define SDMA_PKT_HEADER_op_shift  0
#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
#define SDMA_OPCODE_CONSTANT_FILL  11
#       define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)           ((x) << 14)
	/* 0 = byte fill
	 * 2 = DW fill
	 */
#define SDMA_PACKET(op, sub_op, e)	((((e) & 0xFFFF) << 16) |	\
					(((sub_op) & 0xFF) << 8) |	\
					(((op) & 0xFF) << 0))
#define	SDMA_OPCODE_WRITE				  2
#       define SDMA_WRITE_SUB_OPCODE_LINEAR               0
#       define SDMA_WRITE_SUB_OPCODE_TILED                1

#define	SDMA_OPCODE_COPY				  1
#       define SDMA_COPY_SUB_OPCODE_LINEAR                0

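/*
 * Illustrative example (not from the original file): the helpers below
 * build a write-linear SDMA packet header as
 *   SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)
 *     = ((0 & 0xFFFF) << 16) | ((0 & 0xFF) << 8) | ((2 & 0xFF) << 0)
 *     = 0x00000002
 * i.e. opcode in bits [7:0], sub-opcode in bits [15:8], extra bits in [31:16].
 */
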
#define	SDMA_OPCODE_ATOMIC				  10
#		define SDMA_ATOMIC_LOOP(x)               ((x) << 0)
		/* 0 - single_pass_atomic.
		 * 1 - loop_until_compare_satisfied.
		 */
#		define SDMA_ATOMIC_TMZ(x)                ((x) << 2)
		/* 0 - non-TMZ.
		 * 1 - TMZ.
		 */
#		define SDMA_ATOMIC_OPCODE(x)             ((x) << 9)
		/* TC_OP_ATOMIC_CMPSWAP_RTN_32 0x00000008
		 * same as Packet 3
		 */
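
/*
 * Illustrative example (not from the original file): a looping, TMZ
 * compare-swap header for the secure SDMA path can be composed as
 *   SDMA_PACKET(SDMA_OPCODE_ATOMIC, 0,
 *               SDMA_ATOMIC_LOOP(1) | SDMA_ATOMIC_TMZ(1) |
 *               SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32))
 * (TC_OP_ATOMIC_CMPSWAP_RTN_32 is defined with the PM4 atomics below.)
 */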

#define GFX_COMPUTE_NOP  0xffff1000
#define SDMA_NOP  0x0

/* PM4 */
#define	PACKET_TYPE0	0
#define	PACKET_TYPE1	1
#define	PACKET_TYPE2	2
#define	PACKET_TYPE3	3

#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
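
/*
 * Illustrative example (not from the original file): decoding the header
 * 0xC00E1000 with these macros gives CP_PACKET_GET_TYPE() == 3,
 * CP_PACKET_GET_COUNT() == 14 and CP_PACKET3_GET_OPCODE() == PACKET3_NOP.
 */
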
#define PACKET0(reg, n)	((PACKET_TYPE0 << 30) |				\
			 ((reg) & 0xFFFF) |			\
			 ((n) & 0x3FFF) << 16)
#define CP_PACKET2			0x80000000
#define		PACKET2_PAD_SHIFT		0
#define		PACKET2_PAD_MASK		(0x3fffffff << 0)

#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))

#define PACKET3(op, n)	((PACKET_TYPE3 << 30) |				\
			 (((op) & 0xFF) << 8) |				\
			 ((n) & 0x3FFF) << 16)
#define PACKET3_COMPUTE(op, n) PACKET3(op, n) | (1 << 1)
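
/*
 * Illustrative example (not from the original file):
 *   PACKET3(PACKET3_NOP, 14) == 0xC00E1000
 * (type 3 in bits [31:30], count in [29:16], opcode in [15:8]); the header
 * is followed by count + 1 body dwords.  GFX_COMPUTE_NOP above is simply
 * PACKET3(PACKET3_NOP, 0x3FFF).
 */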

/* Packet 3 types */
#define	PACKET3_NOP					0x10

#define	PACKET3_WRITE_DATA				0x37
#define		WRITE_DATA_DST_SEL(x)                   ((x) << 8)
		/* 0 - register
		 * 1 - memory (sync - via GRBM)
		 * 2 - gl2
		 * 3 - gds
		 * 4 - reserved
		 * 5 - memory (async - direct)
		 */
#define		WR_ONE_ADDR                             (1 << 16)
#define		WR_CONFIRM                              (1 << 20)
#define		WRITE_DATA_CACHE_POLICY(x)              ((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 */
#define		WRITE_DATA_ENGINE_SEL(x)                ((x) << 30)
		/* 0 - me
		 * 1 - pfp
		 * 2 - ce
		 */

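/*
 * Illustrative example (not from the original file): the write-linear
 * helper below emits a confirmed memory write as
 *   PACKET3(PACKET3_WRITE_DATA, 2 + n)
 *   WRITE_DATA_DST_SEL(5) | WR_CONFIRM      (async memory write, confirmed)
 *   dst_addr_lo
 *   dst_addr_hi
 *   ... n data dwords ...
 */
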
#define	PACKET3_ATOMIC_MEM				0x1E
#define     TC_OP_ATOMIC_CMPSWAP_RTN_32          0x00000008
#define     ATOMIC_MEM_COMMAND(x)               ((x) << 8)
            /* 0 - single_pass_atomic.
             * 1 - loop_until_compare_satisfied.
             */
#define     ATOMIC_MEM_CACHEPOLICY(x)           ((x) << 25)
            /* 0 - lru.
             * 1 - stream.
             */
#define     ATOMIC_MEM_ENGINESEL(x)             ((x) << 30)
            /* 0 - micro_engine.
             */

#define	PACKET3_DMA_DATA				0x50
/* 1. header
 * 2. CONTROL
 * 3. SRC_ADDR_LO or DATA [31:0]
 * 4. SRC_ADDR_HI [31:0]
 * 5. DST_ADDR_LO [31:0]
 * 6. DST_ADDR_HI [7:0]
 * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
 */
/* CONTROL */
#              define PACKET3_DMA_DATA_ENGINE(x)     ((x) << 0)
		/* 0 - ME
		 * 1 - PFP
		 */
#              define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#              define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15)
#              define PACKET3_DMA_DATA_DST_SEL(x)  ((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#              define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27)
#              define PACKET3_DMA_DATA_SRC_SEL(x)  ((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_CP_SYNC     (1 << 31)
/* COMMAND */
#              define PACKET3_DMA_DATA_DIS_WC      (1 << 21)
#              define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#              define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#              define PACKET3_DMA_DATA_CMD_SAS     (1 << 26)
		/* 0 - memory
		 * 1 - register
		 */
#              define PACKET3_DMA_DATA_CMD_DAS     (1 << 27)
		/* 0 - memory
		 * 1 - register
		 */
#              define PACKET3_DMA_DATA_CMD_SAIC    (1 << 28)
#              define PACKET3_DMA_DATA_CMD_DAIC    (1 << 29)
#              define PACKET3_DMA_DATA_CMD_RAW_WAIT  (1 << 30)

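/*
 * Illustrative sketch (not from the original file): a CP DMA constant
 * fill can be encoded with the defines above roughly as
 *   PACKET3(PACKET3_DMA_DATA, 5)
 *   PACKET3_DMA_DATA_ENGINE(0) | PACKET3_DMA_DATA_DST_SEL(0) |
 *       PACKET3_DMA_DATA_SRC_SEL(2) | PACKET3_DMA_DATA_CP_SYNC
 *   fill_value, 0                            (SRC_SEL(2): source is DATA)
 *   dst_addr_lo, dst_addr_hi
 *   byte_count
 */
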
#define SDMA_PACKET_SI(op, b, t, s, cnt)	((((op) & 0xF) << 28) |	\
						(((b) & 0x1) << 26) |		\
						(((t) & 0x1) << 23) |		\
						(((s) & 0x1) << 22) |		\
						(((cnt) & 0xFFFFF) << 0))
#define	SDMA_OPCODE_COPY_SI	3
#define SDMA_OPCODE_CONSTANT_FILL_SI	13
#define SDMA_NOP_SI  0xf
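
/*
 * Illustrative example (not from the original file):
 *   SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0, n)
 * places the opcode in bits [31:28] and the byte count n in bits [19:0],
 * matching the SI copy packet built in amdgpu_bo_eviction_test() below.
 */
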
#define GFX_COMPUTE_NOP_SI 0x80000000
#define	PACKET3_DMA_DATA_SI	0x41
#              define PACKET3_DMA_DATA_SI_ENGINE(x)     ((x) << 27)
		/* 0 - ME
		 * 1 - PFP
		 */
#              define PACKET3_DMA_DATA_SI_DST_SEL(x)  ((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_SI_SRC_SEL(x)  ((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_SI_CP_SYNC     (1 << 31)

#define PKT3_CONTEXT_CONTROL                   0x28
#define     CONTEXT_CONTROL_LOAD_ENABLE(x)     (((unsigned)(x) & 0x1) << 31)
#define     CONTEXT_CONTROL_LOAD_CE_RAM(x)     (((unsigned)(x) & 0x1) << 28)
#define     CONTEXT_CONTROL_SHADOW_ENABLE(x)   (((unsigned)(x) & 0x1) << 31)

#define PKT3_CLEAR_STATE                       0x12

#define PKT3_SET_SH_REG                        0x76
#define		PACKET3_SET_SH_REG_START			0x00002c00
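
/*
 * Illustrative example (not from the original file): SH registers are
 * programmed by their offset from PACKET3_SET_SH_REG_START, e.g. for
 * mmCOMPUTE_PGM_RSRC1 (0x2e12, defined below):
 *   PACKET3(PKT3_SET_SH_REG, 1)
 *   0x2e12 - PACKET3_SET_SH_REG_START
 *   value
 */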

#define PKT3_SET_SH_REG_INDEX			0x9B

#define	PACKET3_DISPATCH_DIRECT				0x15
#define PACKET3_EVENT_WRITE				0x46
#define PACKET3_ACQUIRE_MEM				0x58
#define PACKET3_SET_CONTEXT_REG				0x69
#define PACKET3_SET_UCONFIG_REG				0x79
#define PACKET3_DRAW_INDEX_AUTO				0x2D
/* gfx 8 */
#define mmCOMPUTE_PGM_LO                                                        0x2e0c
#define mmCOMPUTE_PGM_RSRC1                                                     0x2e12
#define mmCOMPUTE_TMPRING_SIZE                                                  0x2e18
#define mmCOMPUTE_USER_DATA_0                                                   0x2e40
#define mmCOMPUTE_USER_DATA_1                                                   0x2e41
#define mmCOMPUTE_RESOURCE_LIMITS                                               0x2e15
#define mmCOMPUTE_NUM_THREAD_X                                                  0x2e07

#define SWAP_32(num) (((num & 0xff000000) >> 24) | \
		      ((num & 0x0000ff00) << 8) | \
		      ((num & 0x00ff0000) >> 8) | \
		      ((num & 0x000000ff) << 24))

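/* Illustrative example (not from the original file):
 * SWAP_32(0x11223344) == 0x44332211; the shader_bin dwords below are run
 * through this macro at compile time.
 */
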
/* Shader code
 * void main()
 * {
 *	float x = some_input;
 *	for (unsigned i = 0; i < 1000000; i++)
 *		x = sin(x);
 *
 *	u[0] = 42u;
 * }
 */

static  uint32_t shader_bin[] = {
	SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
	SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
	SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
	SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
};

#define CODE_OFFSET 512
#define DATA_OFFSET 1024

enum cs_type {
	CS_BUFFERCLEAR,
	CS_BUFFERCOPY,
	CS_HANG,
	CS_HANG_SLOW
};

static const uint32_t bufferclear_cs_shader_gfx9[] = {
    0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08,
    0x7e020280, 0x7e040204, 0x7e060205, 0x7e080206,
    0x7e0a0207, 0xe01c2000, 0x80000200, 0xbf8c0000,
    0xbf810000
};

static const uint32_t bufferclear_cs_shader_gfx10[] = {
	0xD7460004, 0x04010C08, 0x7E000204, 0x7E020205,
	0x7E040206, 0x7E060207, 0xE01C2000, 0x80000004,
	0xBF810000
};

static const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = {
	{0x2e12, 0x000C0041},	//{ mmCOMPUTE_PGM_RSRC1,	  0x000C0041 },
	{0x2e13, 0x00000090},	//{ mmCOMPUTE_PGM_RSRC2,	  0x00000090 },
	{0x2e07, 0x00000040},	//{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 },
	{0x2e08, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 },
	{0x2e09, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
};

static const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5;

static const uint32_t buffercopy_cs_shader_gfx9[] = {
    0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08,
    0x7e020280, 0xe00c2000, 0x80000200, 0xbf8c0f70,
    0xe01c2000, 0x80010200, 0xbf810000
};

static const uint32_t buffercopy_cs_shader_gfx10[] = {
	0xD7460001, 0x04010C08, 0xE00C2000, 0x80000201,
	0xBF8C3F70, 0xE01C2000, 0x80010201, 0xBF810000
};

static const uint32_t preamblecache_gfx9[] = {
	0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
	0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
	0xc0026900, 0xb4,  0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
	0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
	0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
	0xc0016900, 0x2d5, 0x10000, 0xc0016900,  0x2dc, 0x0,
	0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
	0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
	0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20,
	0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
	0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0,
	0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
	0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
	0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0,
	0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1,
	0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0,
	0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0,
	0xc0017900, 0x24b, 0x0
};

static const uint32_t preamblecache_gfx10[] = {
	0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
	0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
	0xc0026900, 0xb4, 0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
	0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
	0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
	0xc0016900, 0x2d5, 0x10000, 0xc0016900, 0x2dc, 0x0,
	0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
	0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
	0xc0046900, 0x310, 0, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0xe, 0x20,
	0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
	0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x6, 0x0,
	0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
	0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
	0xc0016900, 0x314, 0x0, 0xc0016900, 0x10a, 0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0,
	0xc0016900, 0x2db, 0, 0xc0016900, 0x1d4, 0, 0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1, 0xc0016900, 0xe, 0x2,
	0xc0016900, 0x206, 0x300, 0xc0016900, 0x212, 0x200, 0xc0017900, 0x7b, 0x20, 0xc0017a00, 0x20000243, 0x0,
	0xc0017900, 0x249, 0, 0xc0017900, 0x24a, 0, 0xc0017900, 0x24b, 0, 0xc0017900, 0x259, 0xffffffff,
	0xc0017900, 0x25f, 0, 0xc0017900, 0x260, 0, 0xc0017900, 0x262, 0,
	0xc0017600, 0x45, 0x0, 0xc0017600, 0x6, 0x0,
	0xc0067600, 0x70, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0067600, 0x30, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0
};

enum ps_type {
	PS_CONST,
	PS_TEX,
	PS_HANG,
	PS_HANG_SLOW
};

static const uint32_t ps_const_shader_gfx9[] = {
    0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
    0xD2960000, 0x00020300, 0xD2960001, 0x00020702,
    0xC4001C0F, 0x00000100, 0xBF810000
};

static const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6;

static const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = {
    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 },
     { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 }
    }
};

static const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = {
    0x00000004
};

static const uint32_t ps_num_sh_registers_gfx9 = 2;

static const uint32_t ps_const_sh_registers_gfx9[][2] = {
    {0x2C0A, 0x000C0040},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 },
    {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 }
};

static const uint32_t ps_num_context_registers_gfx9 = 7;

static const uint32_t ps_const_context_reg_gfx9[][2] = {
    {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
    {0xA1B6, 0x00000000}, //{ mmSPI_PS_IN_CONTROL,       0x00000000 },
    {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
    {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
    {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
    {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
    {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004 }
};

static const uint32_t ps_const_shader_gfx10[] = {
    0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
    0x5E000300, 0x5E020702, 0xBF800000, 0xBF800000,
    0xF8001C0F, 0x00000100, 0xBF810000
};

static const uint32_t ps_const_shader_patchinfo_code_size_gfx10 = 6;

static const uint32_t ps_const_shader_patchinfo_code_gfx10[][10][6] = {
    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001890, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001801, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000300 },
     { 0x5E000300, 0x5E020702, 0xBF800000, 0xBF800000, 0xF8001C0F, 0x00000100 },
     { 0xD7690000, 0x00020300, 0xD7690001, 0x00020702, 0xF8001C0F, 0x00000100 },
     { 0xD7680000, 0x00020300, 0xD7680001, 0x00020702, 0xF8001C0F, 0x00000100 },
     { 0xD76A0000, 0x00020300, 0xD76A0001, 0x00020702, 0xF8001C0F, 0x00000100 },
     { 0xD76B0000, 0x00020300, 0xD76B0001, 0x00020702, 0xF8001C0F, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF800180F, 0x03020100 }
    }
};

static const uint32_t ps_const_shader_patchinfo_offset_gfx10[] = {
    0x00000004
};

static const uint32_t ps_num_sh_registers_gfx10 = 2;

static const uint32_t ps_const_sh_registers_gfx10[][2] = {
    {0x2C0A, 0x000C0000},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0000 },
    {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 }
};

static const uint32_t ps_tex_shader_gfx9[] = {
    0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000,
    0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00,
    0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000,
    0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F,
    0x00000100, 0xBF810000
};

static const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = {
    0x0000000B
};

static const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6;

static const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = {
    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 },
     { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 }
    }
};

static const uint32_t ps_tex_sh_registers_gfx9[][2] = {
    {0x2C0A, 0x000C0081},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 },
    {0x2C0B, 0x00000018}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 }
};

static const uint32_t ps_tex_context_reg_gfx9[][2] = {
    {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
    {0xA1B6, 0x00000001}, //{ mmSPI_PS_IN_CONTROL,       0x00000001 },
    {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
    {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
    {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
    {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
    {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004  }
};

static const uint32_t ps_tex_shader_gfx10[] = {
    0xBEFC030C, 0xBE8E047E, 0xBEFE0A7E, 0xC8080000,
    0xC80C0100, 0xC8090001, 0xC80D0101, 0xF0800F0A,
    0x00400402, 0x00000003, 0xBEFE040E, 0xBF8C0F70,
    0x5E000B04, 0x5E020F06, 0xBF800000, 0xBF800000,
    0xF8001C0F, 0x00000100, 0xBF810000
};

static const uint32_t ps_tex_shader_patchinfo_offset_gfx10[] = {
    0x0000000C
};

static const uint32_t ps_tex_shader_patchinfo_code_size_gfx10 = 6;

static const uint32_t ps_tex_shader_patchinfo_code_gfx10[][10][6] = {
    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001890, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001801, 0x00000004 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000504 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000704 },
     { 0x5E000B04, 0x5E020F06, 0xBF800000, 0xBF800000, 0xF8001C0F, 0x00000100 },
     { 0xD7690000, 0x00020B04, 0xD7690001, 0x00020F06, 0xF8001C0F, 0x00000100 },
     { 0xD7680000, 0x00020B04, 0xD7680001, 0x00020F06, 0xF8001C0F, 0x00000100 },
     { 0xD76A0000, 0x00020B04, 0xD76A0001, 0x00020F06, 0xF8001C0F, 0x00000100 },
     { 0xD76B0000, 0x00020B04, 0xD76B0001, 0x00020F06, 0xF8001C0F, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF800180F, 0x07060504 }
    }
};

static const uint32_t vs_RectPosTexFast_shader_gfx9[] = {
    0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100,
    0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206,
    0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080,
    0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003,
    0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101,
    0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903,
    0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100,
    0xC400020F, 0x05060403, 0xBF810000
};

static const uint32_t vs_RectPosTexFast_shader_gfx10[] = {
    0x7E000B00, 0x060000F3, 0x7E020202, 0x7E040206,
    0x7C040080, 0x060000F3, 0xD5010001, 0x01AA0200,
    0x7E060203, 0xD5010002, 0x01AA0404, 0x7E080207,
    0x7C040080, 0xD5010000, 0x01A80101, 0xD5010001,
    0x01AA0601, 0x7E060208, 0x7E0A02F2, 0xD5010002,
    0x01A80902, 0xD5010004, 0x01AA0805, 0x7E0C0209,
    0xF80008CF, 0x05030100, 0xF800020F, 0x05060402,
    0xBF810000
};

static const uint32_t cached_cmd_gfx9[] = {
	0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0,
	0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
	0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf,
	0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x12,
	0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0,
	0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
	0xc0026900, 0x292, 0x20, 0x60201b8,
	0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
};

static const uint32_t cached_cmd_gfx10[] = {
	0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0,
	0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
	0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf,
	0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x18,
	0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0,
	0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
	0xc0026900, 0x292, 0x20, 0x6020000,
	0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
};

unsigned int memcpy_ps_hang[] = {
        0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100,
        0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001,
        0xC80D0101, 0xBF8C007F, 0xF0800F00, 0x00010002,
        0xBEFE040C, 0xBF8C0F70, 0xBF800000, 0xBF800000,
        0xF800180F, 0x03020100, 0xBF810000
};

struct amdgpu_test_shader {
	uint32_t *shader;
	uint32_t header_length;
	uint32_t body_length;
	uint32_t foot_length;
};

unsigned int memcpy_cs_hang_slow_ai_codes[] = {
    0xd1fd0000, 0x04010c08, 0xe00c2000, 0x80000100,
    0xbf8c0f70, 0xe01c2000, 0x80010100, 0xbf810000
};

struct amdgpu_test_shader memcpy_cs_hang_slow_ai = {
        memcpy_cs_hang_slow_ai_codes,
        4,
        3,
        1
};

unsigned int memcpy_cs_hang_slow_rv_codes[] = {
    0x8e00860c, 0x32000000, 0xe00c2000, 0x80010100,
    0xbf8c0f70, 0xe01c2000, 0x80020100, 0xbf810000
};

struct amdgpu_test_shader memcpy_cs_hang_slow_rv = {
        memcpy_cs_hang_slow_rv_codes,
        4,
        3,
        1
};

unsigned int memcpy_cs_hang_slow_nv_codes[] = {
    0xd7460000, 0x04010c08, 0xe00c2000, 0x80000100,
    0xbf8c0f70, 0xe01ca000, 0x80010100, 0xbf810000
};

struct amdgpu_test_shader memcpy_cs_hang_slow_nv = {
        memcpy_cs_hang_slow_nv_codes,
        4,
        3,
        1
};

unsigned int memcpy_ps_hang_slow_ai_codes[] = {
        0xbefc000c, 0xbe8e017e, 0xbefe077e, 0xd4080000,
        0xd4090001, 0xd40c0100, 0xd40d0101, 0xf0800f00,
        0x00400002, 0xbefe010e, 0xbf8c0f70, 0xbf800000,
        0xbf800000, 0xbf800000, 0xbf800000, 0xc400180f,
        0x03020100, 0xbf810000
};

struct amdgpu_test_shader memcpy_ps_hang_slow_ai = {
        memcpy_ps_hang_slow_ai_codes,
        7,
        2,
        9
};

int amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size,
			unsigned alignment, unsigned heap, uint64_t alloc_flags,
			uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu,
			uint64_t *mc_address,
			amdgpu_va_handle *va_handle)
{
	struct amdgpu_bo_alloc_request request = {};
	amdgpu_bo_handle buf_handle;
	amdgpu_va_handle handle;
	uint64_t vmc_addr;
	int r;

	request.alloc_size = size;
	request.phys_alignment = alignment;
	request.preferred_heap = heap;
	request.flags = alloc_flags;

	r = amdgpu_bo_alloc(dev, &request, &buf_handle);
	if (r)
		return r;

	r = amdgpu_va_range_alloc(dev,
				  amdgpu_gpu_va_range_general,
				  size, alignment, 0, &vmc_addr,
				  &handle, 0);
	if (r)
		goto error_va_alloc;

	r = amdgpu_bo_va_op_raw(dev, buf_handle, 0, ALIGN(size, getpagesize()), vmc_addr,
				   AMDGPU_VM_PAGE_READABLE |
				   AMDGPU_VM_PAGE_WRITEABLE |
				   AMDGPU_VM_PAGE_EXECUTABLE |
				   mapping_flags,
				   AMDGPU_VA_OP_MAP);
	if (r)
		goto error_va_map;

	r = amdgpu_bo_cpu_map(buf_handle, cpu);
	if (r)
		goto error_cpu_map;

	*bo = buf_handle;
	*mc_address = vmc_addr;
	*va_handle = handle;

	return 0;

 error_cpu_map:
	amdgpu_bo_cpu_unmap(buf_handle);

 error_va_map:
	amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);

 error_va_alloc:
	amdgpu_bo_free(buf_handle);
	return r;
}
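
/*
 * Minimal usage sketch (illustrative, not part of the test flow; the
 * variable names are hypothetical): allocate a 4 KiB GTT buffer, touch it
 * through the CPU mapping, then release it with the companion helper from
 * amdgpu_test.h.
 *
 *	amdgpu_bo_handle bo;
 *	amdgpu_va_handle va;
 *	void *cpu;
 *	uint64_t mc;
 *
 *	if (!amdgpu_bo_alloc_and_map_raw(dev, 4096, 4096,
 *					 AMDGPU_GEM_DOMAIN_GTT, 0, 0,
 *					 &bo, &cpu, &mc, &va)) {
 *		memset(cpu, 0, 4096);
 *		amdgpu_bo_unmap_and_free(bo, va, mc, 4096);
 *	}
 */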

CU_BOOL suite_basic_tests_enable(void)
{
	if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
				     &minor_version, &device_handle))
		return CU_FALSE;

	family_id = device_handle->info.family_id;
	chip_id = device_handle->info.chip_external_rev;
	chip_rev = device_handle->info.chip_rev;

	if (amdgpu_device_deinitialize(device_handle))
		return CU_FALSE;

	/* disable GFX engine basic test cases for ASICs that have no CPG */
	if (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) {
		if (amdgpu_set_test_active("Basic Tests",
					"Command submission Test (GFX)",
					CU_FALSE))
			fprintf(stderr, "test deactivation failed - %s\n",
				CU_get_error_msg());

		if (amdgpu_set_test_active("Basic Tests",
					"Command submission Test (Multi-Fence)",
					CU_FALSE))
			fprintf(stderr, "test deactivation failed - %s\n",
				CU_get_error_msg());

		if (amdgpu_set_test_active("Basic Tests",
					"Sync dependency Test",
					CU_FALSE))
			fprintf(stderr, "test deactivation failed - %s\n",
				CU_get_error_msg());
	}

	return CU_TRUE;
}

int suite_basic_tests_init(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	int r;

	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
				   &minor_version, &device_handle);

	if (r) {
		if ((r == -EACCES) && (errno == EACCES))
			printf("\n\nError: %s. "
				"Hint: Try to run this test program as root.",
				strerror(errno));
		return CUE_SINIT_FAILED;
	}

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	if (r)
		return CUE_SINIT_FAILED;

	family_id = gpu_info.family_id;

	return CUE_SUCCESS;
}

int suite_basic_tests_clean(void)
{
	int r = amdgpu_device_deinitialize(device_handle);

	if (r == 0)
		return CUE_SUCCESS;
	else
		return CUE_SCLEAN_FAILED;
}

static void amdgpu_query_info_test(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	uint32_t version, feature;
	int r;

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
					  0, &version, &feature);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_separate_ibs(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r, i = 0;
	struct drm_amdgpu_info_hw_ip info;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
	CU_ASSERT_EQUAL(r, 0);

	if (info.hw_ip_version_major >= 11)
		return;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);

	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_shared_ib(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	int r, i = 0;
	struct drm_amdgpu_info_hw_ip info;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
	CU_ASSERT_EQUAL(r, 0);

	if (info.hw_ip_version_major >= 11)
		return;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
			       &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	ptr = (uint32_t *)ib_result_cpu + 4;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);

	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_bo_eviction_test(void)
{
	const int sdma_write_length = 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo1_mc, bo2_mc;
	volatile unsigned char *bo1_cpu, *bo2_cpu;
	int i, j, r, loop1, loop2;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
	struct amdgpu_heap_info vram_info, gtt_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(4, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
				   0, &vram_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
				   0, &gtt_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	loop1 = loop2 = 0;
	/* run 2 * 2 loops to test all mapping combinations */
	while(loop1 < 2) {
		while(loop2 < 2) {
			/* allocate UC bo1 for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop1], &bo1,
						    (void**)&bo1_cpu, &bo1_mc,
						    &bo1_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* set bo1 */
			memset((void*)bo1_cpu, 0xaa, sdma_write_length);

			/* allocate UC bo2 for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop2], &bo2,
						    (void**)&bo2_cpu, &bo2_mc,
						    &bo2_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo2 */
			memset((void*)bo2_cpu, 0, sdma_write_length);

			resources[0] = bo1;
			resources[1] = bo2;
			resources[2] = vram_max[loop2];
			resources[3] = gtt_max[loop2];
			/* build PM4: test DMA copy-linear */
			i = j = 0;
			if (family_id == AMDGPU_FAMILY_SI) {
				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
							  sdma_write_length);
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
			} else {
				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else
					pm4[i++] = sdma_write_length;
				pm4[i++] = 0;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   AMDGPU_HW_IP_DMA, 0,
						   i, pm4,
						   4, resources,
						   ib_info, ibs_request);

			/* verify the SDMA result matches the expected pattern */
			i = 0;
			while(i < sdma_write_length) {
				CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
			}
			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			loop2++;
		}
		loop2 = 0;
		loop1++;
	}
	amdgpu_bo_free(vram_max[0]);
	amdgpu_bo_free(vram_max[1]);
	amdgpu_bo_free(gtt_max[0]);
	amdgpu_bo_free(gtt_max[1]);
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx(void)
{
	/* write data using the CP */
	amdgpu_command_submission_gfx_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_gfx_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_gfx_cp_copy_data();
	/* separate IB buffers for multi-IB submission */
	amdgpu_command_submission_gfx_separate_ibs();
	/* shared IB buffer for multi-IB submission */
	amdgpu_command_submission_gfx_shared_ib();
}

static void amdgpu_semaphore_test(void)
{
	amdgpu_context_handle context_handle[2];
	amdgpu_semaphore_handle sem;
	amdgpu_bo_handle ib_result_handle[2];
	void *ib_result_cpu[2];
	uint64_t ib_result_mc_address[2];
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2] = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	uint32_t sdma_nop, gfx_nop;
	amdgpu_bo_list_handle bo_list[2];
	amdgpu_va_handle va_handle[2];
	int r, i;
	struct amdgpu_gpu_info gpu_info = {0};
	unsigned gc_ip_type;

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	gc_ip_type = (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) ?
			AMDGPU_HW_IP_COMPUTE : AMDGPU_HW_IP_GFX;

	if (family_id == AMDGPU_FAMILY_SI) {
		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
		gfx_nop = GFX_COMPUTE_NOP_SI;
	} else {
		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
		gfx_nop = GFX_COMPUTE_NOP;
	}

	r = amdgpu_cs_create_semaphore(&sem);
	CU_ASSERT_EQUAL(r, 0);
	for (i = 0; i < 2; i++) {
		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle[i], &ib_result_cpu[i],
					    &ib_result_mc_address[i], &va_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
				       NULL, &bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	/* 1. same context different engine */
	ptr = ib_result_cpu[0];
	ptr[0] = sdma_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[0], gc_ip_type, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = gc_ip_type;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[0];
	fence_status.ip_type = gc_ip_type;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* 2. same engine different context */
	ptr = ib_result_cpu[0];
	ptr[0] = gfx_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = gc_ip_type;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], gc_ip_type, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[1], gc_ip_type, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = gc_ip_type;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request[1], 1);

	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[1];
	fence_status.ip_type = gc_ip_type;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	for (i = 0; i < 2; i++) {
		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
					     ib_result_mc_address[i], 4096);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_cs_ctx_free(context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_destroy_semaphore(sem);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_compute_nop(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request;
	struct amdgpu_cs_ib_info ib_info;
	struct amdgpu_cs_fence fence_status;
	uint32_t *ptr;
	uint32_t expired;
	int r, instance;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip info;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	for (instance = 0; (1 << instance) & info.available_rings; instance++) {
		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle, &ib_result_cpu,
					    &ib_result_mc_address, &va_handle);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
				       &bo_list);
		CU_ASSERT_EQUAL(r, 0);

		ptr = ib_result_cpu;
		memset(ptr, 0, 16);
		ptr[0] = PACKET3(PACKET3_NOP, 14);

		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
		ib_info.ib_mc_address = ib_result_mc_address;
		ib_info.size = 16;

		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
		ibs_request.ring = instance;
		ibs_request.number_of_ibs = 1;
		ibs_request.ibs = &ib_info;
		ibs_request.resources = bo_list;
		ibs_request.fence_info.handle = NULL;

		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
		r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
		CU_ASSERT_EQUAL(r, 0);

		fence_status.context = context_handle;
		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
		fence_status.ip_instance = 0;
		fence_status.ring = instance;
		fence_status.fence = ibs_request.seq_no;

		r = amdgpu_cs_query_fence_status(&fence_status,
						 AMDGPU_TIMEOUT_INFINITE,
						 0, &expired);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
					     ib_result_mc_address, 4096);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_compute_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute(void)
{
	/* write data using the CP */
	amdgpu_command_submission_compute_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_compute_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_compute_cp_copy_data();
	/* nop test */
	amdgpu_command_submission_compute_nop();
}

/*
 * The caller must create and release:
 * pm4_src, resources, ib_info, and ibs_request.
 * Submits the command stream described in ibs_request and waits for the
 * IB to complete.
 */
void
amdgpu_test_exec_cs_helper_raw(amdgpu_device_handle device_handle,
			       amdgpu_context_handle context_handle,
			       unsigned ip_type, int instance, int pm4_dw,
			       uint32_t *pm4_src, int res_cnt,
			       amdgpu_bo_handle *resources,
			       struct amdgpu_cs_ib_info *ib_info,
			       struct amdgpu_cs_request *ibs_request,
			       bool secure)
{
	int r;
	uint32_t expired;
	uint32_t *ring_ptr;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_fence fence_status = {0};
	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
	amdgpu_va_handle va_handle;

	/* prepare CS */
	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
	CU_ASSERT_NOT_EQUAL(resources, NULL);
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
	CU_ASSERT_TRUE(pm4_dw <= 1024);

	/* allocate IB */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* copy PM4 packet to ring from caller */
	ring_ptr = ib_result_cpu;
	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));

	ib_info->ib_mc_address = ib_result_mc_address;
	ib_info->size = pm4_dw;
	if (secure)
		ib_info->flags |= AMDGPU_IB_FLAGS_SECURE;

	ibs_request->ip_type = ip_type;
	ibs_request->ring = instance;
	ibs_request->number_of_ibs = 1;
	ibs_request->ibs = ib_info;
	ibs_request->fence_info.handle = NULL;

	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
	all_res[res_cnt] = ib_result_handle;

	r = amdgpu_bo_list_create(device_handle, res_cnt + 1, all_res,
				  NULL, &ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ibs_request->ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request->seq_no;

	/* wait for the IB to complete */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);
}

static void
amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
			   unsigned ip_type, int instance, int pm4_dw,
			   uint32_t *pm4_src, int res_cnt,
			   amdgpu_bo_handle *resources,
			   struct amdgpu_cs_ib_info *ib_info,
			   struct amdgpu_cs_request *ibs_request)
{
	amdgpu_test_exec_cs_helper_raw(device_handle, context_handle,
				       ip_type, instance, pm4_dw, pm4_src,
				       res_cnt, resources, ib_info,
				       ibs_request, false);
}

1562void
1563amdgpu_command_submission_write_linear_helper_with_secure(amdgpu_device_handle
1564							  device, unsigned
1565							  ip_type, bool secure)
1566{
1567	const int sdma_write_length = 128;
1568	const int pm4_dw = 256;
1569	amdgpu_context_handle context_handle;
1570	amdgpu_bo_handle bo;
1571	amdgpu_bo_handle *resources;
1572	uint32_t *pm4;
1573	struct amdgpu_cs_ib_info *ib_info;
1574	struct amdgpu_cs_request *ibs_request;
1575	uint64_t bo_mc;
1576	volatile uint32_t *bo_cpu;
1577	uint32_t bo_cpu_origin;
1578	int i, j, r, loop, ring_id;
1579	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1580	amdgpu_va_handle va_handle;
1581	struct drm_amdgpu_info_hw_ip hw_ip_info;
1582
1583	pm4 = calloc(pm4_dw, sizeof(*pm4));
1584	CU_ASSERT_NOT_EQUAL(pm4, NULL);
1585
1586	ib_info = calloc(1, sizeof(*ib_info));
1587	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1588
1589	ibs_request = calloc(1, sizeof(*ibs_request));
1590	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1591
1592	r = amdgpu_query_hw_ip_info(device, ip_type, 0, &hw_ip_info);
1593	CU_ASSERT_EQUAL(r, 0);
1594
1595	for (i = 0; secure && (i < 2); i++)
1596		gtt_flags[i] |= AMDGPU_GEM_CREATE_ENCRYPTED;
1597
1598	r = amdgpu_cs_ctx_create(device, &context_handle);
1600	CU_ASSERT_EQUAL(r, 0);
1601
1602	/* prepare resource */
1603	resources = calloc(1, sizeof(amdgpu_bo_handle));
1604	CU_ASSERT_NOT_EQUAL(resources, NULL);
1605
1606	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1607		loop = 0;
1608		while (loop < 2) {
1609			/* allocate a GTT bo (cached first, then USWC) as the write target */
1610			r = amdgpu_bo_alloc_and_map(device,
1611						    sdma_write_length * sizeof(uint32_t),
1612						    4096, AMDGPU_GEM_DOMAIN_GTT,
1613						    gtt_flags[loop], &bo, (void**)&bo_cpu,
1614						    &bo_mc, &va_handle);
1615			CU_ASSERT_EQUAL(r, 0);
1616
1617			/* clear bo */
1618			memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));
1619
1620			resources[0] = bo;
1621
1622			/* build the PM4 stream for the write-linear test */
1623			i = j = 0;
1624			if (ip_type == AMDGPU_HW_IP_DMA) {
1625				if (family_id == AMDGPU_FAMILY_SI)
1626					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
1627								  sdma_write_length);
1628				else
1629					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
1630							       SDMA_WRITE_SUB_OPCODE_LINEAR,
1631							       secure ? SDMA_ATOMIC_TMZ(1) : 0);
1632				pm4[i++] = 0xfffffffc & bo_mc;
1633				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
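				/* FAMILY_AI and newer SDMA encodes the count as
				 * length - 1; SI already packed it into the header */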
1634				if (family_id >= AMDGPU_FAMILY_AI)
1635					pm4[i++] = sdma_write_length - 1;
1636				else if (family_id != AMDGPU_FAMILY_SI)
1637					pm4[i++] = sdma_write_length;
1638				while (j++ < sdma_write_length)
1639					pm4[i++] = 0xdeadbeaf;
1640			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1641				    (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1642				pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
1643				pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1644				pm4[i++] = 0xfffffffc & bo_mc;
1645				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1646				while (j++ < sdma_write_length)
1647					pm4[i++] = 0xdeadbeaf;
1648			}
1649
1650			amdgpu_test_exec_cs_helper_raw(device, context_handle,
1651						       ip_type, ring_id, i, pm4,
1652						       1, resources, ib_info,
1653						       ibs_request, secure);
1654
1655			/* verify the result of the write */
1656			i = 0;
1657			if (!secure) {
1658				while (i < sdma_write_length) {
1659					CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
1660				}
1661			} else if (ip_type == AMDGPU_HW_IP_GFX) {
1662				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
1663				pm4[i++] = PACKET3(PACKET3_ATOMIC_MEM, 7);
1664				/* ATOMIC_MEM dword 1: 32-bit compare-swap with return
1665				 * (TC_OP_ATOMIC_CMPSWAP_RTN_32);
1666				 * command 1 - loop until the compare is satisfied;
1667				 * cache policy 0 - LRU; engine_sel 0 - micro engine
1668				 */
1669				pm4[i++] = (TC_OP_ATOMIC_CMPSWAP_RTN_32 |
1670							ATOMIC_MEM_COMMAND(1) |
1671							ATOMIC_MEM_CACHEPOLICAY(0) |
1672							ATOMIC_MEM_ENGINESEL(0));
1673				pm4[i++] = 0xfffffffc & bo_mc;
1674				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1675				pm4[i++] = 0x12345678;
1676				pm4[i++] = 0x0;
1677				pm4[i++] = 0xdeadbeaf;
1678				pm4[i++] = 0x0;
1679				pm4[i++] = 0x100;
1680				amdgpu_test_exec_cs_helper_raw(device, context_handle,
1681							ip_type, ring_id, i, pm4,
1682							1, resources, ib_info,
1683							ibs_request, true);
1684			} else if (ip_type == AMDGPU_HW_IP_DMA) {
1685				/* snapshot bo_cpu[0] so the swap can be detected */
1686				bo_cpu_origin = bo_cpu[0];
1687				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
1688				/* SDMA ATOMIC header: 32-bit compare-swap with return
1689				 * (TC_OP_ATOMIC_CMPSWAP_RTN_32), TMZ set;
1690				 * loop 1 - retry until the compare is satisfied
1691				 */
1692				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
1693							       0,
1694							       SDMA_ATOMIC_LOOP(1) |
1695							       SDMA_ATOMIC_TMZ(1) |
1696							       SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
1697				pm4[i++] = 0xfffffffc & bo_mc;
1698				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1699				pm4[i++] = 0x12345678;
1700				pm4[i++] = 0x0;
1701				pm4[i++] = 0xdeadbeaf;
1702				pm4[i++] = 0x0;
1703				pm4[i++] = 0x100;
1704				amdgpu_test_exec_cs_helper_raw(device, context_handle,
1705							ip_type, ring_id, i, pm4,
1706							1, resources, ib_info,
1707							ibs_request, true);
1708				/* SDMA's atomic behavior differs from GFX: on a
1709				 * compare miss, GFX keeps looping until the engine
1710				 * times out (hanging the system), whereas SDMA loops
1711				 * until a timer expires and then raises an interrupt,
1712				 * so the test cannot use the interrupt mechanism.
1713				 * Verify from the CPU instead: on a compare hit the
1714				 * source value is swapped into the destination, on a
1715				 * miss the destination is left unchanged - so an
1716				 * overwritten bo_cpu[0] means the swap executed.
1717				 */
1718				CU_ASSERT_NOT_EQUAL(bo_cpu[0], bo_cpu_origin);
1719
1720				/* run the atomic again, now with cmp_data != dest_data */
1721				i = 0;
1722				/* snapshot again; dest_data should now be 0x12345678 */
1723				bo_cpu_origin = bo_cpu[0];
1724				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
1725				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
1726							       0,
1727							       SDMA_ATOMIC_LOOP(1) |
1728							       SDMA_ATOMIC_TMZ(1) |
1729							       SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
1730				pm4[i++] = 0xfffffffc & bo_mc;
1731				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1732				pm4[i++] = 0x87654321;
1733				pm4[i++] = 0x0;
1734				pm4[i++] = 0xdeadbeaf;
1735				pm4[i++] = 0x0;
1736				pm4[i++] = 0x100;
1737				amdgpu_test_exec_cs_helper_raw(device, context_handle,
1738							ip_type, ring_id, i, pm4,
1739							1, resources, ib_info,
1740							ibs_request, true);
1741				/* bo_cpu[0] must be unchanged, i.e. still 0x12345678, otherwise the test failed */
1742				CU_ASSERT_EQUAL(bo_cpu[0], bo_cpu_origin);
1743			}
1744
1745			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
1746						     sdma_write_length * sizeof(uint32_t));
1747			CU_ASSERT_EQUAL(r, 0);
1748			loop++;
1749		}
1750	}
1751	/* clean resources */
1752	free(resources);
1753	free(ibs_request);
1754	free(ib_info);
1755	free(pm4);
1756
1757	/* end of test */
1758	r = amdgpu_cs_ctx_free(context_handle);
1759	CU_ASSERT_EQUAL(r, 0);
1760}
1761
1762static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
1763{
1764	amdgpu_command_submission_write_linear_helper_with_secure(device_handle,
1765								  ip_type,
1766								  false);
1767}
1768
1769static void amdgpu_command_submission_sdma_write_linear(void)
1770{
1771	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
1772}
1773
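/*
 * Constant-fill test: fill a 1 MiB GTT buffer with 0xdeadbeaf using SDMA
 * CONSTANT_FILL (PACKET3 DMA_DATA with src_sel = data on GFX/COMPUTE) and
 * verify every dword from the CPU, once with a cached GTT mapping and once
 * with USWC.
 */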
1774static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
1775{
1776	const int sdma_write_length = 1024 * 1024;
1777	const int pm4_dw = 256;
1778	amdgpu_context_handle context_handle;
1779	amdgpu_bo_handle bo;
1780	amdgpu_bo_handle *resources;
1781	uint32_t *pm4;
1782	struct amdgpu_cs_ib_info *ib_info;
1783	struct amdgpu_cs_request *ibs_request;
1784	uint64_t bo_mc;
1785	volatile uint32_t *bo_cpu;
1786	int i, j, r, loop, ring_id;
1787	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1788	amdgpu_va_handle va_handle;
1789	struct drm_amdgpu_info_hw_ip hw_ip_info;
1790
1791	pm4 = calloc(pm4_dw, sizeof(*pm4));
1792	CU_ASSERT_NOT_EQUAL(pm4, NULL);
1793
1794	ib_info = calloc(1, sizeof(*ib_info));
1795	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1796
1797	ibs_request = calloc(1, sizeof(*ibs_request));
1798	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1799
1800	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1801	CU_ASSERT_EQUAL(r, 0);
1802
1803	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1804	CU_ASSERT_EQUAL(r, 0);
1805
1806	/* prepare resource */
1807	resources = calloc(1, sizeof(amdgpu_bo_handle));
1808	CU_ASSERT_NOT_EQUAL(resources, NULL);
1809
1810	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1811		loop = 0;
1812		while (loop < 2) {
1813			/* allocate a GTT bo (cached first, then USWC) as the fill target */
1814			r = amdgpu_bo_alloc_and_map(device_handle,
1815						    sdma_write_length, 4096,
1816						    AMDGPU_GEM_DOMAIN_GTT,
1817						    gtt_flags[loop], &bo, (void**)&bo_cpu,
1818						    &bo_mc, &va_handle);
1819			CU_ASSERT_EQUAL(r, 0);
1820
1821			/* clear bo */
1822			memset((void*)bo_cpu, 0, sdma_write_length);
1823
1824			resources[0] = bo;
1825
1826			/* build the PM4 stream for the constant-fill test */
1827			i = j = 0;
1828			if (ip_type == AMDGPU_HW_IP_DMA) {
1829				if (family_id == AMDGPU_FAMILY_SI) {
1830					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
1831								  0, 0, 0,
1832								  sdma_write_length / 4);
1833					pm4[i++] = 0xfffffffc & bo_mc;
1834					pm4[i++] = 0xdeadbeaf;
1835					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
1836				} else {
1837					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
1838							       SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
1839					pm4[i++] = 0xffffffff & bo_mc;
1840					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1841					pm4[i++] = 0xdeadbeaf;
1842					if (family_id >= AMDGPU_FAMILY_AI)
1843						pm4[i++] = sdma_write_length - 1;
1844					else
1845						pm4[i++] = sdma_write_length;
1846				}
1847			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1848				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1849				if (family_id == AMDGPU_FAMILY_SI) {
1850					pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
1851					pm4[i++] = 0xdeadbeaf;
1852					pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
1853						   PACKET3_DMA_DATA_SI_DST_SEL(0) |
1854						   PACKET3_DMA_DATA_SI_SRC_SEL(2) |
1855						   PACKET3_DMA_DATA_SI_CP_SYNC;
1856					pm4[i++] = 0xffffffff & bo_mc;
1857					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1858					pm4[i++] = sdma_write_length;
1859				} else {
1860					pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
1861					pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
1862						   PACKET3_DMA_DATA_DST_SEL(0) |
1863						   PACKET3_DMA_DATA_SRC_SEL(2) |
1864						   PACKET3_DMA_DATA_CP_SYNC;
1865					pm4[i++] = 0xdeadbeaf;
1866					pm4[i++] = 0;
1867					pm4[i++] = 0xfffffffc & bo_mc;
1868					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1869					pm4[i++] = sdma_write_length;
1870				}
1871			}
1872
1873			amdgpu_test_exec_cs_helper(context_handle,
1874						   ip_type, ring_id,
1875						   i, pm4,
1876						   1, resources,
1877						   ib_info, ibs_request);
1878
1879			/* verify the fill: every dword should read 0xdeadbeaf */
1880			i = 0;
1881			while (i < (sdma_write_length / 4)) {
1882				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
1883			}
1884
1885			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
1886						     sdma_write_length);
1887			CU_ASSERT_EQUAL(r, 0);
1888			loop++;
1889		}
1890	}
1891	/* clean resources */
1892	free(resources);
1893	free(ibs_request);
1894	free(ib_info);
1895	free(pm4);
1896
1897	/* end of test */
1898	r = amdgpu_cs_ctx_free(context_handle);
1899	CU_ASSERT_EQUAL(r, 0);
1900}
1901
1902static void amdgpu_command_submission_sdma_const_fill(void)
1903{
1904	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
1905}
1906
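/*
 * Copy-linear test: fill bo1 with 0xaa, copy it into bo2 with SDMA
 * COPY_LINEAR (PACKET3 DMA_DATA memory-to-memory on GFX/COMPUTE), then
 * verify bo2 from the CPU across the cached/USWC mapping combinations of
 * the two buffers.
 */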
1907static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
1908{
1909	const int sdma_write_length = 1024;
1910	const int pm4_dw = 256;
1911	amdgpu_context_handle context_handle;
1912	amdgpu_bo_handle bo1, bo2;
1913	amdgpu_bo_handle *resources;
1914	uint32_t *pm4;
1915	struct amdgpu_cs_ib_info *ib_info;
1916	struct amdgpu_cs_request *ibs_request;
1917	uint64_t bo1_mc, bo2_mc;
1918	volatile unsigned char *bo1_cpu, *bo2_cpu;
1919	int i, j, r, loop1, loop2, ring_id;
1920	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1921	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
1922	struct drm_amdgpu_info_hw_ip hw_ip_info;
1923
1924	pm4 = calloc(pm4_dw, sizeof(*pm4));
1925	CU_ASSERT_NOT_EQUAL(pm4, NULL);
1926
1927	ib_info = calloc(1, sizeof(*ib_info));
1928	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1929
1930	ibs_request = calloc(1, sizeof(*ibs_request));
1931	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1932
1933	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1934	CU_ASSERT_EQUAL(r, 0);
1935
1936	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1937	CU_ASSERT_EQUAL(r, 0);
1938
1939	/* prepare resource */
1940	resources = calloc(2, sizeof(amdgpu_bo_handle));
1941	CU_ASSERT_NOT_EQUAL(resources, NULL);
1942
1943	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1944		loop1 = 0;
1945		/* cover all four cached/USWC mapping combinations; loop2 must
		 * be reset inside the outer loop, otherwise only the first two
		 * combinations would ever run */
1946		while (loop1 < 2) {
			loop2 = 0;
1947			while (loop2 < 2) {
1948				/* allocate bo1, the copy source */
1949				r = amdgpu_bo_alloc_and_map(device_handle,
1950							    sdma_write_length, 4096,
1951							    AMDGPU_GEM_DOMAIN_GTT,
1952							    gtt_flags[loop1], &bo1,
1953							    (void**)&bo1_cpu, &bo1_mc,
1954							    &bo1_va_handle);
1955				CU_ASSERT_EQUAL(r, 0);
1956
1957				/* set bo1 */
1958				memset((void*)bo1_cpu, 0xaa, sdma_write_length);
1959
1960				/* allocate bo2, the copy destination */
1961				r = amdgpu_bo_alloc_and_map(device_handle,
1962							    sdma_write_length, 4096,
1963							    AMDGPU_GEM_DOMAIN_GTT,
1964							    gtt_flags[loop2], &bo2,
1965							    (void**)&bo2_cpu, &bo2_mc,
1966							    &bo2_va_handle);
1967				CU_ASSERT_EQUAL(r, 0);
1968
1969				/* clear bo2 */
1970				memset((void*)bo2_cpu, 0, sdma_write_length);
1971
1972				resources[0] = bo1;
1973				resources[1] = bo2;
1974
1975				/* build the PM4 stream for the copy-linear test */
1976				i = j = 0;
1977				if (ip_type == AMDGPU_HW_IP_DMA) {
1978					if (family_id == AMDGPU_FAMILY_SI) {
1979						pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
1980									  0, 0, 0,
1981									  sdma_write_length);
1982						pm4[i++] = 0xffffffff & bo2_mc;
1983						pm4[i++] = 0xffffffff & bo1_mc;
1984						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1985						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1986					} else {
1987						pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
1988								       SDMA_COPY_SUB_OPCODE_LINEAR,
1989								       0);
1990						if (family_id >= AMDGPU_FAMILY_AI)
1991							pm4[i++] = sdma_write_length - 1;
1992						else
1993							pm4[i++] = sdma_write_length;
1994						pm4[i++] = 0;
1995						pm4[i++] = 0xffffffff & bo1_mc;
1996						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1997						pm4[i++] = 0xffffffff & bo2_mc;
1998						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1999					}
2000				} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
2001					   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
2002					if (family_id == AMDGPU_FAMILY_SI) {
2003						pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
2004						pm4[i++] = 0xfffffffc & bo1_mc;
2005						pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
2006							   PACKET3_DMA_DATA_SI_DST_SEL(0) |
2007							   PACKET3_DMA_DATA_SI_SRC_SEL(0) |
2008							   PACKET3_DMA_DATA_SI_CP_SYNC |
2009							   (0xffff00000000 & bo1_mc) >> 32;
2010						pm4[i++] = 0xfffffffc & bo2_mc;
2011						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
2012						pm4[i++] = sdma_write_length;
2013					} else {
2014						pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
2015						pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
2016							   PACKET3_DMA_DATA_DST_SEL(0) |
2017							   PACKET3_DMA_DATA_SRC_SEL(0) |
2018							   PACKET3_DMA_DATA_CP_SYNC;
2019						pm4[i++] = 0xfffffffc & bo1_mc;
2020						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
2021						pm4[i++] = 0xfffffffc & bo2_mc;
2022						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
2023						pm4[i++] = sdma_write_length;
2024					}
2025				}
2026
2027				amdgpu_test_exec_cs_helper(context_handle,
2028							   ip_type, ring_id,
2029							   i, pm4,
2030							   2, resources,
2031							   ib_info, ibs_request);
2032
2033				/* verify the copy: bo2 should now hold bo1's 0xaa pattern */
2034				i = 0;
2035				while (i < sdma_write_length) {
2036					CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
2037				}
2038				r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
2039							     sdma_write_length);
2040				CU_ASSERT_EQUAL(r, 0);
2041				r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
2042							     sdma_write_length);
2043				CU_ASSERT_EQUAL(r, 0);
2044				loop2++;
2045			}
2046			loop1++;
2047		}
2048	}
2049	/* clean resources */
2050	free(resources);
2051	free(ibs_request);
2052	free(ib_info);
2053	free(pm4);
2054
2055	/* end of test */
2056	r = amdgpu_cs_ctx_free(context_handle);
2057	CU_ASSERT_EQUAL(r, 0);
2058}
2059
2060static void amdgpu_command_submission_sdma_copy_linear(void)
2061{
2062	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
2063}
2064
2065static void amdgpu_command_submission_sdma(void)
2066{
2067	amdgpu_command_submission_sdma_write_linear();
2068	amdgpu_command_submission_sdma_const_fill();
2069	amdgpu_command_submission_sdma_copy_linear();
2070}
2071
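/*
 * Multi-fence test: submit two identical GFX CS, each carrying a CE IB
 * (constant-engine counter setup) and a DE IB that waits on the CE counter,
 * then wait on both fences with amdgpu_cs_wait_fences() in the requested
 * wait_all mode.
 */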
2072static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
2073{
2074	amdgpu_context_handle context_handle;
2075	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
2076	void *ib_result_cpu, *ib_result_ce_cpu;
2077	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
2078	struct amdgpu_cs_request ibs_request[2] = {0};
2079	struct amdgpu_cs_ib_info ib_info[2];
2080	struct amdgpu_cs_fence fence_status[2] = {0};
2081	uint32_t *ptr;
2082	uint32_t expired;
2083	amdgpu_bo_list_handle bo_list;
2084	amdgpu_va_handle va_handle, va_handle_ce;
2085	int r;
2086	int i = 0, ib_cs_num = 2;
2087	struct drm_amdgpu_info_hw_ip info;
2088
2089	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
2090	CU_ASSERT_EQUAL(r, 0);
2091
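	/* the CE/DE counter packets built below are not supported on GFX11+ */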
2092	if (info.hw_ip_version_major >= 11)
2093		return;
2094
2095	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2096	CU_ASSERT_EQUAL(r, 0);
2097
2098	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
2099				    AMDGPU_GEM_DOMAIN_GTT, 0,
2100				    &ib_result_handle, &ib_result_cpu,
2101				    &ib_result_mc_address, &va_handle);
2102	CU_ASSERT_EQUAL(r, 0);
2103
2104	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
2105				    AMDGPU_GEM_DOMAIN_GTT, 0,
2106				    &ib_result_ce_handle, &ib_result_ce_cpu,
2107				    &ib_result_ce_mc_address, &va_handle_ce);
2108	CU_ASSERT_EQUAL(r, 0);
2109
2110	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
2111			       ib_result_ce_handle, &bo_list);
2112	CU_ASSERT_EQUAL(r, 0);
2113
2114	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
2115
2116	/* IT_SET_CE_DE_COUNTERS */
2117	ptr = ib_result_ce_cpu;
2118	if (family_id != AMDGPU_FAMILY_SI) {
2119		ptr[i++] = 0xc0008900;
2120		ptr[i++] = 0;
2121	}
2122	ptr[i++] = 0xc0008400;
2123	ptr[i++] = 1;
2124	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
2125	ib_info[0].size = i;
2126	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
2127
2128	/* IT_WAIT_ON_CE_COUNTER */
2129	ptr = ib_result_cpu;
2130	ptr[0] = 0xc0008600;
2131	ptr[1] = 0x00000001;
2132	ib_info[1].ib_mc_address = ib_result_mc_address;
2133	ib_info[1].size = 2;
2134
2135	for (i = 0; i < ib_cs_num; i++) {
2136		ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
2137		ibs_request[i].number_of_ibs = 2;
2138		ibs_request[i].ibs = ib_info;
2139		ibs_request[i].resources = bo_list;
2140		ibs_request[i].fence_info.handle = NULL;
2141	}
2142
2143	r = amdgpu_cs_submit(context_handle, 0, ibs_request, ib_cs_num);
2144	CU_ASSERT_EQUAL(r, 0);
2146
2147	for (i = 0; i < ib_cs_num; i++) {
2148		fence_status[i].context = context_handle;
2149		fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
2150		fence_status[i].fence = ibs_request[i].seq_no;
2151	}
2152
2153	r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
2154				AMDGPU_TIMEOUT_INFINITE,
2155				&expired, NULL);
2156	CU_ASSERT_EQUAL(r, 0);
2157
2158	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
2159				     ib_result_mc_address, 4096);
2160	CU_ASSERT_EQUAL(r, 0);
2161
2162	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
2163				     ib_result_ce_mc_address, 4096);
2164	CU_ASSERT_EQUAL(r, 0);
2165
2166	r = amdgpu_bo_list_destroy(bo_list);
2167	CU_ASSERT_EQUAL(r, 0);
2168
2169	r = amdgpu_cs_ctx_free(context_handle);
2170	CU_ASSERT_EQUAL(r, 0);
2171}
2172
2173static void amdgpu_command_submission_multi_fence(void)
2174{
2175	amdgpu_command_submission_multi_fence_wait_all(true);
2176	amdgpu_command_submission_multi_fence_wait_all(false);
2177}
2178
2179static void amdgpu_userptr_test(void)
2180{
2181	int i, r, j;
2182	uint32_t *pm4 = NULL;
2183	uint64_t bo_mc;
2184	void *ptr = NULL;
2185	int pm4_dw = 256;
2186	int sdma_write_length = 4;
2187	amdgpu_bo_handle handle;
2188	amdgpu_context_handle context_handle;
2189	struct amdgpu_cs_ib_info *ib_info;
2190	struct amdgpu_cs_request *ibs_request;
2191	amdgpu_bo_handle buf_handle;
2192	amdgpu_va_handle va_handle;
2193
2194	pm4 = calloc(pm4_dw, sizeof(*pm4));
2195	CU_ASSERT_NOT_EQUAL(pm4, NULL);
2196
2197	ib_info = calloc(1, sizeof(*ib_info));
2198	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
2199
2200	ibs_request = calloc(1, sizeof(*ibs_request));
2201	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
2202
2203	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2204	CU_ASSERT_EQUAL(r, 0);
2205
2206	r = posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
2207	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_NOT_EQUAL(ptr, NULL);
2208	memset(ptr, 0, BUFFER_SIZE);
2209
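	/* wrap the anonymous CPU pages in a userptr BO and map it into the
	 * GPU VA space so SDMA can write directly into malloc'd memory */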
2210	r = amdgpu_create_bo_from_user_mem(device_handle,
2211					   ptr, BUFFER_SIZE, &buf_handle);
2212	CU_ASSERT_EQUAL(r, 0);
2213
2214	r = amdgpu_va_range_alloc(device_handle,
2215				  amdgpu_gpu_va_range_general,
2216				  BUFFER_SIZE, 1, 0, &bo_mc,
2217				  &va_handle, 0);
2218	CU_ASSERT_EQUAL(r, 0);
2219
2220	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
2221	CU_ASSERT_EQUAL(r, 0);
2222
2223	handle = buf_handle;
2224
2225	j = i = 0;
2226
2227	if (family_id == AMDGPU_FAMILY_SI)
2228		pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
2229				sdma_write_length);
2230	else
2231		pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
2232				SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
2233	pm4[i++] = 0xffffffff & bo_mc;
2234	pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
2235	if (family_id >= AMDGPU_FAMILY_AI)
2236		pm4[i++] = sdma_write_length - 1;
2237	else if (family_id != AMDGPU_FAMILY_SI)
2238		pm4[i++] = sdma_write_length;
2239
2240	while (j++ < sdma_write_length)
2241		pm4[i++] = 0xdeadbeaf;
2242
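	/* fork a child that dirties pm4 in its copy and exits right away;
	 * this exercises copy-on-write against the test's pages - the
	 * parent's submission below must be unaffected */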
2243	if (!fork()) {
2244		pm4[0] = 0x0;
2245		exit(0);
2246	}
2247
2248	amdgpu_test_exec_cs_helper(context_handle,
2249				   AMDGPU_HW_IP_DMA, 0,
2250				   i, pm4,
2251				   1, &handle,
2252				   ib_info, ibs_request);
2253	i = 0;
2254	while (i < sdma_write_length) {
2255		CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
2256	}
2257	free(ibs_request);
2258	free(ib_info);
2259	free(pm4);
2260
2261	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
2262	CU_ASSERT_EQUAL(r, 0);
2263	r = amdgpu_va_range_free(va_handle);
2264	CU_ASSERT_EQUAL(r, 0);
2265	r = amdgpu_bo_free(buf_handle);
2266	CU_ASSERT_EQUAL(r, 0);
2267	free(ptr);
2268
2269	r = amdgpu_cs_ctx_free(context_handle);
2270	CU_ASSERT_EQUAL(r, 0);
2271
2272	wait(NULL);
2273}
2274
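/*
 * Sync-dependency test: submit a compute-shader dispatch on context 1, then
 * a WRITE_DATA of 99 on context 0 that lists the first submission as a
 * dependency. If the dependency is honored, the write lands only after the
 * shader finishes and the buffer reads back 99.
 */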
2275static void amdgpu_sync_dependency_test(void)
2276{
2277	amdgpu_context_handle context_handle[2];
2278	amdgpu_bo_handle ib_result_handle;
2279	void *ib_result_cpu;
2280	uint64_t ib_result_mc_address;
2281	struct amdgpu_cs_request ibs_request;
2282	struct amdgpu_cs_ib_info ib_info;
2283	struct amdgpu_cs_fence fence_status;
2284	uint32_t expired;
2285	int i, j, r;
2286	amdgpu_bo_list_handle bo_list;
2287	amdgpu_va_handle va_handle;
2288	static uint32_t *ptr;
2289	uint64_t seq_no;
2290
2291	r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
2292	CU_ASSERT_EQUAL(r, 0);
2293	r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
2294	CU_ASSERT_EQUAL(r, 0);
2295
2296	r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
2297			AMDGPU_GEM_DOMAIN_GTT, 0,
2298						    &ib_result_handle, &ib_result_cpu,
2299						    &ib_result_mc_address, &va_handle);
2300	CU_ASSERT_EQUAL(r, 0);
2301
2302	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
2303			       &bo_list);
2304	CU_ASSERT_EQUAL(r, 0);
2305
2306	ptr = ib_result_cpu;
2307	i = 0;
2308
2309	memcpy(ptr + CODE_OFFSET, shader_bin, sizeof(shader_bin));
2310
2311	/* Dispatch minimal init config and verify it's executed */
2312	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
2313	ptr[i++] = 0x80000000;
2314	ptr[i++] = 0x80000000;
2315
2316	ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
2317	ptr[i++] = 0x80000000;
2318
2319
2320	/* Program compute regs */
2321	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2322	ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
2323	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
2324	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;
2325
2326
2327	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2328	ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
2329	/*
2330	 * 002c0040  COMPUTE_PGM_RSRC1 <- VGPRS = 0
2331	 *                                SGPRS = 1
2332	 *                                PRIORITY = 0
2333	 *                                FLOAT_MODE = 192 (0xc0)
2334	 *                                PRIV = 0
2335	 *                                DX10_CLAMP = 1
2336	 *                                DEBUG_MODE = 0
2337	 *                                IEEE_MODE = 0
2338	 *                                BULKY = 0
2339	 *                                CDBG_USER = 0
2340	 */
2342	ptr[i++] = 0x002c0040;
2343
2344
2345	/*
2346	 * 00000010  COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
2347	 *                                USER_SGPR = 8
2348	 *                                TRAP_PRESENT = 0
2349	 *                                TGID_X_EN = 0
2350	 *                                TGID_Y_EN = 0
2351	 *                                TGID_Z_EN = 0
2352	 *                                TG_SIZE_EN = 0
2353	 *                                TIDIG_COMP_CNT = 0
2354	 *                                EXCP_EN_MSB = 0
2355	 *                                LDS_SIZE = 0
2356	 *                                EXCP_EN = 0
2357	 */
2359	ptr[i++] = 0x00000010;
2360
2361
2362	/*
2363	 * 00000100  COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
2364	 *                                   WAVESIZE = 0
2365	 */
2367	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2368	ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
2369	ptr[i++] = 0x00000100;
2370
2371	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2372	ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
2373	ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
2374	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
2375
2376	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2377	ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
2378	ptr[i++] = 0;
2379
2380	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
2381	ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
2382	ptr[i++] = 1;
2383	ptr[i++] = 1;
2384	ptr[i++] = 1;
2385
2386
2387	/* Dispatch */
2388	ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
2389	ptr[i++] = 1;
2390	ptr[i++] = 1;
2391	ptr[i++] = 1;
2392	ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */
2393
2394
2395	while (i & 7)
2396		ptr[i++] = 0xffff1000; /* type-3 NOP: pad the IB to an 8-dword boundary */
2397
2398	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
2399	ib_info.ib_mc_address = ib_result_mc_address;
2400	ib_info.size = i;
2401
2402	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
2403	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
2404	ibs_request.ring = 0;
2405	ibs_request.number_of_ibs = 1;
2406	ibs_request.ibs = &ib_info;
2407	ibs_request.resources = bo_list;
2408	ibs_request.fence_info.handle = NULL;
2409
2410	r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request, 1);
2411	CU_ASSERT_EQUAL(r, 0);
2412	seq_no = ibs_request.seq_no;
2413
2416	/* Prepare second command with dependency on the first */
2417	j = i;
2418	ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
2419	ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
2420	ptr[i++] =          0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
2421	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
2422	ptr[i++] = 99;
2423
2424	while (i & 7)
2425		ptr[i++] = 0xffff1000; /* type-3 NOP: pad the IB to an 8-dword boundary */
2426
2427	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
2428	ib_info.ib_mc_address = ib_result_mc_address + j * 4;
2429	ib_info.size = i - j;
2430
2431	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
2432	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
2433	ibs_request.ring = 0;
2434	ibs_request.number_of_ibs = 1;
2435	ibs_request.ibs = &ib_info;
2436	ibs_request.resources = bo_list;
2437	ibs_request.fence_info.handle = NULL;
2438
2439	ibs_request.number_of_dependencies = 1;
2440
2441	ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
	CU_ASSERT_NOT_EQUAL(ibs_request.dependencies, NULL);
2442	ibs_request.dependencies[0].context = context_handle[1];
2443	ibs_request.dependencies[0].ip_instance = 0;
2444	ibs_request.dependencies[0].ring = 0;
2445	ibs_request.dependencies[0].fence = seq_no;
2446
2447
2448	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request, 1);
2449	CU_ASSERT_EQUAL(r, 0);
2450
2451
2452	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
2453	fence_status.context = context_handle[0];
2454	fence_status.ip_type = AMDGPU_HW_IP_GFX;
2455	fence_status.ip_instance = 0;
2456	fence_status.ring = 0;
2457	fence_status.fence = ibs_request.seq_no;
2458
2459	r = amdgpu_cs_query_fence_status(&fence_status,
2460		       AMDGPU_TIMEOUT_INFINITE, 0, &expired);
2461	CU_ASSERT_EQUAL(r, 0);
2462
2463	/* Expect the second command to wait for shader to complete */
2464	CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);
2465
2466	r = amdgpu_bo_list_destroy(bo_list);
2467	CU_ASSERT_EQUAL(r, 0);
2468
2469	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
2470				     ib_result_mc_address, 4096);
2471	CU_ASSERT_EQUAL(r, 0);
2472
2473	r = amdgpu_cs_ctx_free(context_handle[0]);
2474	CU_ASSERT_EQUAL(r, 0);
2475	r = amdgpu_cs_ctx_free(context_handle[1]);
2476	CU_ASSERT_EQUAL(r, 0);
2477
2478	free(ibs_request.dependencies);
2479}
2480
2481static void amdgpu_compute_dispatch_test(void)
2482{
2483	amdgpu_test_dispatch_helper(device_handle, AMDGPU_HW_IP_COMPUTE);
2484}
2485static void amdgpu_gfx_dispatch_test(void)
2486{
2487	amdgpu_test_dispatch_helper(device_handle, AMDGPU_HW_IP_GFX);
2488}
2489
2490static void amdgpu_draw_test(void)
2491{
2492	amdgpu_test_draw_helper(device_handle);
2493}
2494static void amdgpu_gpu_reset_test(void)
2495{
2496	int r;
2497	char debugfs_path[256], tmp[10];
2498	int fd;
2499	struct stat sbuf;
2500	amdgpu_context_handle context_handle;
2501	uint32_t hang_state, hangs;
2502
2503	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2504	CU_ASSERT_EQUAL(r, 0);
2505
2506	r = fstat(drm_amdgpu[0], &sbuf);
2507	CU_ASSERT_EQUAL(r, 0);
2508
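	/* trigger a GPU reset by reading the amdgpu_gpu_recover debugfs
	 * entry of the DRM minor backing drm_amdgpu[0] */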
2509	snprintf(debugfs_path, sizeof(debugfs_path),
		 "/sys/kernel/debug/dri/%d/amdgpu_gpu_recover", minor(sbuf.st_rdev));
2510	fd = open(debugfs_path, O_RDONLY);
2511	CU_ASSERT(fd >= 0);
2512
2513	r = read(fd, tmp, sizeof(tmp));
2514	CU_ASSERT(r > 0);
2515
2516	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
2517	CU_ASSERT_EQUAL(r, 0);
2518	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
2519
2520	close(fd);
2521	r = amdgpu_cs_ctx_free(context_handle);
2522	CU_ASSERT_EQUAL(r, 0);
2523
2524	amdgpu_compute_dispatch_test();
2525	amdgpu_gfx_dispatch_test();
2526}
2527
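/*
 * Stable-pstate test: query the default profile (expected NONE), force the
 * PEAK profile through the context ioctl, and read it back to confirm.
 */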
2528static void amdgpu_stable_pstate_test(void)
2529{
2530	int r;
2531	amdgpu_context_handle context_handle;
2532	uint32_t current_pstate = 0, new_pstate = 0;
2533
2534	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2535	CU_ASSERT_EQUAL(r, 0);
2536
2537	r = amdgpu_cs_ctx_stable_pstate(context_handle,
2538					AMDGPU_CTX_OP_GET_STABLE_PSTATE,
2539					0, &current_pstate);
2540	CU_ASSERT_EQUAL(r, 0);
2541	CU_ASSERT_EQUAL(current_pstate, AMDGPU_CTX_STABLE_PSTATE_NONE);
2542
2543	r = amdgpu_cs_ctx_stable_pstate(context_handle,
2544					AMDGPU_CTX_OP_SET_STABLE_PSTATE,
2545					AMDGPU_CTX_STABLE_PSTATE_PEAK, NULL);
2546	CU_ASSERT_EQUAL(r, 0);
2547
2548	r = amdgpu_cs_ctx_stable_pstate(context_handle,
2549					AMDGPU_CTX_OP_GET_STABLE_PSTATE,
2550					0, &new_pstate);
2551	CU_ASSERT_EQUAL(r, 0);
2552	CU_ASSERT_EQUAL(new_pstate, AMDGPU_CTX_STABLE_PSTATE_PEAK);
2553
2554	r = amdgpu_cs_ctx_free(context_handle);
2555	CU_ASSERT_EQUAL(r, 0);
2556}
2557