basic_tests.c revision 41687f09
1/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22*/
23
24#include <stdio.h>
25#include <stdlib.h>
26#include <unistd.h>
27#include <sys/types.h>
28#ifdef MAJOR_IN_SYSMACROS
29#include <sys/sysmacros.h>
30#endif
31#include <sys/stat.h>
32#include <fcntl.h>
33#if HAVE_ALLOCA_H
34# include <alloca.h>
35#endif
36#include <sys/wait.h>
37
38#include "CUnit/Basic.h"
39
40#include "amdgpu_test.h"
41#include "amdgpu_drm.h"
42#include "amdgpu_internal.h"
43#include "util_math.h"
44
45static  amdgpu_device_handle device_handle;
46static  uint32_t  major_version;
47static  uint32_t  minor_version;
48static  uint32_t  family_id;
49
50static void amdgpu_query_info_test(void);
51static void amdgpu_command_submission_gfx(void);
52static void amdgpu_command_submission_compute(void);
53static void amdgpu_command_submission_multi_fence(void);
54static void amdgpu_command_submission_sdma(void);
55static void amdgpu_userptr_test(void);
56static void amdgpu_semaphore_test(void);
57static void amdgpu_sync_dependency_test(void);
58static void amdgpu_bo_eviction_test(void);
59static void amdgpu_compute_dispatch_test(void);
60static void amdgpu_gfx_dispatch_test(void);
61static void amdgpu_draw_test(void);
62static void amdgpu_gpu_reset_test(void);
63
64static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
65static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
66static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
67static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
68				       unsigned ip_type,
69				       int instance, int pm4_dw, uint32_t *pm4_src,
70				       int res_cnt, amdgpu_bo_handle *resources,
71				       struct amdgpu_cs_ib_info *ib_info,
72				       struct amdgpu_cs_request *ibs_request);
73
74CU_TestInfo basic_tests[] = {
75	{ "Query Info Test",  amdgpu_query_info_test },
76	{ "Userptr Test",  amdgpu_userptr_test },
77	{ "bo eviction Test",  amdgpu_bo_eviction_test },
78	{ "Command submission Test (GFX)",  amdgpu_command_submission_gfx },
79	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
80	{ "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
81	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
82	{ "SW semaphore Test",  amdgpu_semaphore_test },
83	{ "Sync dependency Test",  amdgpu_sync_dependency_test },
84	{ "Dispatch Test (Compute)",  amdgpu_compute_dispatch_test },
85	{ "Dispatch Test (GFX)",  amdgpu_gfx_dispatch_test },
86	{ "Draw Test",  amdgpu_draw_test },
87	{ "GPU reset Test", amdgpu_gpu_reset_test },
88	CU_TEST_INFO_NULL,
89};
90#define BUFFER_SIZE (MAX2(8 * 1024, getpagesize()))
91#define SDMA_PKT_HEADER_op_offset 0
92#define SDMA_PKT_HEADER_op_mask   0x000000FF
93#define SDMA_PKT_HEADER_op_shift  0
94#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
95#define SDMA_OPCODE_CONSTANT_FILL  11
96#       define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)           ((x) << 14)
97	/* 0 = byte fill
98	 * 2 = DW fill
99	 */
100#define SDMA_PACKET(op, sub_op, e)	((((e) & 0xFFFF) << 16) |	\
101					(((sub_op) & 0xFF) << 8) |	\
102					(((op) & 0xFF) << 0))
103#define	SDMA_OPCODE_WRITE				  2
104#       define SDMA_WRITE_SUB_OPCODE_LINEAR               0
105#       define SDMA_WRTIE_SUB_OPCODE_TILED                1
106
107#define	SDMA_OPCODE_COPY				  1
108#       define SDMA_COPY_SUB_OPCODE_LINEAR                0
109
110#define	SDMA_OPCODE_ATOMIC				  10
111#		define SDMA_ATOMIC_LOOP(x)               ((x) << 0)
112        /* 0 - single_pass_atomic.
113         * 1 - loop_until_compare_satisfied.
114         */
115#		define SDMA_ATOMIC_TMZ(x)                ((x) << 2)
116		/* 0 - non-TMZ.
117		 * 1 - TMZ.
118	     */
119#		define SDMA_ATOMIC_OPCODE(x)             ((x) << 9)
120		/* TC_OP_ATOMIC_CMPSWAP_RTN_32 0x00000008
121		 * same as Packet 3
122		 */
123
124#define GFX_COMPUTE_NOP  0xffff1000
125#define SDMA_NOP  0x0
126
127/* PM4 */
128#define	PACKET_TYPE0	0
129#define	PACKET_TYPE1	1
130#define	PACKET_TYPE2	2
131#define	PACKET_TYPE3	3
132
133#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
134#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
135#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
136#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
137#define PACKET0(reg, n)	((PACKET_TYPE0 << 30) |				\
138			 ((reg) & 0xFFFF) |			\
139			 ((n) & 0x3FFF) << 16)
140#define CP_PACKET2			0x80000000
141#define		PACKET2_PAD_SHIFT		0
142#define		PACKET2_PAD_MASK		(0x3fffffff << 0)
143
144#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
145
146#define PACKET3(op, n)	((PACKET_TYPE3 << 30) |				\
147			 (((op) & 0xFF) << 8) |				\
148			 ((n) & 0x3FFF) << 16)
149#define PACKET3_COMPUTE(op, n) PACKET3(op, n) | (1 << 1)
150
151/* Packet 3 types */
152#define	PACKET3_NOP					0x10
153
154#define	PACKET3_WRITE_DATA				0x37
155#define		WRITE_DATA_DST_SEL(x)                   ((x) << 8)
156		/* 0 - register
157		 * 1 - memory (sync - via GRBM)
158		 * 2 - gl2
159		 * 3 - gds
160		 * 4 - reserved
161		 * 5 - memory (async - direct)
162		 */
163#define		WR_ONE_ADDR                             (1 << 16)
164#define		WR_CONFIRM                              (1 << 20)
165#define		WRITE_DATA_CACHE_POLICY(x)              ((x) << 25)
166		/* 0 - LRU
167		 * 1 - Stream
168		 */
169#define		WRITE_DATA_ENGINE_SEL(x)                ((x) << 30)
170		/* 0 - me
171		 * 1 - pfp
172		 * 2 - ce
173		 */
174
175#define	PACKET3_ATOMIC_MEM				0x1E
176#define     TC_OP_ATOMIC_CMPSWAP_RTN_32          0x00000008
177#define     ATOMIC_MEM_COMMAND(x)               ((x) << 8)
178            /* 0 - single_pass_atomic.
179             * 1 - loop_until_compare_satisfied.
180             */
181#define     ATOMIC_MEM_CACHEPOLICAY(x)          ((x) << 25)
182            /* 0 - lru.
183             * 1 - stream.
184             */
185#define     ATOMIC_MEM_ENGINESEL(x)             ((x) << 30)
186            /* 0 - micro_engine.
187			 */
188
189#define	PACKET3_DMA_DATA				0x50
190/* 1. header
191 * 2. CONTROL
192 * 3. SRC_ADDR_LO or DATA [31:0]
193 * 4. SRC_ADDR_HI [31:0]
194 * 5. DST_ADDR_LO [31:0]
195 * 6. DST_ADDR_HI [7:0]
196 * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
197 */
198/* CONTROL */
199#              define PACKET3_DMA_DATA_ENGINE(x)     ((x) << 0)
200		/* 0 - ME
201		 * 1 - PFP
202		 */
203#              define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
204		/* 0 - LRU
205		 * 1 - Stream
206		 * 2 - Bypass
207		 */
208#              define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15)
209#              define PACKET3_DMA_DATA_DST_SEL(x)  ((x) << 20)
210		/* 0 - DST_ADDR using DAS
211		 * 1 - GDS
212		 * 3 - DST_ADDR using L2
213		 */
214#              define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
215		/* 0 - LRU
216		 * 1 - Stream
217		 * 2 - Bypass
218		 */
219#              define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27)
220#              define PACKET3_DMA_DATA_SRC_SEL(x)  ((x) << 29)
221		/* 0 - SRC_ADDR using SAS
222		 * 1 - GDS
223		 * 2 - DATA
224		 * 3 - SRC_ADDR using L2
225		 */
226#              define PACKET3_DMA_DATA_CP_SYNC     (1 << 31)
227/* COMMAND */
228#              define PACKET3_DMA_DATA_DIS_WC      (1 << 21)
229#              define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22)
230		/* 0 - none
231		 * 1 - 8 in 16
232		 * 2 - 8 in 32
233		 * 3 - 8 in 64
234		 */
235#              define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24)
236		/* 0 - none
237		 * 1 - 8 in 16
238		 * 2 - 8 in 32
239		 * 3 - 8 in 64
240		 */
241#              define PACKET3_DMA_DATA_CMD_SAS     (1 << 26)
242		/* 0 - memory
243		 * 1 - register
244		 */
245#              define PACKET3_DMA_DATA_CMD_DAS     (1 << 27)
246		/* 0 - memory
247		 * 1 - register
248		 */
249#              define PACKET3_DMA_DATA_CMD_SAIC    (1 << 28)
250#              define PACKET3_DMA_DATA_CMD_DAIC    (1 << 29)
251#              define PACKET3_DMA_DATA_CMD_RAW_WAIT  (1 << 30)
252
253#define SDMA_PACKET_SI(op, b, t, s, cnt)	((((op) & 0xF) << 28) |	\
254						(((b) & 0x1) << 26) |		\
255						(((t) & 0x1) << 23) |		\
256						(((s) & 0x1) << 22) |		\
257						(((cnt) & 0xFFFFF) << 0))
258#define	SDMA_OPCODE_COPY_SI	3
259#define SDMA_OPCODE_CONSTANT_FILL_SI	13
260#define SDMA_NOP_SI  0xf
261#define GFX_COMPUTE_NOP_SI 0x80000000
262#define	PACKET3_DMA_DATA_SI	0x41
263#              define PACKET3_DMA_DATA_SI_ENGINE(x)     ((x) << 27)
264		/* 0 - ME
265		 * 1 - PFP
266		 */
267#              define PACKET3_DMA_DATA_SI_DST_SEL(x)  ((x) << 20)
268		/* 0 - DST_ADDR using DAS
269		 * 1 - GDS
270		 * 3 - DST_ADDR using L2
271		 */
272#              define PACKET3_DMA_DATA_SI_SRC_SEL(x)  ((x) << 29)
273		/* 0 - SRC_ADDR using SAS
274		 * 1 - GDS
275		 * 2 - DATA
276		 * 3 - SRC_ADDR using L2
277		 */
278#              define PACKET3_DMA_DATA_SI_CP_SYNC     (1 << 31)
279
280
281#define PKT3_CONTEXT_CONTROL                   0x28
282#define     CONTEXT_CONTROL_LOAD_ENABLE(x)     (((unsigned)(x) & 0x1) << 31)
283#define     CONTEXT_CONTROL_LOAD_CE_RAM(x)     (((unsigned)(x) & 0x1) << 28)
284#define     CONTEXT_CONTROL_SHADOW_ENABLE(x)   (((unsigned)(x) & 0x1) << 31)
285
286#define PKT3_CLEAR_STATE                       0x12
287
288#define PKT3_SET_SH_REG                        0x76
289#define		PACKET3_SET_SH_REG_START			0x00002c00
290
291#define	PACKET3_DISPATCH_DIRECT				0x15
292#define PACKET3_EVENT_WRITE				0x46
293#define PACKET3_ACQUIRE_MEM				0x58
294#define PACKET3_SET_CONTEXT_REG				0x69
295#define PACKET3_SET_UCONFIG_REG				0x79
296#define PACKET3_DRAW_INDEX_AUTO				0x2D
297/* gfx 8 */
298#define mmCOMPUTE_PGM_LO                                                        0x2e0c
299#define mmCOMPUTE_PGM_RSRC1                                                     0x2e12
300#define mmCOMPUTE_TMPRING_SIZE                                                  0x2e18
301#define mmCOMPUTE_USER_DATA_0                                                   0x2e40
302#define mmCOMPUTE_USER_DATA_1                                                   0x2e41
303#define mmCOMPUTE_RESOURCE_LIMITS                                               0x2e15
304#define mmCOMPUTE_NUM_THREAD_X                                                  0x2e07
305
306
307
308#define SWAP_32(num) (((num & 0xff000000) >> 24) | \
309		      ((num & 0x0000ff00) << 8) | \
310		      ((num & 0x00ff0000) >> 8) | \
311		      ((num & 0x000000ff) << 24))
312
313
314/* Shader code
315 * void main()
316{
317
318	float x = some_input;
319		for (unsigned i = 0; i < 1000000; i++)
320  	x = sin(x);
321
322	u[0] = 42u;
323}
324*/
325
326static  uint32_t shader_bin[] = {
327	SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
328	SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
329	SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
330	SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
331};
332
333#define CODE_OFFSET 512
334#define DATA_OFFSET 1024
335
336enum cs_type {
337	CS_BUFFERCLEAR,
338	CS_BUFFERCOPY,
339	CS_HANG,
340	CS_HANG_SLOW
341};
342
343static const uint32_t bufferclear_cs_shader_gfx9[] = {
344    0xD1FD0000, 0x04010C08, 0x7E020204, 0x7E040205,
345    0x7E060206, 0x7E080207, 0xE01C2000, 0x80000100,
346    0xBF810000
347};
348
349static const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = {
350	{0x2e12, 0x000C0041},	//{ mmCOMPUTE_PGM_RSRC1,	  0x000C0041 },
351	{0x2e13, 0x00000090},	//{ mmCOMPUTE_PGM_RSRC2,	  0x00000090 },
352	{0x2e07, 0x00000040},	//{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 },
353	{0x2e08, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 },
354	{0x2e09, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
355};
356
357static const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5;
358
359static const uint32_t buffercopy_cs_shader_gfx9[] = {
360    0xD1FD0000, 0x04010C08, 0xE00C2000, 0x80000100,
361    0xBF8C0F70, 0xE01C2000, 0x80010100, 0xBF810000
362};
363
364static const uint32_t preamblecache_gfx9[] = {
365	0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
366	0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
367	0xc0026900, 0xb4,  0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
368	0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
369	0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
370	0xc0016900, 0x2d5, 0x10000, 0xc0016900,  0x2dc, 0x0,
371	0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
372	0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
373	0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20,
374	0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
375	0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0,
376	0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
377	0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
378	0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
379	0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
380	0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0,
381	0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1,
382	0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0,
383	0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0,
384	0xc0017900, 0x24b, 0x0
385};
386
387enum ps_type {
388	PS_CONST,
389	PS_TEX,
390	PS_HANG,
391	PS_HANG_SLOW
392};
393
394static const uint32_t ps_const_shader_gfx9[] = {
395    0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
396    0xD2960000, 0x00020300, 0xD2960001, 0x00020702,
397    0xC4001C0F, 0x00000100, 0xBF810000
398};
399
400static const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6;
401
402static const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = {
403    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
404     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 },
405     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 },
406     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 },
407     { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 },
408     { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 },
409     { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 },
410     { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 },
411     { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 },
412     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 }
413    }
414};
415
416static const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = {
417    0x00000004
418};
419
420static const uint32_t ps_num_sh_registers_gfx9 = 2;
421
422static const uint32_t ps_const_sh_registers_gfx9[][2] = {
423    {0x2C0A, 0x000C0040},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 },
424    {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 }
425};
426
427static const uint32_t ps_num_context_registers_gfx9 = 7;
428
429static const uint32_t ps_const_context_reg_gfx9[][2] = {
430    {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
431    {0xA1B6, 0x00000000}, //{ mmSPI_PS_IN_CONTROL,       0x00000000 },
432    {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
433    {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
434    {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
435    {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
436    {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004 }
437};
438
439static const uint32_t ps_tex_shader_gfx9[] = {
440    0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000,
441    0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00,
442    0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000,
443    0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F,
444    0x00000100, 0xBF810000
445};
446
447static const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = {
448    0x0000000B
449};
450
451static const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6;
452
453static const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = {
454    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
455     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 },
456     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 },
457     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 },
458     { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 },
459     { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 },
460     { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 },
461     { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 },
462     { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 },
463     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 }
464    }
465};
466
467static const uint32_t ps_tex_sh_registers_gfx9[][2] = {
468    {0x2C0A, 0x000C0081},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 },
469    {0x2C0B, 0x00000018}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 }
470};
471
472static const uint32_t ps_tex_context_reg_gfx9[][2] = {
473    {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
474    {0xA1B6, 0x00000001}, //{ mmSPI_PS_IN_CONTROL,       0x00000001 },
475    {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
476    {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
477    {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
478    {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
479    {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004  }
480};
481
482static const uint32_t vs_RectPosTexFast_shader_gfx9[] = {
483    0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100,
484    0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206,
485    0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080,
486    0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003,
487    0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101,
488    0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903,
489    0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100,
490    0xC400020F, 0x05060403, 0xBF810000
491};
492
493static const uint32_t cached_cmd_gfx9[] = {
494	0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0,
495	0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
496	0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf,
497	0xc0056900, 0x105, 0x0, 0x0,  0x0, 0x0, 0x12,
498	0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0,
499	0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
500	0xc0026900, 0x292, 0x20, 0x60201b8,
501	0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
502};
503
504unsigned int memcpy_ps_hang[] = {
505        0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100,
506        0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001,
507        0xC80D0101, 0xBF8C007F, 0xF0800F00, 0x00010002,
508        0xBEFE040C, 0xBF8C0F70, 0xBF800000, 0xBF800000,
509        0xF800180F, 0x03020100, 0xBF810000
510};
511
512struct amdgpu_test_shader {
513	uint32_t *shader;
514	uint32_t header_length;
515	uint32_t body_length;
516	uint32_t foot_length;
517};
518
519unsigned int memcpy_cs_hang_slow_ai_codes[] = {
520    0xd1fd0000, 0x04010c08, 0xe00c2000, 0x80000100,
521    0xbf8c0f70, 0xe01c2000, 0x80010100, 0xbf810000
522};
523
524struct amdgpu_test_shader memcpy_cs_hang_slow_ai = {
525        memcpy_cs_hang_slow_ai_codes,
526        4,
527        3,
528        1
529};
530
531unsigned int memcpy_cs_hang_slow_rv_codes[] = {
532    0x8e00860c, 0x32000000, 0xe00c2000, 0x80010100,
533    0xbf8c0f70, 0xe01c2000, 0x80020100, 0xbf810000
534};
535
536struct amdgpu_test_shader memcpy_cs_hang_slow_rv = {
537        memcpy_cs_hang_slow_rv_codes,
538        4,
539        3,
540        1
541};
542
543unsigned int memcpy_ps_hang_slow_ai_codes[] = {
544        0xbefc000c, 0xbe8e017e, 0xbefe077e, 0xd4080000,
545        0xd4090001, 0xd40c0100, 0xd40d0101, 0xf0800f00,
546        0x00400002, 0xbefe010e, 0xbf8c0f70, 0xbf800000,
547        0xbf800000, 0xbf800000, 0xbf800000, 0xc400180f,
548        0x03020100, 0xbf810000
549};
550
551struct amdgpu_test_shader memcpy_ps_hang_slow_ai = {
552        memcpy_ps_hang_slow_ai_codes,
553        7,
554        2,
555        9
556};
557
558int amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size,
559			unsigned alignment, unsigned heap, uint64_t alloc_flags,
560			uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu,
561			uint64_t *mc_address,
562			amdgpu_va_handle *va_handle)
563{
564	struct amdgpu_bo_alloc_request request = {};
565	amdgpu_bo_handle buf_handle;
566	amdgpu_va_handle handle;
567	uint64_t vmc_addr;
568	int r;
569
570	request.alloc_size = size;
571	request.phys_alignment = alignment;
572	request.preferred_heap = heap;
573	request.flags = alloc_flags;
574
575	r = amdgpu_bo_alloc(dev, &request, &buf_handle);
576	if (r)
577		return r;
578
579	r = amdgpu_va_range_alloc(dev,
580				  amdgpu_gpu_va_range_general,
581				  size, alignment, 0, &vmc_addr,
582				  &handle, 0);
583	if (r)
584		goto error_va_alloc;
585
586	r = amdgpu_bo_va_op_raw(dev, buf_handle, 0,  ALIGN(size, getpagesize()), vmc_addr,
587				   AMDGPU_VM_PAGE_READABLE |
588				   AMDGPU_VM_PAGE_WRITEABLE |
589				   AMDGPU_VM_PAGE_EXECUTABLE |
590				   mapping_flags,
591				   AMDGPU_VA_OP_MAP);
592	if (r)
593		goto error_va_map;
594
595	r = amdgpu_bo_cpu_map(buf_handle, cpu);
596	if (r)
597		goto error_cpu_map;
598
599	*bo = buf_handle;
600	*mc_address = vmc_addr;
601	*va_handle = handle;
602
603	return 0;
604
605 error_cpu_map:
606	amdgpu_bo_cpu_unmap(buf_handle);
607
608 error_va_map:
609	amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);
610
611 error_va_alloc:
612	amdgpu_bo_free(buf_handle);
613	return r;
614}
615
616
617
618CU_BOOL suite_basic_tests_enable(void)
619{
620	uint32_t asic_id;
621
622	if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
623					     &minor_version, &device_handle))
624		return CU_FALSE;
625
626	asic_id = device_handle->info.asic_id;
627
628	if (amdgpu_device_deinitialize(device_handle))
629		return CU_FALSE;
630
631	/* disable gfx engine basic test cases for Arturus due to no CPG */
632	if (asic_is_arcturus(asic_id)) {
633		if (amdgpu_set_test_active("Basic Tests",
634					"Command submission Test (GFX)",
635					CU_FALSE))
636			fprintf(stderr, "test deactivation failed - %s\n",
637				CU_get_error_msg());
638
639		if (amdgpu_set_test_active("Basic Tests",
640					"Command submission Test (Multi-Fence)",
641					CU_FALSE))
642			fprintf(stderr, "test deactivation failed - %s\n",
643				CU_get_error_msg());
644
645		if (amdgpu_set_test_active("Basic Tests",
646					"Sync dependency Test",
647					CU_FALSE))
648			fprintf(stderr, "test deactivation failed - %s\n",
649				CU_get_error_msg());
650	}
651
652	return CU_TRUE;
653}
654
655int suite_basic_tests_init(void)
656{
657	struct amdgpu_gpu_info gpu_info = {0};
658	int r;
659
660	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
661				   &minor_version, &device_handle);
662
663	if (r) {
664		if ((r == -EACCES) && (errno == EACCES))
665			printf("\n\nError:%s. "
666				"Hint:Try to run this test program as root.",
667				strerror(errno));
668		return CUE_SINIT_FAILED;
669	}
670
671	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
672	if (r)
673		return CUE_SINIT_FAILED;
674
675	family_id = gpu_info.family_id;
676
677	return CUE_SUCCESS;
678}
679
680int suite_basic_tests_clean(void)
681{
682	int r = amdgpu_device_deinitialize(device_handle);
683
684	if (r == 0)
685		return CUE_SUCCESS;
686	else
687		return CUE_SCLEAN_FAILED;
688}
689
690static void amdgpu_query_info_test(void)
691{
692	struct amdgpu_gpu_info gpu_info = {0};
693	uint32_t version, feature;
694	int r;
695
696	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
697	CU_ASSERT_EQUAL(r, 0);
698
699	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
700					  0, &version, &feature);
701	CU_ASSERT_EQUAL(r, 0);
702}
703
704static void amdgpu_command_submission_gfx_separate_ibs(void)
705{
706	amdgpu_context_handle context_handle;
707	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
708	void *ib_result_cpu, *ib_result_ce_cpu;
709	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
710	struct amdgpu_cs_request ibs_request = {0};
711	struct amdgpu_cs_ib_info ib_info[2];
712	struct amdgpu_cs_fence fence_status = {0};
713	uint32_t *ptr;
714	uint32_t expired;
715	amdgpu_bo_list_handle bo_list;
716	amdgpu_va_handle va_handle, va_handle_ce;
717	int r, i = 0;
718
719	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
720	CU_ASSERT_EQUAL(r, 0);
721
722	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
723				    AMDGPU_GEM_DOMAIN_GTT, 0,
724				    &ib_result_handle, &ib_result_cpu,
725				    &ib_result_mc_address, &va_handle);
726	CU_ASSERT_EQUAL(r, 0);
727
728	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
729				    AMDGPU_GEM_DOMAIN_GTT, 0,
730				    &ib_result_ce_handle, &ib_result_ce_cpu,
731				    &ib_result_ce_mc_address, &va_handle_ce);
732	CU_ASSERT_EQUAL(r, 0);
733
734	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
735			       ib_result_ce_handle, &bo_list);
736	CU_ASSERT_EQUAL(r, 0);
737
738	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
739
740	/* IT_SET_CE_DE_COUNTERS */
741	ptr = ib_result_ce_cpu;
742	if (family_id != AMDGPU_FAMILY_SI) {
743		ptr[i++] = 0xc0008900;
744		ptr[i++] = 0;
745	}
746	ptr[i++] = 0xc0008400;
747	ptr[i++] = 1;
748	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
749	ib_info[0].size = i;
750	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
751
752	/* IT_WAIT_ON_CE_COUNTER */
753	ptr = ib_result_cpu;
754	ptr[0] = 0xc0008600;
755	ptr[1] = 0x00000001;
756	ib_info[1].ib_mc_address = ib_result_mc_address;
757	ib_info[1].size = 2;
758
759	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
760	ibs_request.number_of_ibs = 2;
761	ibs_request.ibs = ib_info;
762	ibs_request.resources = bo_list;
763	ibs_request.fence_info.handle = NULL;
764
765	r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
766
767	CU_ASSERT_EQUAL(r, 0);
768
769	fence_status.context = context_handle;
770	fence_status.ip_type = AMDGPU_HW_IP_GFX;
771	fence_status.ip_instance = 0;
772	fence_status.fence = ibs_request.seq_no;
773
774	r = amdgpu_cs_query_fence_status(&fence_status,
775					 AMDGPU_TIMEOUT_INFINITE,
776					 0, &expired);
777	CU_ASSERT_EQUAL(r, 0);
778
779	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
780				     ib_result_mc_address, 4096);
781	CU_ASSERT_EQUAL(r, 0);
782
783	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
784				     ib_result_ce_mc_address, 4096);
785	CU_ASSERT_EQUAL(r, 0);
786
787	r = amdgpu_bo_list_destroy(bo_list);
788	CU_ASSERT_EQUAL(r, 0);
789
790	r = amdgpu_cs_ctx_free(context_handle);
791	CU_ASSERT_EQUAL(r, 0);
792
793}
794
795static void amdgpu_command_submission_gfx_shared_ib(void)
796{
797	amdgpu_context_handle context_handle;
798	amdgpu_bo_handle ib_result_handle;
799	void *ib_result_cpu;
800	uint64_t ib_result_mc_address;
801	struct amdgpu_cs_request ibs_request = {0};
802	struct amdgpu_cs_ib_info ib_info[2];
803	struct amdgpu_cs_fence fence_status = {0};
804	uint32_t *ptr;
805	uint32_t expired;
806	amdgpu_bo_list_handle bo_list;
807	amdgpu_va_handle va_handle;
808	int r, i = 0;
809
810	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
811	CU_ASSERT_EQUAL(r, 0);
812
813	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
814				    AMDGPU_GEM_DOMAIN_GTT, 0,
815				    &ib_result_handle, &ib_result_cpu,
816				    &ib_result_mc_address, &va_handle);
817	CU_ASSERT_EQUAL(r, 0);
818
819	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
820			       &bo_list);
821	CU_ASSERT_EQUAL(r, 0);
822
823	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
824
825	/* IT_SET_CE_DE_COUNTERS */
826	ptr = ib_result_cpu;
827	if (family_id != AMDGPU_FAMILY_SI) {
828		ptr[i++] = 0xc0008900;
829		ptr[i++] = 0;
830	}
831	ptr[i++] = 0xc0008400;
832	ptr[i++] = 1;
833	ib_info[0].ib_mc_address = ib_result_mc_address;
834	ib_info[0].size = i;
835	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
836
837	ptr = (uint32_t *)ib_result_cpu + 4;
838	ptr[0] = 0xc0008600;
839	ptr[1] = 0x00000001;
840	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
841	ib_info[1].size = 2;
842
843	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
844	ibs_request.number_of_ibs = 2;
845	ibs_request.ibs = ib_info;
846	ibs_request.resources = bo_list;
847	ibs_request.fence_info.handle = NULL;
848
849	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
850
851	CU_ASSERT_EQUAL(r, 0);
852
853	fence_status.context = context_handle;
854	fence_status.ip_type = AMDGPU_HW_IP_GFX;
855	fence_status.ip_instance = 0;
856	fence_status.fence = ibs_request.seq_no;
857
858	r = amdgpu_cs_query_fence_status(&fence_status,
859					 AMDGPU_TIMEOUT_INFINITE,
860					 0, &expired);
861	CU_ASSERT_EQUAL(r, 0);
862
863	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
864				     ib_result_mc_address, 4096);
865	CU_ASSERT_EQUAL(r, 0);
866
867	r = amdgpu_bo_list_destroy(bo_list);
868	CU_ASSERT_EQUAL(r, 0);
869
870	r = amdgpu_cs_ctx_free(context_handle);
871	CU_ASSERT_EQUAL(r, 0);
872}
873
874static void amdgpu_command_submission_gfx_cp_write_data(void)
875{
876	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
877}
878
879static void amdgpu_command_submission_gfx_cp_const_fill(void)
880{
881	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
882}
883
884static void amdgpu_command_submission_gfx_cp_copy_data(void)
885{
886	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
887}
888
889static void amdgpu_bo_eviction_test(void)
890{
891	const int sdma_write_length = 1024;
892	const int pm4_dw = 256;
893	amdgpu_context_handle context_handle;
894	amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
895	amdgpu_bo_handle *resources;
896	uint32_t *pm4;
897	struct amdgpu_cs_ib_info *ib_info;
898	struct amdgpu_cs_request *ibs_request;
899	uint64_t bo1_mc, bo2_mc;
900	volatile unsigned char *bo1_cpu, *bo2_cpu;
901	int i, j, r, loop1, loop2;
902	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
903	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
904	struct amdgpu_heap_info vram_info, gtt_info;
905
906	pm4 = calloc(pm4_dw, sizeof(*pm4));
907	CU_ASSERT_NOT_EQUAL(pm4, NULL);
908
909	ib_info = calloc(1, sizeof(*ib_info));
910	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
911
912	ibs_request = calloc(1, sizeof(*ibs_request));
913	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
914
915	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
916	CU_ASSERT_EQUAL(r, 0);
917
918	/* prepare resource */
919	resources = calloc(4, sizeof(amdgpu_bo_handle));
920	CU_ASSERT_NOT_EQUAL(resources, NULL);
921
922	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
923				   0, &vram_info);
924	CU_ASSERT_EQUAL(r, 0);
925
926	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
927				   0, &gtt_info);
928	CU_ASSERT_EQUAL(r, 0);
929
930	if (vram_info.max_allocation > gtt_info.heap_size/3) {
931		vram_info.max_allocation = gtt_info.heap_size/3;
932		gtt_info.max_allocation = vram_info.max_allocation;
933	}
934
935	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
936				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
937	CU_ASSERT_EQUAL(r, 0);
938	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
939				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
940	CU_ASSERT_EQUAL(r, 0);
941
942	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
943				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
944	CU_ASSERT_EQUAL(r, 0);
945	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
946				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
947	CU_ASSERT_EQUAL(r, 0);
948
949
950
951	loop1 = loop2 = 0;
952	/* run 9 circle to test all mapping combination */
953	while(loop1 < 2) {
954		while(loop2 < 2) {
955			/* allocate UC bo1for sDMA use */
956			r = amdgpu_bo_alloc_and_map(device_handle,
957						    sdma_write_length, 4096,
958						    AMDGPU_GEM_DOMAIN_GTT,
959						    gtt_flags[loop1], &bo1,
960						    (void**)&bo1_cpu, &bo1_mc,
961						    &bo1_va_handle);
962			CU_ASSERT_EQUAL(r, 0);
963
964			/* set bo1 */
965			memset((void*)bo1_cpu, 0xaa, sdma_write_length);
966
967			/* allocate UC bo2 for sDMA use */
968			r = amdgpu_bo_alloc_and_map(device_handle,
969						    sdma_write_length, 4096,
970						    AMDGPU_GEM_DOMAIN_GTT,
971						    gtt_flags[loop2], &bo2,
972						    (void**)&bo2_cpu, &bo2_mc,
973						    &bo2_va_handle);
974			CU_ASSERT_EQUAL(r, 0);
975
976			/* clear bo2 */
977			memset((void*)bo2_cpu, 0, sdma_write_length);
978
979			resources[0] = bo1;
980			resources[1] = bo2;
981			resources[2] = vram_max[loop2];
982			resources[3] = gtt_max[loop2];
983
984			/* fulfill PM4: test DMA copy linear */
985			i = j = 0;
986			if (family_id == AMDGPU_FAMILY_SI) {
987				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
988							  sdma_write_length);
989				pm4[i++] = 0xffffffff & bo2_mc;
990				pm4[i++] = 0xffffffff & bo1_mc;
991				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
992				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
993			} else {
994				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
995				if (family_id >= AMDGPU_FAMILY_AI)
996					pm4[i++] = sdma_write_length - 1;
997				else
998					pm4[i++] = sdma_write_length;
999				pm4[i++] = 0;
1000				pm4[i++] = 0xffffffff & bo1_mc;
1001				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1002				pm4[i++] = 0xffffffff & bo2_mc;
1003				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1004			}
1005
1006			amdgpu_test_exec_cs_helper(context_handle,
1007						   AMDGPU_HW_IP_DMA, 0,
1008						   i, pm4,
1009						   4, resources,
1010						   ib_info, ibs_request);
1011
1012			/* verify if SDMA test result meets with expected */
1013			i = 0;
1014			while(i < sdma_write_length) {
1015				CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
1016			}
1017			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
1018						     sdma_write_length);
1019			CU_ASSERT_EQUAL(r, 0);
1020			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
1021						     sdma_write_length);
1022			CU_ASSERT_EQUAL(r, 0);
1023			loop2++;
1024		}
1025		loop2 = 0;
1026		loop1++;
1027	}
1028	amdgpu_bo_free(vram_max[0]);
1029	amdgpu_bo_free(vram_max[1]);
1030	amdgpu_bo_free(gtt_max[0]);
1031	amdgpu_bo_free(gtt_max[1]);
1032	/* clean resources */
1033	free(resources);
1034	free(ibs_request);
1035	free(ib_info);
1036	free(pm4);
1037
1038	/* end of test */
1039	r = amdgpu_cs_ctx_free(context_handle);
1040	CU_ASSERT_EQUAL(r, 0);
1041}
1042
1043
1044static void amdgpu_command_submission_gfx(void)
1045{
1046	/* write data using the CP */
1047	amdgpu_command_submission_gfx_cp_write_data();
1048	/* const fill using the CP */
1049	amdgpu_command_submission_gfx_cp_const_fill();
1050	/* copy data using the CP */
1051	amdgpu_command_submission_gfx_cp_copy_data();
1052	/* separate IB buffers for multi-IB submission */
1053	amdgpu_command_submission_gfx_separate_ibs();
1054	/* shared IB buffer for multi-IB submission */
1055	amdgpu_command_submission_gfx_shared_ib();
1056}
1057
1058static void amdgpu_semaphore_test(void)
1059{
1060	amdgpu_context_handle context_handle[2];
1061	amdgpu_semaphore_handle sem;
1062	amdgpu_bo_handle ib_result_handle[2];
1063	void *ib_result_cpu[2];
1064	uint64_t ib_result_mc_address[2];
1065	struct amdgpu_cs_request ibs_request[2] = {0};
1066	struct amdgpu_cs_ib_info ib_info[2] = {0};
1067	struct amdgpu_cs_fence fence_status = {0};
1068	uint32_t *ptr;
1069	uint32_t expired;
1070	uint32_t sdma_nop, gfx_nop;
1071	amdgpu_bo_list_handle bo_list[2];
1072	amdgpu_va_handle va_handle[2];
1073	int r, i;
1074
1075	if (family_id == AMDGPU_FAMILY_SI) {
1076		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
1077		gfx_nop = GFX_COMPUTE_NOP_SI;
1078	} else {
1079		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
1080		gfx_nop = GFX_COMPUTE_NOP;
1081	}
1082
1083	r = amdgpu_cs_create_semaphore(&sem);
1084	CU_ASSERT_EQUAL(r, 0);
1085	for (i = 0; i < 2; i++) {
1086		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
1087		CU_ASSERT_EQUAL(r, 0);
1088
1089		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1090					    AMDGPU_GEM_DOMAIN_GTT, 0,
1091					    &ib_result_handle[i], &ib_result_cpu[i],
1092					    &ib_result_mc_address[i], &va_handle[i]);
1093		CU_ASSERT_EQUAL(r, 0);
1094
1095		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
1096				       NULL, &bo_list[i]);
1097		CU_ASSERT_EQUAL(r, 0);
1098	}
1099
1100	/* 1. same context different engine */
1101	ptr = ib_result_cpu[0];
1102	ptr[0] = sdma_nop;
1103	ib_info[0].ib_mc_address = ib_result_mc_address[0];
1104	ib_info[0].size = 1;
1105
1106	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
1107	ibs_request[0].number_of_ibs = 1;
1108	ibs_request[0].ibs = &ib_info[0];
1109	ibs_request[0].resources = bo_list[0];
1110	ibs_request[0].fence_info.handle = NULL;
1111	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
1112	CU_ASSERT_EQUAL(r, 0);
1113	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
1114	CU_ASSERT_EQUAL(r, 0);
1115
1116	r = amdgpu_cs_wait_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
1117	CU_ASSERT_EQUAL(r, 0);
1118	ptr = ib_result_cpu[1];
1119	ptr[0] = gfx_nop;
1120	ib_info[1].ib_mc_address = ib_result_mc_address[1];
1121	ib_info[1].size = 1;
1122
1123	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
1124	ibs_request[1].number_of_ibs = 1;
1125	ibs_request[1].ibs = &ib_info[1];
1126	ibs_request[1].resources = bo_list[1];
1127	ibs_request[1].fence_info.handle = NULL;
1128
1129	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[1], 1);
1130	CU_ASSERT_EQUAL(r, 0);
1131
1132	fence_status.context = context_handle[0];
1133	fence_status.ip_type = AMDGPU_HW_IP_GFX;
1134	fence_status.ip_instance = 0;
1135	fence_status.fence = ibs_request[1].seq_no;
1136	r = amdgpu_cs_query_fence_status(&fence_status,
1137					 500000000, 0, &expired);
1138	CU_ASSERT_EQUAL(r, 0);
1139	CU_ASSERT_EQUAL(expired, true);
1140
1141	/* 2. same engine different context */
1142	ptr = ib_result_cpu[0];
1143	ptr[0] = gfx_nop;
1144	ib_info[0].ib_mc_address = ib_result_mc_address[0];
1145	ib_info[0].size = 1;
1146
1147	ibs_request[0].ip_type = AMDGPU_HW_IP_GFX;
1148	ibs_request[0].number_of_ibs = 1;
1149	ibs_request[0].ibs = &ib_info[0];
1150	ibs_request[0].resources = bo_list[0];
1151	ibs_request[0].fence_info.handle = NULL;
1152	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
1153	CU_ASSERT_EQUAL(r, 0);
1154	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
1155	CU_ASSERT_EQUAL(r, 0);
1156
1157	r = amdgpu_cs_wait_semaphore(context_handle[1], AMDGPU_HW_IP_GFX, 0, 0, sem);
1158	CU_ASSERT_EQUAL(r, 0);
1159	ptr = ib_result_cpu[1];
1160	ptr[0] = gfx_nop;
1161	ib_info[1].ib_mc_address = ib_result_mc_address[1];
1162	ib_info[1].size = 1;
1163
1164	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
1165	ibs_request[1].number_of_ibs = 1;
1166	ibs_request[1].ibs = &ib_info[1];
1167	ibs_request[1].resources = bo_list[1];
1168	ibs_request[1].fence_info.handle = NULL;
1169	r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request[1], 1);
1170
1171	CU_ASSERT_EQUAL(r, 0);
1172
1173	fence_status.context = context_handle[1];
1174	fence_status.ip_type = AMDGPU_HW_IP_GFX;
1175	fence_status.ip_instance = 0;
1176	fence_status.fence = ibs_request[1].seq_no;
1177	r = amdgpu_cs_query_fence_status(&fence_status,
1178					 500000000, 0, &expired);
1179	CU_ASSERT_EQUAL(r, 0);
1180	CU_ASSERT_EQUAL(expired, true);
1181
1182	for (i = 0; i < 2; i++) {
1183		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
1184					     ib_result_mc_address[i], 4096);
1185		CU_ASSERT_EQUAL(r, 0);
1186
1187		r = amdgpu_bo_list_destroy(bo_list[i]);
1188		CU_ASSERT_EQUAL(r, 0);
1189
1190		r = amdgpu_cs_ctx_free(context_handle[i]);
1191		CU_ASSERT_EQUAL(r, 0);
1192	}
1193
1194	r = amdgpu_cs_destroy_semaphore(sem);
1195	CU_ASSERT_EQUAL(r, 0);
1196}
1197
1198static void amdgpu_command_submission_compute_nop(void)
1199{
1200	amdgpu_context_handle context_handle;
1201	amdgpu_bo_handle ib_result_handle;
1202	void *ib_result_cpu;
1203	uint64_t ib_result_mc_address;
1204	struct amdgpu_cs_request ibs_request;
1205	struct amdgpu_cs_ib_info ib_info;
1206	struct amdgpu_cs_fence fence_status;
1207	uint32_t *ptr;
1208	uint32_t expired;
1209	int r, instance;
1210	amdgpu_bo_list_handle bo_list;
1211	amdgpu_va_handle va_handle;
1212	struct drm_amdgpu_info_hw_ip info;
1213
1214	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
1215	CU_ASSERT_EQUAL(r, 0);
1216
1217	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1218	CU_ASSERT_EQUAL(r, 0);
1219
1220	for (instance = 0; (1 << instance) & info.available_rings; instance++) {
1221		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1222					    AMDGPU_GEM_DOMAIN_GTT, 0,
1223					    &ib_result_handle, &ib_result_cpu,
1224					    &ib_result_mc_address, &va_handle);
1225		CU_ASSERT_EQUAL(r, 0);
1226
1227		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
1228				       &bo_list);
1229		CU_ASSERT_EQUAL(r, 0);
1230
1231		ptr = ib_result_cpu;
1232		memset(ptr, 0, 16);
1233		ptr[0]=PACKET3(PACKET3_NOP, 14);
1234
1235		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
1236		ib_info.ib_mc_address = ib_result_mc_address;
1237		ib_info.size = 16;
1238
1239		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
1240		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
1241		ibs_request.ring = instance;
1242		ibs_request.number_of_ibs = 1;
1243		ibs_request.ibs = &ib_info;
1244		ibs_request.resources = bo_list;
1245		ibs_request.fence_info.handle = NULL;
1246
1247		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
1248		r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
1249		CU_ASSERT_EQUAL(r, 0);
1250
1251		fence_status.context = context_handle;
1252		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
1253		fence_status.ip_instance = 0;
1254		fence_status.ring = instance;
1255		fence_status.fence = ibs_request.seq_no;
1256
1257		r = amdgpu_cs_query_fence_status(&fence_status,
1258						 AMDGPU_TIMEOUT_INFINITE,
1259						 0, &expired);
1260		CU_ASSERT_EQUAL(r, 0);
1261
1262		r = amdgpu_bo_list_destroy(bo_list);
1263		CU_ASSERT_EQUAL(r, 0);
1264
1265		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1266					     ib_result_mc_address, 4096);
1267		CU_ASSERT_EQUAL(r, 0);
1268	}
1269
1270	r = amdgpu_cs_ctx_free(context_handle);
1271	CU_ASSERT_EQUAL(r, 0);
1272}
1273
1274static void amdgpu_command_submission_compute_cp_write_data(void)
1275{
1276	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
1277}
1278
1279static void amdgpu_command_submission_compute_cp_const_fill(void)
1280{
1281	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
1282}
1283
1284static void amdgpu_command_submission_compute_cp_copy_data(void)
1285{
1286	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
1287}
1288
1289static void amdgpu_command_submission_compute(void)
1290{
1291	/* write data using the CP */
1292	amdgpu_command_submission_compute_cp_write_data();
1293	/* const fill using the CP */
1294	amdgpu_command_submission_compute_cp_const_fill();
1295	/* copy data using the CP */
1296	amdgpu_command_submission_compute_cp_copy_data();
1297	/* nop test */
1298	amdgpu_command_submission_compute_nop();
1299}
1300
1301/*
1302 * caller need create/release:
1303 * pm4_src, resources, ib_info, and ibs_request
1304 * submit command stream described in ibs_request and wait for this IB accomplished
1305 */
1306void
1307amdgpu_test_exec_cs_helper_raw(amdgpu_device_handle device_handle,
1308			       amdgpu_context_handle context_handle,
1309			       unsigned ip_type, int instance, int pm4_dw,
1310			       uint32_t *pm4_src, int res_cnt,
1311			       amdgpu_bo_handle *resources,
1312			       struct amdgpu_cs_ib_info *ib_info,
1313			       struct amdgpu_cs_request *ibs_request,
1314			       bool secure)
1315{
1316	int r;
1317	uint32_t expired;
1318	uint32_t *ring_ptr;
1319	amdgpu_bo_handle ib_result_handle;
1320	void *ib_result_cpu;
1321	uint64_t ib_result_mc_address;
1322	struct amdgpu_cs_fence fence_status = {0};
1323	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
1324	amdgpu_va_handle va_handle;
1325
1326	/* prepare CS */
1327	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
1328	CU_ASSERT_NOT_EQUAL(resources, NULL);
1329	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1330	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1331	CU_ASSERT_TRUE(pm4_dw <= 1024);
1332
1333	/* allocate IB */
1334	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1335				    AMDGPU_GEM_DOMAIN_GTT, 0,
1336				    &ib_result_handle, &ib_result_cpu,
1337				    &ib_result_mc_address, &va_handle);
1338	CU_ASSERT_EQUAL(r, 0);
1339
1340	/* copy PM4 packet to ring from caller */
1341	ring_ptr = ib_result_cpu;
1342	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));
1343
1344	ib_info->ib_mc_address = ib_result_mc_address;
1345	ib_info->size = pm4_dw;
1346	if (secure)
1347		ib_info->flags |= AMDGPU_IB_FLAGS_SECURE;
1348
1349	ibs_request->ip_type = ip_type;
1350	ibs_request->ring = instance;
1351	ibs_request->number_of_ibs = 1;
1352	ibs_request->ibs = ib_info;
1353	ibs_request->fence_info.handle = NULL;
1354
1355	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
1356	all_res[res_cnt] = ib_result_handle;
1357
1358	r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res,
1359				  NULL, &ibs_request->resources);
1360	CU_ASSERT_EQUAL(r, 0);
1361
1362	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1363
1364	/* submit CS */
1365	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
1366	CU_ASSERT_EQUAL(r, 0);
1367
1368	r = amdgpu_bo_list_destroy(ibs_request->resources);
1369	CU_ASSERT_EQUAL(r, 0);
1370
1371	fence_status.ip_type = ip_type;
1372	fence_status.ip_instance = 0;
1373	fence_status.ring = ibs_request->ring;
1374	fence_status.context = context_handle;
1375	fence_status.fence = ibs_request->seq_no;
1376
1377	/* wait for IB accomplished */
1378	r = amdgpu_cs_query_fence_status(&fence_status,
1379					 AMDGPU_TIMEOUT_INFINITE,
1380					 0, &expired);
1381	CU_ASSERT_EQUAL(r, 0);
1382	CU_ASSERT_EQUAL(expired, true);
1383
1384	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1385				     ib_result_mc_address, 4096);
1386	CU_ASSERT_EQUAL(r, 0);
1387}
1388
1389static void
1390amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
1391			   unsigned ip_type, int instance, int pm4_dw,
1392			   uint32_t *pm4_src, int res_cnt,
1393			   amdgpu_bo_handle *resources,
1394			   struct amdgpu_cs_ib_info *ib_info,
1395			   struct amdgpu_cs_request *ibs_request)
1396{
1397	amdgpu_test_exec_cs_helper_raw(device_handle, context_handle,
1398				       ip_type, instance, pm4_dw, pm4_src,
1399				       res_cnt, resources, ib_info,
1400				       ibs_request, false);
1401}
1402
1403void
1404amdgpu_command_submission_write_linear_helper_with_secure(amdgpu_device_handle
1405							  device, unsigned
1406							  ip_type, bool secure)
1407{
1408	const int sdma_write_length = 128;
1409	const int pm4_dw = 256;
1410	amdgpu_context_handle context_handle;
1411	amdgpu_bo_handle bo;
1412	amdgpu_bo_handle *resources;
1413	uint32_t *pm4;
1414	struct amdgpu_cs_ib_info *ib_info;
1415	struct amdgpu_cs_request *ibs_request;
1416	uint64_t bo_mc;
1417	volatile uint32_t *bo_cpu;
1418	uint32_t bo_cpu_origin;
1419	int i, j, r, loop, ring_id;
1420	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1421	amdgpu_va_handle va_handle;
1422	struct drm_amdgpu_info_hw_ip hw_ip_info;
1423
1424	pm4 = calloc(pm4_dw, sizeof(*pm4));
1425	CU_ASSERT_NOT_EQUAL(pm4, NULL);
1426
1427	ib_info = calloc(1, sizeof(*ib_info));
1428	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1429
1430	ibs_request = calloc(1, sizeof(*ibs_request));
1431	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1432
1433	r = amdgpu_query_hw_ip_info(device, ip_type, 0, &hw_ip_info);
1434	CU_ASSERT_EQUAL(r, 0);
1435
1436	for (i = 0; secure && (i < 2); i++)
1437		gtt_flags[i] |= AMDGPU_GEM_CREATE_ENCRYPTED;
1438
1439	r = amdgpu_cs_ctx_create(device, &context_handle);
1440
1441	CU_ASSERT_EQUAL(r, 0);
1442
1443	/* prepare resource */
1444	resources = calloc(1, sizeof(amdgpu_bo_handle));
1445	CU_ASSERT_NOT_EQUAL(resources, NULL);
1446
1447	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1448		loop = 0;
1449		while(loop < 2) {
1450			/* allocate UC bo for sDMA use */
1451			r = amdgpu_bo_alloc_and_map(device,
1452						    sdma_write_length * sizeof(uint32_t),
1453						    4096, AMDGPU_GEM_DOMAIN_GTT,
1454						    gtt_flags[loop], &bo, (void**)&bo_cpu,
1455						    &bo_mc, &va_handle);
1456			CU_ASSERT_EQUAL(r, 0);
1457
1458			/* clear bo */
1459			memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));
1460
1461			resources[0] = bo;
1462
1463			/* fulfill PM4: test DMA write-linear */
1464			i = j = 0;
1465			if (ip_type == AMDGPU_HW_IP_DMA) {
1466				if (family_id == AMDGPU_FAMILY_SI)
1467					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
1468								  sdma_write_length);
1469				else
1470					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
1471							       SDMA_WRITE_SUB_OPCODE_LINEAR,
1472							       secure ? SDMA_ATOMIC_TMZ(1) : 0);
1473				pm4[i++] = 0xfffffffc & bo_mc;
1474				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1475				if (family_id >= AMDGPU_FAMILY_AI)
1476					pm4[i++] = sdma_write_length - 1;
1477				else if (family_id != AMDGPU_FAMILY_SI)
1478					pm4[i++] = sdma_write_length;
1479				while(j++ < sdma_write_length)
1480					pm4[i++] = 0xdeadbeaf;
1481			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1482				    (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1483				pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
1484				pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1485				pm4[i++] = 0xfffffffc & bo_mc;
1486				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1487				while(j++ < sdma_write_length)
1488					pm4[i++] = 0xdeadbeaf;
1489			}
1490
1491			amdgpu_test_exec_cs_helper_raw(device, context_handle,
1492						       ip_type, ring_id, i, pm4,
1493						       1, resources, ib_info,
1494						       ibs_request, secure);
1495
1496			/* verify if SDMA test result meets with expected */
1497			i = 0;
1498			if (!secure) {
1499				while(i < sdma_write_length) {
1500					CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
1501				}
1502			} else if (ip_type == AMDGPU_HW_IP_GFX) {
1503				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
1504				pm4[i++] = PACKET3(PACKET3_ATOMIC_MEM, 7);
1505				/* atomic opcode for 32b w/ RTN and ATOMIC_SWAPCMP_RTN
1506				 * command, 1-loop_until_compare_satisfied.
1507				 * single_pass_atomic, 0-lru
1508				 * engine_sel, 0-micro_engine
1509				 */
1510				pm4[i++] = (TC_OP_ATOMIC_CMPSWAP_RTN_32 |
1511							ATOMIC_MEM_COMMAND(1) |
1512							ATOMIC_MEM_CACHEPOLICAY(0) |
1513							ATOMIC_MEM_ENGINESEL(0));
1514				pm4[i++] = 0xfffffffc & bo_mc;
1515				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1516				pm4[i++] = 0x12345678;
1517				pm4[i++] = 0x0;
1518				pm4[i++] = 0xdeadbeaf;
1519				pm4[i++] = 0x0;
1520				pm4[i++] = 0x100;
1521				amdgpu_test_exec_cs_helper_raw(device, context_handle,
1522							ip_type, ring_id, i, pm4,
1523							1, resources, ib_info,
1524							ibs_request, true);
1525			} else if (ip_type == AMDGPU_HW_IP_DMA) {
1526				/* restore the bo_cpu to compare */
1527				bo_cpu_origin = bo_cpu[0];
1528				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
1529				/* atomic opcode for 32b w/ RTN and ATOMIC_SWAPCMP_RTN
1530				 * loop, 1-loop_until_compare_satisfied.
1531				 * single_pass_atomic, 0-lru
1532				 */
1533				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
1534							       0,
1535							       SDMA_ATOMIC_LOOP(1) |
1536							       SDMA_ATOMIC_TMZ(1) |
1537							       SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
1538				pm4[i++] = 0xfffffffc & bo_mc;
1539				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1540				pm4[i++] = 0x12345678;
1541				pm4[i++] = 0x0;
1542				pm4[i++] = 0xdeadbeaf;
1543				pm4[i++] = 0x0;
1544				pm4[i++] = 0x100;
1545				amdgpu_test_exec_cs_helper_raw(device, context_handle,
1546							ip_type, ring_id, i, pm4,
1547							1, resources, ib_info,
1548							ibs_request, true);
1549				/* DMA's atomic behavir is unlike GFX
1550				 * If the comparing data is not equal to destination data,
1551				 * For GFX, loop again till gfx timeout(system hang).
1552				 * For DMA, loop again till timer expired and then send interrupt.
1553				 * So testcase can't use interrupt mechanism.
1554				 * We take another way to verify. When the comparing data is not
1555				 * equal to destination data, overwrite the source data to the destination
1556				 * buffer. Otherwise, original destination data unchanged.
1557				 * So if the bo_cpu data is overwritten, the result is passed.
1558				 */
1559				CU_ASSERT_NOT_EQUAL(bo_cpu[0], bo_cpu_origin);
1560
1561				/* compare again for the case of dest_data != cmp_data */
1562				i = 0;
1563				/* restore again, here dest_data should be */
1564				bo_cpu_origin = bo_cpu[0];
1565				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
1566				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
1567							       0,
1568							       SDMA_ATOMIC_LOOP(1) |
1569							       SDMA_ATOMIC_TMZ(1) |
1570							       SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
1571				pm4[i++] = 0xfffffffc & bo_mc;
1572				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1573				pm4[i++] = 0x87654321;
1574				pm4[i++] = 0x0;
1575				pm4[i++] = 0xdeadbeaf;
1576				pm4[i++] = 0x0;
1577				pm4[i++] = 0x100;
1578				amdgpu_test_exec_cs_helper_raw(device, context_handle,
1579							ip_type, ring_id, i, pm4,
1580							1, resources, ib_info,
1581							ibs_request, true);
1582				/* here bo_cpu[0] should be unchanged, still is 0x12345678, otherwise failed*/
1583				CU_ASSERT_EQUAL(bo_cpu[0], bo_cpu_origin);
1584			}
1585
1586			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
1587						     sdma_write_length * sizeof(uint32_t));
1588			CU_ASSERT_EQUAL(r, 0);
1589			loop++;
1590		}
1591	}
1592	/* clean resources */
1593	free(resources);
1594	free(ibs_request);
1595	free(ib_info);
1596	free(pm4);
1597
1598	/* end of test */
1599	r = amdgpu_cs_ctx_free(context_handle);
1600	CU_ASSERT_EQUAL(r, 0);
1601}
1602
1603static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
1604{
1605	amdgpu_command_submission_write_linear_helper_with_secure(device_handle,
1606								  ip_type,
1607								  false);
1608}
1609
1610static void amdgpu_command_submission_sdma_write_linear(void)
1611{
1612	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
1613}
1614
1615static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
1616{
1617	const int sdma_write_length = 1024 * 1024;
1618	const int pm4_dw = 256;
1619	amdgpu_context_handle context_handle;
1620	amdgpu_bo_handle bo;
1621	amdgpu_bo_handle *resources;
1622	uint32_t *pm4;
1623	struct amdgpu_cs_ib_info *ib_info;
1624	struct amdgpu_cs_request *ibs_request;
1625	uint64_t bo_mc;
1626	volatile uint32_t *bo_cpu;
1627	int i, j, r, loop, ring_id;
1628	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1629	amdgpu_va_handle va_handle;
1630	struct drm_amdgpu_info_hw_ip hw_ip_info;
1631
1632	pm4 = calloc(pm4_dw, sizeof(*pm4));
1633	CU_ASSERT_NOT_EQUAL(pm4, NULL);
1634
1635	ib_info = calloc(1, sizeof(*ib_info));
1636	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1637
1638	ibs_request = calloc(1, sizeof(*ibs_request));
1639	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1640
1641	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1642	CU_ASSERT_EQUAL(r, 0);
1643
1644	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1645	CU_ASSERT_EQUAL(r, 0);
1646
1647	/* prepare resource */
1648	resources = calloc(1, sizeof(amdgpu_bo_handle));
1649	CU_ASSERT_NOT_EQUAL(resources, NULL);
1650
1651	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1652		loop = 0;
1653		while(loop < 2) {
1654			/* allocate UC bo for sDMA use */
1655			r = amdgpu_bo_alloc_and_map(device_handle,
1656						    sdma_write_length, 4096,
1657						    AMDGPU_GEM_DOMAIN_GTT,
1658						    gtt_flags[loop], &bo, (void**)&bo_cpu,
1659						    &bo_mc, &va_handle);
1660			CU_ASSERT_EQUAL(r, 0);
1661
1662			/* clear bo */
1663			memset((void*)bo_cpu, 0, sdma_write_length);
1664
1665			resources[0] = bo;
1666
1667			/* fulfill PM4: test DMA const fill */
1668			i = j = 0;
1669			if (ip_type == AMDGPU_HW_IP_DMA) {
1670				if (family_id == AMDGPU_FAMILY_SI) {
1671					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
1672								  0, 0, 0,
1673								  sdma_write_length / 4);
1674					pm4[i++] = 0xfffffffc & bo_mc;
1675					pm4[i++] = 0xdeadbeaf;
1676					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
1677				} else {
1678					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
1679							       SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
1680					pm4[i++] = 0xffffffff & bo_mc;
1681					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1682					pm4[i++] = 0xdeadbeaf;
1683					if (family_id >= AMDGPU_FAMILY_AI)
1684						pm4[i++] = sdma_write_length - 1;
1685					else
1686						pm4[i++] = sdma_write_length;
1687				}
1688			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1689				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1690				if (family_id == AMDGPU_FAMILY_SI) {
1691					pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
1692					pm4[i++] = 0xdeadbeaf;
1693					pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
1694						   PACKET3_DMA_DATA_SI_DST_SEL(0) |
1695						   PACKET3_DMA_DATA_SI_SRC_SEL(2) |
1696						   PACKET3_DMA_DATA_SI_CP_SYNC;
1697					pm4[i++] = 0xffffffff & bo_mc;
1698					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1699					pm4[i++] = sdma_write_length;
1700				} else {
1701					pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
1702					pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
1703						   PACKET3_DMA_DATA_DST_SEL(0) |
1704						   PACKET3_DMA_DATA_SRC_SEL(2) |
1705						   PACKET3_DMA_DATA_CP_SYNC;
1706					pm4[i++] = 0xdeadbeaf;
1707					pm4[i++] = 0;
1708					pm4[i++] = 0xfffffffc & bo_mc;
1709					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1710					pm4[i++] = sdma_write_length;
1711				}
1712			}
1713
1714			amdgpu_test_exec_cs_helper(context_handle,
1715						   ip_type, ring_id,
1716						   i, pm4,
1717						   1, resources,
1718						   ib_info, ibs_request);
1719
1720			/* verify if SDMA test result meets with expected */
1721			i = 0;
1722			while(i < (sdma_write_length / 4)) {
1723				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
1724			}
1725
1726			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
1727						     sdma_write_length);
1728			CU_ASSERT_EQUAL(r, 0);
1729			loop++;
1730		}
1731	}
1732	/* clean resources */
1733	free(resources);
1734	free(ibs_request);
1735	free(ib_info);
1736	free(pm4);
1737
1738	/* end of test */
1739	r = amdgpu_cs_ctx_free(context_handle);
1740	CU_ASSERT_EQUAL(r, 0);
1741}
1742
1743static void amdgpu_command_submission_sdma_const_fill(void)
1744{
1745	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
1746}
1747
1748static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
1749{
1750	const int sdma_write_length = 1024;
1751	const int pm4_dw = 256;
1752	amdgpu_context_handle context_handle;
1753	amdgpu_bo_handle bo1, bo2;
1754	amdgpu_bo_handle *resources;
1755	uint32_t *pm4;
1756	struct amdgpu_cs_ib_info *ib_info;
1757	struct amdgpu_cs_request *ibs_request;
1758	uint64_t bo1_mc, bo2_mc;
1759	volatile unsigned char *bo1_cpu, *bo2_cpu;
1760	int i, j, r, loop1, loop2, ring_id;
1761	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1762	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
1763	struct drm_amdgpu_info_hw_ip hw_ip_info;
1764
1765	pm4 = calloc(pm4_dw, sizeof(*pm4));
1766	CU_ASSERT_NOT_EQUAL(pm4, NULL);
1767
1768	ib_info = calloc(1, sizeof(*ib_info));
1769	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1770
1771	ibs_request = calloc(1, sizeof(*ibs_request));
1772	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1773
1774	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1775	CU_ASSERT_EQUAL(r, 0);
1776
1777	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1778	CU_ASSERT_EQUAL(r, 0);
1779
1780	/* prepare resource */
1781	resources = calloc(2, sizeof(amdgpu_bo_handle));
1782	CU_ASSERT_NOT_EQUAL(resources, NULL);
1783
1784	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1785		loop1 = loop2 = 0;
1786		/* run 9 circle to test all mapping combination */
1787		while(loop1 < 2) {
1788			while(loop2 < 2) {
1789				/* allocate UC bo1for sDMA use */
1790				r = amdgpu_bo_alloc_and_map(device_handle,
1791							    sdma_write_length, 4096,
1792							    AMDGPU_GEM_DOMAIN_GTT,
1793							    gtt_flags[loop1], &bo1,
1794							    (void**)&bo1_cpu, &bo1_mc,
1795							    &bo1_va_handle);
1796				CU_ASSERT_EQUAL(r, 0);
1797
1798				/* set bo1 */
1799				memset((void*)bo1_cpu, 0xaa, sdma_write_length);
1800
1801				/* allocate UC bo2 for sDMA use */
1802				r = amdgpu_bo_alloc_and_map(device_handle,
1803							    sdma_write_length, 4096,
1804							    AMDGPU_GEM_DOMAIN_GTT,
1805							    gtt_flags[loop2], &bo2,
1806							    (void**)&bo2_cpu, &bo2_mc,
1807							    &bo2_va_handle);
1808				CU_ASSERT_EQUAL(r, 0);
1809
1810				/* clear bo2 */
1811				memset((void*)bo2_cpu, 0, sdma_write_length);
1812
1813				resources[0] = bo1;
1814				resources[1] = bo2;
1815
1816				/* fulfill PM4: test DMA copy linear */
1817				i = j = 0;
1818				if (ip_type == AMDGPU_HW_IP_DMA) {
1819					if (family_id == AMDGPU_FAMILY_SI) {
1820						pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
1821									  0, 0, 0,
1822									  sdma_write_length);
1823						pm4[i++] = 0xffffffff & bo2_mc;
1824						pm4[i++] = 0xffffffff & bo1_mc;
1825						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1826						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1827					} else {
1828						pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
1829								       SDMA_COPY_SUB_OPCODE_LINEAR,
1830								       0);
1831						if (family_id >= AMDGPU_FAMILY_AI)
1832							pm4[i++] = sdma_write_length - 1;
1833						else
1834							pm4[i++] = sdma_write_length;
1835						pm4[i++] = 0;
1836						pm4[i++] = 0xffffffff & bo1_mc;
1837						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1838						pm4[i++] = 0xffffffff & bo2_mc;
1839						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1840					}
1841				} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1842					   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1843					if (family_id == AMDGPU_FAMILY_SI) {
1844						pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
1845						pm4[i++] = 0xfffffffc & bo1_mc;
1846						pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
1847							   PACKET3_DMA_DATA_SI_DST_SEL(0) |
1848							   PACKET3_DMA_DATA_SI_SRC_SEL(0) |
1849							   PACKET3_DMA_DATA_SI_CP_SYNC |
1850							   (0xffff00000000 & bo1_mc) >> 32;
1851						pm4[i++] = 0xfffffffc & bo2_mc;
1852						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1853						pm4[i++] = sdma_write_length;
1854					} else {
1855						pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
1856						pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
1857							   PACKET3_DMA_DATA_DST_SEL(0) |
1858							   PACKET3_DMA_DATA_SRC_SEL(0) |
1859							   PACKET3_DMA_DATA_CP_SYNC;
1860						pm4[i++] = 0xfffffffc & bo1_mc;
1861						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1862						pm4[i++] = 0xfffffffc & bo2_mc;
1863						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1864						pm4[i++] = sdma_write_length;
1865					}
1866				}
1867
1868				amdgpu_test_exec_cs_helper(context_handle,
1869							   ip_type, ring_id,
1870							   i, pm4,
1871							   2, resources,
1872							   ib_info, ibs_request);
1873
1874				/* verify if SDMA test result meets with expected */
1875				i = 0;
1876				while(i < sdma_write_length) {
1877					CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
1878				}
1879				r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
1880							     sdma_write_length);
1881				CU_ASSERT_EQUAL(r, 0);
1882				r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
1883							     sdma_write_length);
1884				CU_ASSERT_EQUAL(r, 0);
1885				loop2++;
1886			}
1887			loop1++;
1888		}
1889	}
1890	/* clean resources */
1891	free(resources);
1892	free(ibs_request);
1893	free(ib_info);
1894	free(pm4);
1895
1896	/* end of test */
1897	r = amdgpu_cs_ctx_free(context_handle);
1898	CU_ASSERT_EQUAL(r, 0);
1899}
1900
1901static void amdgpu_command_submission_sdma_copy_linear(void)
1902{
1903	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
1904}
1905
1906static void amdgpu_command_submission_sdma(void)
1907{
1908	amdgpu_command_submission_sdma_write_linear();
1909	amdgpu_command_submission_sdma_const_fill();
1910	amdgpu_command_submission_sdma_copy_linear();
1911}
1912
1913static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
1914{
1915	amdgpu_context_handle context_handle;
1916	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
1917	void *ib_result_cpu, *ib_result_ce_cpu;
1918	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
1919	struct amdgpu_cs_request ibs_request[2] = {0};
1920	struct amdgpu_cs_ib_info ib_info[2];
1921	struct amdgpu_cs_fence fence_status[2] = {0};
1922	uint32_t *ptr;
1923	uint32_t expired;
1924	amdgpu_bo_list_handle bo_list;
1925	amdgpu_va_handle va_handle, va_handle_ce;
1926	int r;
1927	int i = 0, ib_cs_num = 2;
1928
1929	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1930	CU_ASSERT_EQUAL(r, 0);
1931
1932	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1933				    AMDGPU_GEM_DOMAIN_GTT, 0,
1934				    &ib_result_handle, &ib_result_cpu,
1935				    &ib_result_mc_address, &va_handle);
1936	CU_ASSERT_EQUAL(r, 0);
1937
1938	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1939				    AMDGPU_GEM_DOMAIN_GTT, 0,
1940				    &ib_result_ce_handle, &ib_result_ce_cpu,
1941				    &ib_result_ce_mc_address, &va_handle_ce);
1942	CU_ASSERT_EQUAL(r, 0);
1943
1944	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
1945			       ib_result_ce_handle, &bo_list);
1946	CU_ASSERT_EQUAL(r, 0);
1947
1948	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
1949
1950	/* IT_SET_CE_DE_COUNTERS */
1951	ptr = ib_result_ce_cpu;
1952	if (family_id != AMDGPU_FAMILY_SI) {
1953		ptr[i++] = 0xc0008900;
1954		ptr[i++] = 0;
1955	}
1956	ptr[i++] = 0xc0008400;
1957	ptr[i++] = 1;
1958	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
1959	ib_info[0].size = i;
1960	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
1961
1962	/* IT_WAIT_ON_CE_COUNTER */
1963	ptr = ib_result_cpu;
1964	ptr[0] = 0xc0008600;
1965	ptr[1] = 0x00000001;
1966	ib_info[1].ib_mc_address = ib_result_mc_address;
1967	ib_info[1].size = 2;
1968
1969	for (i = 0; i < ib_cs_num; i++) {
1970		ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
1971		ibs_request[i].number_of_ibs = 2;
1972		ibs_request[i].ibs = ib_info;
1973		ibs_request[i].resources = bo_list;
1974		ibs_request[i].fence_info.handle = NULL;
1975	}
1976
1977	r = amdgpu_cs_submit(context_handle, 0,ibs_request, ib_cs_num);
1978
1979	CU_ASSERT_EQUAL(r, 0);
1980
1981	for (i = 0; i < ib_cs_num; i++) {
1982		fence_status[i].context = context_handle;
1983		fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
1984		fence_status[i].fence = ibs_request[i].seq_no;
1985	}
1986
1987	r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
1988				AMDGPU_TIMEOUT_INFINITE,
1989				&expired, NULL);
1990	CU_ASSERT_EQUAL(r, 0);
1991
1992	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1993				     ib_result_mc_address, 4096);
1994	CU_ASSERT_EQUAL(r, 0);
1995
1996	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
1997				     ib_result_ce_mc_address, 4096);
1998	CU_ASSERT_EQUAL(r, 0);
1999
2000	r = amdgpu_bo_list_destroy(bo_list);
2001	CU_ASSERT_EQUAL(r, 0);
2002
2003	r = amdgpu_cs_ctx_free(context_handle);
2004	CU_ASSERT_EQUAL(r, 0);
2005}
2006
2007static void amdgpu_command_submission_multi_fence(void)
2008{
2009	amdgpu_command_submission_multi_fence_wait_all(true);
2010	amdgpu_command_submission_multi_fence_wait_all(false);
2011}
2012
2013static void amdgpu_userptr_test(void)
2014{
2015	int i, r, j;
2016	uint32_t *pm4 = NULL;
2017	uint64_t bo_mc;
2018	void *ptr = NULL;
2019	int pm4_dw = 256;
2020	int sdma_write_length = 4;
2021	amdgpu_bo_handle handle;
2022	amdgpu_context_handle context_handle;
2023	struct amdgpu_cs_ib_info *ib_info;
2024	struct amdgpu_cs_request *ibs_request;
2025	amdgpu_bo_handle buf_handle;
2026	amdgpu_va_handle va_handle;
2027
2028	pm4 = calloc(pm4_dw, sizeof(*pm4));
2029	CU_ASSERT_NOT_EQUAL(pm4, NULL);
2030
2031	ib_info = calloc(1, sizeof(*ib_info));
2032	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
2033
2034	ibs_request = calloc(1, sizeof(*ibs_request));
2035	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
2036
2037	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2038	CU_ASSERT_EQUAL(r, 0);
2039
2040	posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
2041	CU_ASSERT_NOT_EQUAL(ptr, NULL);
2042	memset(ptr, 0, BUFFER_SIZE);
2043
2044	r = amdgpu_create_bo_from_user_mem(device_handle,
2045					   ptr, BUFFER_SIZE, &buf_handle);
2046	CU_ASSERT_EQUAL(r, 0);
2047
2048	r = amdgpu_va_range_alloc(device_handle,
2049				  amdgpu_gpu_va_range_general,
2050				  BUFFER_SIZE, 1, 0, &bo_mc,
2051				  &va_handle, 0);
2052	CU_ASSERT_EQUAL(r, 0);
2053
2054	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
2055	CU_ASSERT_EQUAL(r, 0);
2056
2057	handle = buf_handle;
2058
2059	j = i = 0;
2060
2061	if (family_id == AMDGPU_FAMILY_SI)
2062		pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
2063				sdma_write_length);
2064	else
2065		pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
2066				SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
2067	pm4[i++] = 0xffffffff & bo_mc;
2068	pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
2069	if (family_id >= AMDGPU_FAMILY_AI)
2070		pm4[i++] = sdma_write_length - 1;
2071	else if (family_id != AMDGPU_FAMILY_SI)
2072		pm4[i++] = sdma_write_length;
2073
2074	while (j++ < sdma_write_length)
2075		pm4[i++] = 0xdeadbeaf;
2076
2077	if (!fork()) {
2078		pm4[0] = 0x0;
2079		exit(0);
2080	}
2081
2082	amdgpu_test_exec_cs_helper(context_handle,
2083				   AMDGPU_HW_IP_DMA, 0,
2084				   i, pm4,
2085				   1, &handle,
2086				   ib_info, ibs_request);
2087	i = 0;
2088	while (i < sdma_write_length) {
2089		CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
2090	}
2091	free(ibs_request);
2092	free(ib_info);
2093	free(pm4);
2094
2095	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
2096	CU_ASSERT_EQUAL(r, 0);
2097	r = amdgpu_va_range_free(va_handle);
2098	CU_ASSERT_EQUAL(r, 0);
2099	r = amdgpu_bo_free(buf_handle);
2100	CU_ASSERT_EQUAL(r, 0);
2101	free(ptr);
2102
2103	r = amdgpu_cs_ctx_free(context_handle);
2104	CU_ASSERT_EQUAL(r, 0);
2105
2106	wait(NULL);
2107}
2108
2109static void amdgpu_sync_dependency_test(void)
2110{
2111	amdgpu_context_handle context_handle[2];
2112	amdgpu_bo_handle ib_result_handle;
2113	void *ib_result_cpu;
2114	uint64_t ib_result_mc_address;
2115	struct amdgpu_cs_request ibs_request;
2116	struct amdgpu_cs_ib_info ib_info;
2117	struct amdgpu_cs_fence fence_status;
2118	uint32_t expired;
2119	int i, j, r;
2120	amdgpu_bo_list_handle bo_list;
2121	amdgpu_va_handle va_handle;
2122	static uint32_t *ptr;
2123	uint64_t seq_no;
2124
2125	r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
2126	CU_ASSERT_EQUAL(r, 0);
2127	r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
2128	CU_ASSERT_EQUAL(r, 0);
2129
2130	r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
2131			AMDGPU_GEM_DOMAIN_GTT, 0,
2132						    &ib_result_handle, &ib_result_cpu,
2133						    &ib_result_mc_address, &va_handle);
2134	CU_ASSERT_EQUAL(r, 0);
2135
2136	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
2137			       &bo_list);
2138	CU_ASSERT_EQUAL(r, 0);
2139
2140	ptr = ib_result_cpu;
2141	i = 0;
2142
2143	memcpy(ptr + CODE_OFFSET , shader_bin, sizeof(shader_bin));
2144
2145	/* Dispatch minimal init config and verify it's executed */
2146	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
2147	ptr[i++] = 0x80000000;
2148	ptr[i++] = 0x80000000;
2149
2150	ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
2151	ptr[i++] = 0x80000000;
2152
2153
2154	/* Program compute regs */
2155	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2156	ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
2157	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
2158	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;
2159
2160
2161	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2162	ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
2163	/*
2164	 * 002c0040         COMPUTE_PGM_RSRC1 <- VGPRS = 0
2165	                                      SGPRS = 1
2166	                                      PRIORITY = 0
2167	                                      FLOAT_MODE = 192 (0xc0)
2168	                                      PRIV = 0
2169	                                      DX10_CLAMP = 1
2170	                                      DEBUG_MODE = 0
2171	                                      IEEE_MODE = 0
2172	                                      BULKY = 0
2173	                                      CDBG_USER = 0
2174	 *
2175	 */
2176	ptr[i++] = 0x002c0040;
2177
2178
2179	/*
2180	 * 00000010         COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
2181	                                      USER_SGPR = 8
2182	                                      TRAP_PRESENT = 0
2183	                                      TGID_X_EN = 0
2184	                                      TGID_Y_EN = 0
2185	                                      TGID_Z_EN = 0
2186	                                      TG_SIZE_EN = 0
2187	                                      TIDIG_COMP_CNT = 0
2188	                                      EXCP_EN_MSB = 0
2189	                                      LDS_SIZE = 0
2190	                                      EXCP_EN = 0
2191	 *
2192	 */
2193	ptr[i++] = 0x00000010;
2194
2195
2196/*
2197 * 00000100         COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
2198                                         WAVESIZE = 0
2199 *
2200 */
2201	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2202	ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
2203	ptr[i++] = 0x00000100;
2204
2205	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2206	ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
2207	ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
2208	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
2209
2210	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2211	ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
2212	ptr[i++] = 0;
2213
2214	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
2215	ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
2216	ptr[i++] = 1;
2217	ptr[i++] = 1;
2218	ptr[i++] = 1;
2219
2220
2221	/* Dispatch */
2222	ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
2223	ptr[i++] = 1;
2224	ptr[i++] = 1;
2225	ptr[i++] = 1;
2226	ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */
2227
2228
2229	while (i & 7)
2230		ptr[i++] =  0xffff1000; /* type3 nop packet */
2231
2232	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
2233	ib_info.ib_mc_address = ib_result_mc_address;
2234	ib_info.size = i;
2235
2236	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
2237	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
2238	ibs_request.ring = 0;
2239	ibs_request.number_of_ibs = 1;
2240	ibs_request.ibs = &ib_info;
2241	ibs_request.resources = bo_list;
2242	ibs_request.fence_info.handle = NULL;
2243
2244	r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request, 1);
2245	CU_ASSERT_EQUAL(r, 0);
2246	seq_no = ibs_request.seq_no;
2247
2248
2249
2250	/* Prepare second command with dependency on the first */
2251	j = i;
2252	ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
2253	ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
2254	ptr[i++] =          0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
2255	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
2256	ptr[i++] = 99;
2257
2258	while (i & 7)
2259		ptr[i++] =  0xffff1000; /* type3 nop packet */
2260
2261	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
2262	ib_info.ib_mc_address = ib_result_mc_address + j * 4;
2263	ib_info.size = i - j;
2264
2265	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
2266	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
2267	ibs_request.ring = 0;
2268	ibs_request.number_of_ibs = 1;
2269	ibs_request.ibs = &ib_info;
2270	ibs_request.resources = bo_list;
2271	ibs_request.fence_info.handle = NULL;
2272
2273	ibs_request.number_of_dependencies = 1;
2274
2275	ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
2276	ibs_request.dependencies[0].context = context_handle[1];
2277	ibs_request.dependencies[0].ip_instance = 0;
2278	ibs_request.dependencies[0].ring = 0;
2279	ibs_request.dependencies[0].fence = seq_no;
2280
2281
2282	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request, 1);
2283	CU_ASSERT_EQUAL(r, 0);
2284
2285
2286	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
2287	fence_status.context = context_handle[0];
2288	fence_status.ip_type = AMDGPU_HW_IP_GFX;
2289	fence_status.ip_instance = 0;
2290	fence_status.ring = 0;
2291	fence_status.fence = ibs_request.seq_no;
2292
2293	r = amdgpu_cs_query_fence_status(&fence_status,
2294		       AMDGPU_TIMEOUT_INFINITE,0, &expired);
2295	CU_ASSERT_EQUAL(r, 0);
2296
2297	/* Expect the second command to wait for shader to complete */
2298	CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);
2299
2300	r = amdgpu_bo_list_destroy(bo_list);
2301	CU_ASSERT_EQUAL(r, 0);
2302
2303	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
2304				     ib_result_mc_address, 4096);
2305	CU_ASSERT_EQUAL(r, 0);
2306
2307	r = amdgpu_cs_ctx_free(context_handle[0]);
2308	CU_ASSERT_EQUAL(r, 0);
2309	r = amdgpu_cs_ctx_free(context_handle[1]);
2310	CU_ASSERT_EQUAL(r, 0);
2311
2312	free(ibs_request.dependencies);
2313}
2314
2315static int amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t *ptr, int family)
2316{
2317	struct amdgpu_test_shader *shader;
2318	int i, loop = 0x10000;
2319
2320	switch (family) {
2321		case AMDGPU_FAMILY_AI:
2322			shader = &memcpy_cs_hang_slow_ai;
2323			break;
2324		case AMDGPU_FAMILY_RV:
2325			shader = &memcpy_cs_hang_slow_rv;
2326			break;
2327		default:
2328			return -1;
2329			break;
2330	}
2331
2332	memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
2333
2334	for (i = 0; i < loop; i++)
2335		memcpy(ptr + shader->header_length + shader->body_length * i,
2336			shader->shader + shader->header_length,
2337			shader->body_length * sizeof(uint32_t));
2338
2339	memcpy(ptr + shader->header_length + shader->body_length * loop,
2340		shader->shader + shader->header_length + shader->body_length,
2341		shader->foot_length * sizeof(uint32_t));
2342
2343	return 0;
2344}
2345
2346static int amdgpu_dispatch_load_cs_shader(uint8_t *ptr,
2347					   int cs_type)
2348{
2349	uint32_t shader_size;
2350	const uint32_t *shader;
2351
2352	switch (cs_type) {
2353		case CS_BUFFERCLEAR:
2354			shader = bufferclear_cs_shader_gfx9;
2355			shader_size = sizeof(bufferclear_cs_shader_gfx9);
2356			break;
2357		case CS_BUFFERCOPY:
2358			shader = buffercopy_cs_shader_gfx9;
2359			shader_size = sizeof(buffercopy_cs_shader_gfx9);
2360			break;
2361		case CS_HANG:
2362			shader = memcpy_ps_hang;
2363			shader_size = sizeof(memcpy_ps_hang);
2364			break;
2365		default:
2366			return -1;
2367			break;
2368	}
2369
2370	memcpy(ptr, shader, shader_size);
2371	return 0;
2372}
2373
2374static int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type)
2375{
2376	int i = 0;
2377
2378	/* Write context control and load shadowing register if necessary */
2379	if (ip_type == AMDGPU_HW_IP_GFX) {
2380		ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
2381		ptr[i++] = 0x80000000;
2382		ptr[i++] = 0x80000000;
2383	}
2384
2385	/* Issue commands to set default compute state. */
2386	/* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */
2387	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 3);
2388	ptr[i++] = 0x204;
2389	i += 3;
2390
2391	/* clear mmCOMPUTE_TMPRING_SIZE */
2392	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2393	ptr[i++] = 0x218;
2394	ptr[i++] = 0;
2395
2396	return i;
2397}
2398
2399static int amdgpu_dispatch_write_cumask(uint32_t *ptr)
2400{
2401	int i = 0;
2402
2403	/*  Issue commands to set cu mask used in current dispatch */
2404	/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */
2405	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2406	ptr[i++] = 0x216;
2407	ptr[i++] = 0xffffffff;
2408	ptr[i++] = 0xffffffff;
2409	/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */
2410	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2411	ptr[i++] = 0x219;
2412	ptr[i++] = 0xffffffff;
2413	ptr[i++] = 0xffffffff;
2414
2415	return i;
2416}
2417
2418static int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr)
2419{
2420	int i, j;
2421
2422	i = 0;
2423
2424	/* Writes shader state to HW */
2425	/* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */
2426	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2427	ptr[i++] = 0x20c;
2428	ptr[i++] = (shader_addr >> 8);
2429	ptr[i++] = (shader_addr >> 40);
2430	/* write sh regs*/
2431	for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) {
2432		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2433		/* - Gfx9ShRegBase */
2434		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00;
2435		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1];
2436	}
2437
2438	return i;
2439}
2440
2441static void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
2442					 uint32_t ip_type,
2443					 uint32_t ring)
2444{
2445	amdgpu_context_handle context_handle;
2446	amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3];
2447	volatile unsigned char *ptr_dst;
2448	void *ptr_shader;
2449	uint32_t *ptr_cmd;
2450	uint64_t mc_address_dst, mc_address_shader, mc_address_cmd;
2451	amdgpu_va_handle va_dst, va_shader, va_cmd;
2452	int i, r;
2453	int bo_dst_size = 16384;
2454	int bo_shader_size = 4096;
2455	int bo_cmd_size = 4096;
2456	struct amdgpu_cs_request ibs_request = {0};
2457	struct amdgpu_cs_ib_info ib_info= {0};
2458	amdgpu_bo_list_handle bo_list;
2459	struct amdgpu_cs_fence fence_status = {0};
2460	uint32_t expired;
2461
2462	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2463	CU_ASSERT_EQUAL(r, 0);
2464
2465	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
2466					AMDGPU_GEM_DOMAIN_GTT, 0,
2467					&bo_cmd, (void **)&ptr_cmd,
2468					&mc_address_cmd, &va_cmd);
2469	CU_ASSERT_EQUAL(r, 0);
2470	memset(ptr_cmd, 0, bo_cmd_size);
2471
2472	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
2473					AMDGPU_GEM_DOMAIN_VRAM, 0,
2474					&bo_shader, &ptr_shader,
2475					&mc_address_shader, &va_shader);
2476	CU_ASSERT_EQUAL(r, 0);
2477	memset(ptr_shader, 0, bo_shader_size);
2478
2479	r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR);
2480	CU_ASSERT_EQUAL(r, 0);
2481
2482	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2483					AMDGPU_GEM_DOMAIN_VRAM, 0,
2484					&bo_dst, (void **)&ptr_dst,
2485					&mc_address_dst, &va_dst);
2486	CU_ASSERT_EQUAL(r, 0);
2487
2488	i = 0;
2489	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);
2490
2491	/*  Issue commands to set cu mask used in current dispatch */
2492	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);
2493
2494	/* Writes shader state to HW */
2495	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);
2496
2497	/* Write constant data */
2498	/* Writes the UAV constant data to the SGPRs. */
2499	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2500	ptr_cmd[i++] = 0x240;
2501	ptr_cmd[i++] = mc_address_dst;
2502	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
2503	ptr_cmd[i++] = 0x400;
2504	ptr_cmd[i++] = 0x74fac;
2505
2506	/* Sets a range of pixel shader constants */
2507	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2508	ptr_cmd[i++] = 0x244;
2509	ptr_cmd[i++] = 0x22222222;
2510	ptr_cmd[i++] = 0x22222222;
2511	ptr_cmd[i++] = 0x22222222;
2512	ptr_cmd[i++] = 0x22222222;
2513
2514	/* clear mmCOMPUTE_RESOURCE_LIMITS */
2515	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2516	ptr_cmd[i++] = 0x215;
2517	ptr_cmd[i++] = 0;
2518
2519	/* dispatch direct command */
2520	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
2521	ptr_cmd[i++] = 0x10;
2522	ptr_cmd[i++] = 1;
2523	ptr_cmd[i++] = 1;
2524	ptr_cmd[i++] = 1;
2525
2526	while (i & 7)
2527		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
2528
2529	resources[0] = bo_dst;
2530	resources[1] = bo_shader;
2531	resources[2] = bo_cmd;
2532	r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list);
2533	CU_ASSERT_EQUAL(r, 0);
2534
2535	ib_info.ib_mc_address = mc_address_cmd;
2536	ib_info.size = i;
2537	ibs_request.ip_type = ip_type;
2538	ibs_request.ring = ring;
2539	ibs_request.resources = bo_list;
2540	ibs_request.number_of_ibs = 1;
2541	ibs_request.ibs = &ib_info;
2542	ibs_request.fence_info.handle = NULL;
2543
2544	/* submit CS */
2545	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
2546	CU_ASSERT_EQUAL(r, 0);
2547
2548	r = amdgpu_bo_list_destroy(bo_list);
2549	CU_ASSERT_EQUAL(r, 0);
2550
2551	fence_status.ip_type = ip_type;
2552	fence_status.ip_instance = 0;
2553	fence_status.ring = ring;
2554	fence_status.context = context_handle;
2555	fence_status.fence = ibs_request.seq_no;
2556
2557	/* wait for IB accomplished */
2558	r = amdgpu_cs_query_fence_status(&fence_status,
2559					 AMDGPU_TIMEOUT_INFINITE,
2560					 0, &expired);
2561	CU_ASSERT_EQUAL(r, 0);
2562	CU_ASSERT_EQUAL(expired, true);
2563
2564	/* verify if memset test result meets with expected */
2565	i = 0;
2566	while(i < bo_dst_size) {
2567		CU_ASSERT_EQUAL(ptr_dst[i++], 0x22);
2568	}
2569
2570	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
2571	CU_ASSERT_EQUAL(r, 0);
2572
2573	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
2574	CU_ASSERT_EQUAL(r, 0);
2575
2576	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
2577	CU_ASSERT_EQUAL(r, 0);
2578
2579	r = amdgpu_cs_ctx_free(context_handle);
2580	CU_ASSERT_EQUAL(r, 0);
2581}
2582
2583static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
2584					uint32_t ip_type,
2585					uint32_t ring,
2586					int hang)
2587{
2588	amdgpu_context_handle context_handle;
2589	amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
2590	volatile unsigned char *ptr_dst;
2591	void *ptr_shader;
2592	unsigned char *ptr_src;
2593	uint32_t *ptr_cmd;
2594	uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
2595	amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
2596	int i, r;
2597	int bo_dst_size = 16384;
2598	int bo_shader_size = 4096;
2599	int bo_cmd_size = 4096;
2600	struct amdgpu_cs_request ibs_request = {0};
2601	struct amdgpu_cs_ib_info ib_info= {0};
2602	uint32_t expired, hang_state, hangs;
2603	enum cs_type cs_type;
2604	amdgpu_bo_list_handle bo_list;
2605	struct amdgpu_cs_fence fence_status = {0};
2606
2607	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2608	CU_ASSERT_EQUAL(r, 0);
2609
2610	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
2611				    AMDGPU_GEM_DOMAIN_GTT, 0,
2612				    &bo_cmd, (void **)&ptr_cmd,
2613				    &mc_address_cmd, &va_cmd);
2614	CU_ASSERT_EQUAL(r, 0);
2615	memset(ptr_cmd, 0, bo_cmd_size);
2616
2617	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
2618					AMDGPU_GEM_DOMAIN_VRAM, 0,
2619					&bo_shader, &ptr_shader,
2620					&mc_address_shader, &va_shader);
2621	CU_ASSERT_EQUAL(r, 0);
2622	memset(ptr_shader, 0, bo_shader_size);
2623
2624	cs_type = hang ? CS_HANG : CS_BUFFERCOPY;
2625	r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type);
2626	CU_ASSERT_EQUAL(r, 0);
2627
2628	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2629					AMDGPU_GEM_DOMAIN_VRAM, 0,
2630					&bo_src, (void **)&ptr_src,
2631					&mc_address_src, &va_src);
2632	CU_ASSERT_EQUAL(r, 0);
2633
2634	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2635					AMDGPU_GEM_DOMAIN_VRAM, 0,
2636					&bo_dst, (void **)&ptr_dst,
2637					&mc_address_dst, &va_dst);
2638	CU_ASSERT_EQUAL(r, 0);
2639
2640	memset(ptr_src, 0x55, bo_dst_size);
2641
2642	i = 0;
2643	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);
2644
2645	/*  Issue commands to set cu mask used in current dispatch */
2646	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);
2647
2648	/* Writes shader state to HW */
2649	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);
2650
2651	/* Write constant data */
2652	/* Writes the texture resource constants data to the SGPRs */
2653	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2654	ptr_cmd[i++] = 0x240;
2655	ptr_cmd[i++] = mc_address_src;
2656	ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
2657	ptr_cmd[i++] = 0x400;
2658	ptr_cmd[i++] = 0x74fac;
2659
2660	/* Writes the UAV constant data to the SGPRs. */
2661	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2662	ptr_cmd[i++] = 0x244;
2663	ptr_cmd[i++] = mc_address_dst;
2664	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
2665	ptr_cmd[i++] = 0x400;
2666	ptr_cmd[i++] = 0x74fac;
2667
2668	/* clear mmCOMPUTE_RESOURCE_LIMITS */
2669	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2670	ptr_cmd[i++] = 0x215;
2671	ptr_cmd[i++] = 0;
2672
2673	/* dispatch direct command */
2674	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
2675	ptr_cmd[i++] = 0x10;
2676	ptr_cmd[i++] = 1;
2677	ptr_cmd[i++] = 1;
2678	ptr_cmd[i++] = 1;
2679
2680	while (i & 7)
2681		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
2682
2683	resources[0] = bo_shader;
2684	resources[1] = bo_src;
2685	resources[2] = bo_dst;
2686	resources[3] = bo_cmd;
2687	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
2688	CU_ASSERT_EQUAL(r, 0);
2689
2690	ib_info.ib_mc_address = mc_address_cmd;
2691	ib_info.size = i;
2692	ibs_request.ip_type = ip_type;
2693	ibs_request.ring = ring;
2694	ibs_request.resources = bo_list;
2695	ibs_request.number_of_ibs = 1;
2696	ibs_request.ibs = &ib_info;
2697	ibs_request.fence_info.handle = NULL;
2698	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
2699	CU_ASSERT_EQUAL(r, 0);
2700
2701	fence_status.ip_type = ip_type;
2702	fence_status.ip_instance = 0;
2703	fence_status.ring = ring;
2704	fence_status.context = context_handle;
2705	fence_status.fence = ibs_request.seq_no;
2706
2707	/* wait for IB accomplished */
2708	r = amdgpu_cs_query_fence_status(&fence_status,
2709					 AMDGPU_TIMEOUT_INFINITE,
2710					 0, &expired);
2711
2712	if (!hang) {
2713		CU_ASSERT_EQUAL(r, 0);
2714		CU_ASSERT_EQUAL(expired, true);
2715
2716		/* verify if memcpy test result meets with expected */
2717		i = 0;
2718		while(i < bo_dst_size) {
2719			CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
2720			i++;
2721		}
2722	} else {
2723		r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
2724		CU_ASSERT_EQUAL(r, 0);
2725		CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
2726	}
2727
2728	r = amdgpu_bo_list_destroy(bo_list);
2729	CU_ASSERT_EQUAL(r, 0);
2730
2731	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
2732	CU_ASSERT_EQUAL(r, 0);
2733	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
2734	CU_ASSERT_EQUAL(r, 0);
2735
2736	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
2737	CU_ASSERT_EQUAL(r, 0);
2738
2739	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
2740	CU_ASSERT_EQUAL(r, 0);
2741
2742	r = amdgpu_cs_ctx_free(context_handle);
2743	CU_ASSERT_EQUAL(r, 0);
2744}
2745
2746static void amdgpu_compute_dispatch_test(void)
2747{
2748	int r;
2749	struct drm_amdgpu_info_hw_ip info;
2750	uint32_t ring_id;
2751
2752	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
2753	CU_ASSERT_EQUAL(r, 0);
2754	if (!info.available_rings)
2755		printf("SKIP ... as there's no compute ring\n");
2756
2757	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2758		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id);
2759		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, 0);
2760	}
2761}
2762
2763static void amdgpu_gfx_dispatch_test(void)
2764{
2765	int r;
2766	struct drm_amdgpu_info_hw_ip info;
2767	uint32_t ring_id;
2768
2769	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
2770	CU_ASSERT_EQUAL(r, 0);
2771	if (!info.available_rings)
2772		printf("SKIP ... as there's no graphics ring\n");
2773
2774	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2775		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id);
2776		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, 0);
2777	}
2778}
2779
2780void amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
2781{
2782	int r;
2783	struct drm_amdgpu_info_hw_ip info;
2784	uint32_t ring_id;
2785
2786	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
2787	CU_ASSERT_EQUAL(r, 0);
2788	if (!info.available_rings)
2789		printf("SKIP ... as there's no ring for ip %d\n", ip_type);
2790
2791	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2792		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
2793		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 1);
2794		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
2795	}
2796}
2797
2798static void amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_handle,
2799						  uint32_t ip_type, uint32_t ring)
2800{
2801	amdgpu_context_handle context_handle;
2802	amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
2803	volatile unsigned char *ptr_dst;
2804	void *ptr_shader;
2805	unsigned char *ptr_src;
2806	uint32_t *ptr_cmd;
2807	uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
2808	amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
2809	int i, r;
2810	int bo_dst_size = 0x4000000;
2811	int bo_shader_size = 0x400000;
2812	int bo_cmd_size = 4096;
2813	struct amdgpu_cs_request ibs_request = {0};
2814	struct amdgpu_cs_ib_info ib_info= {0};
2815	uint32_t hang_state, hangs, expired;
2816	struct amdgpu_gpu_info gpu_info = {0};
2817	amdgpu_bo_list_handle bo_list;
2818	struct amdgpu_cs_fence fence_status = {0};
2819
2820	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
2821	CU_ASSERT_EQUAL(r, 0);
2822
2823	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2824	CU_ASSERT_EQUAL(r, 0);
2825
2826	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
2827				    AMDGPU_GEM_DOMAIN_GTT, 0,
2828				    &bo_cmd, (void **)&ptr_cmd,
2829				    &mc_address_cmd, &va_cmd);
2830	CU_ASSERT_EQUAL(r, 0);
2831	memset(ptr_cmd, 0, bo_cmd_size);
2832
2833	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
2834					AMDGPU_GEM_DOMAIN_VRAM, 0,
2835					&bo_shader, &ptr_shader,
2836					&mc_address_shader, &va_shader);
2837	CU_ASSERT_EQUAL(r, 0);
2838	memset(ptr_shader, 0, bo_shader_size);
2839
2840	r = amdgpu_dispatch_load_cs_shader_hang_slow(ptr_shader, gpu_info.family_id);
2841	CU_ASSERT_EQUAL(r, 0);
2842
2843	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2844					AMDGPU_GEM_DOMAIN_VRAM, 0,
2845					&bo_src, (void **)&ptr_src,
2846					&mc_address_src, &va_src);
2847	CU_ASSERT_EQUAL(r, 0);
2848
2849	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2850					AMDGPU_GEM_DOMAIN_VRAM, 0,
2851					&bo_dst, (void **)&ptr_dst,
2852					&mc_address_dst, &va_dst);
2853	CU_ASSERT_EQUAL(r, 0);
2854
2855	memset(ptr_src, 0x55, bo_dst_size);
2856
2857	i = 0;
2858	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);
2859
2860	/*  Issue commands to set cu mask used in current dispatch */
2861	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);
2862
2863	/* Writes shader state to HW */
2864	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);
2865
2866	/* Write constant data */
2867	/* Writes the texture resource constants data to the SGPRs */
2868	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2869	ptr_cmd[i++] = 0x240;
2870	ptr_cmd[i++] = mc_address_src;
2871	ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
2872	ptr_cmd[i++] = 0x400000;
2873	ptr_cmd[i++] = 0x74fac;
2874
2875	/* Writes the UAV constant data to the SGPRs. */
2876	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2877	ptr_cmd[i++] = 0x244;
2878	ptr_cmd[i++] = mc_address_dst;
2879	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
2880	ptr_cmd[i++] = 0x400000;
2881	ptr_cmd[i++] = 0x74fac;
2882
2883	/* clear mmCOMPUTE_RESOURCE_LIMITS */
2884	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2885	ptr_cmd[i++] = 0x215;
2886	ptr_cmd[i++] = 0;
2887
2888	/* dispatch direct command */
2889	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
2890	ptr_cmd[i++] = 0x10000;
2891	ptr_cmd[i++] = 1;
2892	ptr_cmd[i++] = 1;
2893	ptr_cmd[i++] = 1;
2894
2895	while (i & 7)
2896		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
2897
2898	resources[0] = bo_shader;
2899	resources[1] = bo_src;
2900	resources[2] = bo_dst;
2901	resources[3] = bo_cmd;
2902	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
2903	CU_ASSERT_EQUAL(r, 0);
2904
2905	ib_info.ib_mc_address = mc_address_cmd;
2906	ib_info.size = i;
2907	ibs_request.ip_type = ip_type;
2908	ibs_request.ring = ring;
2909	ibs_request.resources = bo_list;
2910	ibs_request.number_of_ibs = 1;
2911	ibs_request.ibs = &ib_info;
2912	ibs_request.fence_info.handle = NULL;
2913	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
2914	CU_ASSERT_EQUAL(r, 0);
2915
2916	fence_status.ip_type = ip_type;
2917	fence_status.ip_instance = 0;
2918	fence_status.ring = ring;
2919	fence_status.context = context_handle;
2920	fence_status.fence = ibs_request.seq_no;
2921
2922	/* wait for IB accomplished */
2923	r = amdgpu_cs_query_fence_status(&fence_status,
2924					 AMDGPU_TIMEOUT_INFINITE,
2925					 0, &expired);
2926
2927	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
2928	CU_ASSERT_EQUAL(r, 0);
2929	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
2930
2931	r = amdgpu_bo_list_destroy(bo_list);
2932	CU_ASSERT_EQUAL(r, 0);
2933
2934	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
2935	CU_ASSERT_EQUAL(r, 0);
2936	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
2937	CU_ASSERT_EQUAL(r, 0);
2938
2939	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
2940	CU_ASSERT_EQUAL(r, 0);
2941
2942	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
2943	CU_ASSERT_EQUAL(r, 0);
2944
2945	r = amdgpu_cs_ctx_free(context_handle);
2946	CU_ASSERT_EQUAL(r, 0);
2947}
2948
2949void amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
2950{
2951	int r;
2952	struct drm_amdgpu_info_hw_ip info;
2953	uint32_t ring_id;
2954
2955	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
2956	CU_ASSERT_EQUAL(r, 0);
2957	if (!info.available_rings)
2958		printf("SKIP ... as there's no ring for ip %d\n", ip_type);
2959
2960	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2961		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
2962		amdgpu_memcpy_dispatch_hang_slow_test(device_handle, ip_type, ring_id);
2963		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
2964	}
2965}
2966
2967static int amdgpu_draw_load_ps_shader_hang_slow(uint32_t *ptr, int family)
2968{
2969	struct amdgpu_test_shader *shader;
2970	int i, loop = 0x40000;
2971
2972	switch (family) {
2973		case AMDGPU_FAMILY_AI:
2974		case AMDGPU_FAMILY_RV:
2975			shader = &memcpy_ps_hang_slow_ai;
2976			break;
2977		default:
2978			return -1;
2979			break;
2980	}
2981
2982	memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
2983
2984	for (i = 0; i < loop; i++)
2985		memcpy(ptr + shader->header_length + shader->body_length * i,
2986			shader->shader + shader->header_length,
2987			shader->body_length * sizeof(uint32_t));
2988
2989	memcpy(ptr + shader->header_length + shader->body_length * loop,
2990		shader->shader + shader->header_length + shader->body_length,
2991		shader->foot_length * sizeof(uint32_t));
2992
2993	return 0;
2994}
2995
2996static int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type)
2997{
2998	int i;
2999	uint32_t shader_offset= 256;
3000	uint32_t mem_offset, patch_code_offset;
3001	uint32_t shader_size, patchinfo_code_size;
3002	const uint32_t *shader;
3003	const uint32_t *patchinfo_code;
3004	const uint32_t *patchcode_offset;
3005
3006	switch (ps_type) {
3007		case PS_CONST:
3008			shader = ps_const_shader_gfx9;
3009			shader_size = sizeof(ps_const_shader_gfx9);
3010			patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9;
3011			patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9;
3012			patchcode_offset = ps_const_shader_patchinfo_offset_gfx9;
3013			break;
3014		case PS_TEX:
3015			shader = ps_tex_shader_gfx9;
3016			shader_size = sizeof(ps_tex_shader_gfx9);
3017			patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx9;
3018			patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9;
3019			patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9;
3020			break;
3021		case PS_HANG:
3022			shader = memcpy_ps_hang;
3023			shader_size = sizeof(memcpy_ps_hang);
3024
3025			memcpy(ptr, shader, shader_size);
3026			return 0;
3027		default:
3028			return -1;
3029			break;
3030	}
3031
3032	/* write main shader program */
3033	for (i = 0 ; i < 10; i++) {
3034		mem_offset = i * shader_offset;
3035		memcpy(ptr + mem_offset, shader, shader_size);
3036	}
3037
3038	/* overwrite patch codes */
3039	for (i = 0 ; i < 10; i++) {
3040		mem_offset = i * shader_offset + patchcode_offset[0] * sizeof(uint32_t);
3041		patch_code_offset = i * patchinfo_code_size;
3042		memcpy(ptr + mem_offset,
3043			patchinfo_code + patch_code_offset,
3044			patchinfo_code_size * sizeof(uint32_t));
3045	}
3046
3047	return 0;
3048}
3049
3050/* load RectPosTexFast_VS */
3051static int amdgpu_draw_load_vs_shader(uint8_t *ptr)
3052{
3053	const uint32_t *shader;
3054	uint32_t shader_size;
3055
3056	shader = vs_RectPosTexFast_shader_gfx9;
3057	shader_size = sizeof(vs_RectPosTexFast_shader_gfx9);
3058
3059	memcpy(ptr, shader, shader_size);
3060
3061	return 0;
3062}
3063
3064static int amdgpu_draw_init(uint32_t *ptr)
3065{
3066	int i = 0;
3067	const uint32_t *preamblecache_ptr;
3068	uint32_t preamblecache_size;
3069
3070	/* Write context control and load shadowing register if necessary */
3071	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
3072	ptr[i++] = 0x80000000;
3073	ptr[i++] = 0x80000000;
3074
3075	preamblecache_ptr = preamblecache_gfx9;
3076	preamblecache_size = sizeof(preamblecache_gfx9);
3077
3078	memcpy(ptr + i, preamblecache_ptr, preamblecache_size);
3079	return i + preamblecache_size/sizeof(uint32_t);
3080}
3081
3082static int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr,
3083							 uint64_t dst_addr,
3084							 int hang_slow)
3085{
3086	int i = 0;
3087
3088	/* setup color buffer */
3089	/* offset   reg
3090	   0xA318   CB_COLOR0_BASE
3091	   0xA319   CB_COLOR0_BASE_EXT
3092	   0xA31A   CB_COLOR0_ATTRIB2
3093	   0xA31B   CB_COLOR0_VIEW
3094	   0xA31C   CB_COLOR0_INFO
3095	   0xA31D   CB_COLOR0_ATTRIB
3096	   0xA31E   CB_COLOR0_DCC_CONTROL
3097	   0xA31F   CB_COLOR0_CMASK
3098	   0xA320   CB_COLOR0_CMASK_BASE_EXT
3099	   0xA321   CB_COLOR0_FMASK
3100	   0xA322   CB_COLOR0_FMASK_BASE_EXT
3101	   0xA323   CB_COLOR0_CLEAR_WORD0
3102	   0xA324   CB_COLOR0_CLEAR_WORD1
3103	   0xA325   CB_COLOR0_DCC_BASE
3104	   0xA326   CB_COLOR0_DCC_BASE_EXT */
3105	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15);
3106	ptr[i++] = 0x318;
3107	ptr[i++] = dst_addr >> 8;
3108	ptr[i++] = dst_addr >> 40;
3109	ptr[i++] = hang_slow ? 0x1ffc7ff : 0x7c01f;
3110	ptr[i++] = 0;
3111	ptr[i++] = 0x50438;
3112	ptr[i++] = 0x10140000;
3113	i += 9;
3114
3115	/* mmCB_MRT0_EPITCH */
3116	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3117	ptr[i++] = 0x1e8;
3118	ptr[i++] = hang_slow ? 0x7ff : 0x1f;
3119
3120	/* 0xA32B   CB_COLOR1_BASE */
3121	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3122	ptr[i++] = 0x32b;
3123	ptr[i++] = 0;
3124
3125	/* 0xA33A   CB_COLOR1_BASE */
3126	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3127	ptr[i++] = 0x33a;
3128	ptr[i++] = 0;
3129
3130	/* SPI_SHADER_COL_FORMAT */
3131	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3132	ptr[i++] = 0x1c5;
3133	ptr[i++] = 9;
3134
3135	/* Setup depth buffer */
3136	/* mmDB_Z_INFO */
3137	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
3138	ptr[i++] = 0xe;
3139	i += 2;
3140
3141	return i;
3142}
3143
3144static int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr, int hang_slow)
3145{
3146	int i = 0;
3147	const uint32_t *cached_cmd_ptr;
3148	uint32_t cached_cmd_size;
3149
3150	/* mmPA_SC_TILE_STEERING_OVERRIDE */
3151	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3152	ptr[i++] = 0xd7;
3153	ptr[i++] = 0;
3154
3155	ptr[i++] = 0xffff1000;
3156	ptr[i++] = 0xc0021000;
3157
3158	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3159	ptr[i++] = 0xd7;
3160	ptr[i++] = 1;
3161
3162	/* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
3163	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16);
3164	ptr[i++] = 0x2fe;
3165	i += 16;
3166
3167	/* mmPA_SC_CENTROID_PRIORITY_0 */
3168	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
3169	ptr[i++] = 0x2f5;
3170	i += 2;
3171
3172	cached_cmd_ptr = cached_cmd_gfx9;
3173	cached_cmd_size = sizeof(cached_cmd_gfx9);
3174
3175	memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size);
3176	if (hang_slow)
3177		*(ptr + i + 12) = 0x8000800;
3178	i += cached_cmd_size/sizeof(uint32_t);
3179
3180	return i;
3181}
3182
3183static int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr,
3184						  int ps_type,
3185						  uint64_t shader_addr,
3186						  int hang_slow)
3187{
3188	int i = 0;
3189
3190	/* mmPA_CL_VS_OUT_CNTL */
3191	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3192	ptr[i++] = 0x207;
3193	ptr[i++] = 0;
3194
3195	/* mmSPI_SHADER_PGM_RSRC3_VS */
3196	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3197	ptr[i++] = 0x46;
3198	ptr[i++] = 0xffff;
3199
3200	/* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */
3201	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
3202	ptr[i++] = 0x48;
3203	ptr[i++] = shader_addr >> 8;
3204	ptr[i++] = shader_addr >> 40;
3205
3206	/* mmSPI_SHADER_PGM_RSRC1_VS */
3207	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3208	ptr[i++] = 0x4a;
3209	ptr[i++] = 0xc0081;
3210	/* mmSPI_SHADER_PGM_RSRC2_VS */
3211	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3212	ptr[i++] = 0x4b;
3213	ptr[i++] = 0x18;
3214
3215	/* mmSPI_VS_OUT_CONFIG */
3216	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3217	ptr[i++] = 0x1b1;
3218	ptr[i++] = 2;
3219
3220	/* mmSPI_SHADER_POS_FORMAT */
3221	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3222	ptr[i++] = 0x1c3;
3223	ptr[i++] = 4;
3224
3225	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3226	ptr[i++] = 0x4c;
3227	i += 2;
3228	ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;
3229	ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;
3230
3231	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3232	ptr[i++] = 0x50;
3233	i += 2;
3234	if (ps_type == PS_CONST) {
3235		i += 2;
3236	} else if (ps_type == PS_TEX) {
3237		ptr[i++] = 0x3f800000;
3238		ptr[i++] = 0x3f800000;
3239	}
3240
3241	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3242	ptr[i++] = 0x54;
3243	i += 4;
3244
3245	return i;
3246}
3247
3248static int amdgpu_draw_ps_write2hw(uint32_t *ptr,
3249				   int ps_type,
3250				   uint64_t shader_addr)
3251{
3252	int i, j;
3253	const uint32_t *sh_registers;
3254	const uint32_t *context_registers;
3255	uint32_t num_sh_reg, num_context_reg;
3256
3257	if (ps_type == PS_CONST) {
3258		sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9;
3259		context_registers = (const uint32_t *)ps_const_context_reg_gfx9;
3260		num_sh_reg = ps_num_sh_registers_gfx9;
3261		num_context_reg = ps_num_context_registers_gfx9;
3262	} else if (ps_type == PS_TEX) {
3263		sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9;
3264		context_registers = (const uint32_t *)ps_tex_context_reg_gfx9;
3265		num_sh_reg = ps_num_sh_registers_gfx9;
3266		num_context_reg = ps_num_context_registers_gfx9;
3267	}
3268
3269	i = 0;
3270
3271	/* 0x2c07   SPI_SHADER_PGM_RSRC3_PS
3272	   0x2c08   SPI_SHADER_PGM_LO_PS
3273	   0x2c09   SPI_SHADER_PGM_HI_PS */
3274	shader_addr += 256 * 9;
3275	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
3276	ptr[i++] = 0x7;
3277	ptr[i++] = 0xffff;
3278	ptr[i++] = shader_addr >> 8;
3279	ptr[i++] = shader_addr >> 40;
3280
3281	for (j = 0; j < num_sh_reg; j++) {
3282		ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3283		ptr[i++] = sh_registers[j * 2] - 0x2c00;
3284		ptr[i++] = sh_registers[j * 2 + 1];
3285	}
3286
3287	for (j = 0; j < num_context_reg; j++) {
3288		if (context_registers[j * 2] != 0xA1C5) {
3289			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3290			ptr[i++] = context_registers[j * 2] - 0xa000;
3291			ptr[i++] = context_registers[j * 2 + 1];
3292		}
3293
3294		if (context_registers[j * 2] == 0xA1B4) {
3295			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3296			ptr[i++] = 0x1b3;
3297			ptr[i++] = 2;
3298		}
3299	}
3300
3301	return i;
3302}
3303
3304static int amdgpu_draw_draw(uint32_t *ptr)
3305{
3306	int i = 0;
3307
3308	/* mmIA_MULTI_VGT_PARAM */
3309	ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3310	ptr[i++] = 0x40000258;
3311	ptr[i++] = 0xd00ff;
3312
3313	/* mmVGT_PRIMITIVE_TYPE */
3314	ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3315	ptr[i++] = 0x10000242;
3316	ptr[i++] = 0x11;
3317
3318	ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1);
3319	ptr[i++] = 3;
3320	ptr[i++] = 2;
3321
3322	return i;
3323}
3324
3325void amdgpu_memset_draw(amdgpu_device_handle device_handle,
3326			amdgpu_bo_handle bo_shader_ps,
3327			amdgpu_bo_handle bo_shader_vs,
3328			uint64_t mc_address_shader_ps,
3329			uint64_t mc_address_shader_vs,
3330			uint32_t ring_id)
3331{
3332	amdgpu_context_handle context_handle;
3333	amdgpu_bo_handle bo_dst, bo_cmd, resources[4];
3334	volatile unsigned char *ptr_dst;
3335	uint32_t *ptr_cmd;
3336	uint64_t mc_address_dst, mc_address_cmd;
3337	amdgpu_va_handle va_dst, va_cmd;
3338	int i, r;
3339	int bo_dst_size = 16384;
3340	int bo_cmd_size = 4096;
3341	struct amdgpu_cs_request ibs_request = {0};
3342	struct amdgpu_cs_ib_info ib_info = {0};
3343	struct amdgpu_cs_fence fence_status = {0};
3344	uint32_t expired;
3345	amdgpu_bo_list_handle bo_list;
3346
3347	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
3348	CU_ASSERT_EQUAL(r, 0);
3349
3350	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
3351					AMDGPU_GEM_DOMAIN_GTT, 0,
3352					&bo_cmd, (void **)&ptr_cmd,
3353					&mc_address_cmd, &va_cmd);
3354	CU_ASSERT_EQUAL(r, 0);
3355	memset(ptr_cmd, 0, bo_cmd_size);
3356
3357	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
3358					AMDGPU_GEM_DOMAIN_VRAM, 0,
3359					&bo_dst, (void **)&ptr_dst,
3360					&mc_address_dst, &va_dst);
3361	CU_ASSERT_EQUAL(r, 0);
3362
3363	i = 0;
3364	i += amdgpu_draw_init(ptr_cmd + i);
3365
3366	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0);
3367
3368	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0);
3369
3370	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs, 0);
3371
3372	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps);
3373
3374	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3375	ptr_cmd[i++] = 0xc;
3376	ptr_cmd[i++] = 0x33333333;
3377	ptr_cmd[i++] = 0x33333333;
3378	ptr_cmd[i++] = 0x33333333;
3379	ptr_cmd[i++] = 0x33333333;
3380
3381	i += amdgpu_draw_draw(ptr_cmd + i);
3382
3383	while (i & 7)
3384		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
3385
3386	resources[0] = bo_dst;
3387	resources[1] = bo_shader_ps;
3388	resources[2] = bo_shader_vs;
3389	resources[3] = bo_cmd;
3390	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
3391	CU_ASSERT_EQUAL(r, 0);
3392
3393	ib_info.ib_mc_address = mc_address_cmd;
3394	ib_info.size = i;
3395	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
3396	ibs_request.ring = ring_id;
3397	ibs_request.resources = bo_list;
3398	ibs_request.number_of_ibs = 1;
3399	ibs_request.ibs = &ib_info;
3400	ibs_request.fence_info.handle = NULL;
3401
3402	/* submit CS */
3403	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
3404	CU_ASSERT_EQUAL(r, 0);
3405
3406	r = amdgpu_bo_list_destroy(bo_list);
3407	CU_ASSERT_EQUAL(r, 0);
3408
3409	fence_status.ip_type = AMDGPU_HW_IP_GFX;
3410	fence_status.ip_instance = 0;
3411	fence_status.ring = ring_id;
3412	fence_status.context = context_handle;
3413	fence_status.fence = ibs_request.seq_no;
3414
3415	/* wait for IB accomplished */
3416	r = amdgpu_cs_query_fence_status(&fence_status,
3417					 AMDGPU_TIMEOUT_INFINITE,
3418					 0, &expired);
3419	CU_ASSERT_EQUAL(r, 0);
3420	CU_ASSERT_EQUAL(expired, true);
3421
3422	/* verify if memset test result meets with expected */
3423	i = 0;
3424	while(i < bo_dst_size) {
3425		CU_ASSERT_EQUAL(ptr_dst[i++], 0x33);
3426	}
3427
3428	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
3429	CU_ASSERT_EQUAL(r, 0);
3430
3431	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
3432	CU_ASSERT_EQUAL(r, 0);
3433
3434	r = amdgpu_cs_ctx_free(context_handle);
3435	CU_ASSERT_EQUAL(r, 0);
3436}
3437
3438static void amdgpu_memset_draw_test(amdgpu_device_handle device_handle,
3439				    uint32_t ring)
3440{
3441	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
3442	void *ptr_shader_ps;
3443	void *ptr_shader_vs;
3444	uint64_t mc_address_shader_ps, mc_address_shader_vs;
3445	amdgpu_va_handle va_shader_ps, va_shader_vs;
3446	int r;
3447	int bo_shader_size = 4096;
3448
3449	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
3450					AMDGPU_GEM_DOMAIN_VRAM, 0,
3451					&bo_shader_ps, &ptr_shader_ps,
3452					&mc_address_shader_ps, &va_shader_ps);
3453	CU_ASSERT_EQUAL(r, 0);
3454	memset(ptr_shader_ps, 0, bo_shader_size);
3455
3456	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
3457					AMDGPU_GEM_DOMAIN_VRAM, 0,
3458					&bo_shader_vs, &ptr_shader_vs,
3459					&mc_address_shader_vs, &va_shader_vs);
3460	CU_ASSERT_EQUAL(r, 0);
3461	memset(ptr_shader_vs, 0, bo_shader_size);
3462
3463	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_CONST);
3464	CU_ASSERT_EQUAL(r, 0);
3465
3466	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
3467	CU_ASSERT_EQUAL(r, 0);
3468
3469	amdgpu_memset_draw(device_handle, bo_shader_ps, bo_shader_vs,
3470			mc_address_shader_ps, mc_address_shader_vs, ring);
3471
3472	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
3473	CU_ASSERT_EQUAL(r, 0);
3474
3475	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
3476	CU_ASSERT_EQUAL(r, 0);
3477}
3478
3479static void amdgpu_memcpy_draw(amdgpu_device_handle device_handle,
3480			       amdgpu_bo_handle bo_shader_ps,
3481			       amdgpu_bo_handle bo_shader_vs,
3482			       uint64_t mc_address_shader_ps,
3483			       uint64_t mc_address_shader_vs,
3484			       uint32_t ring, int hang)
3485{
3486	amdgpu_context_handle context_handle;
3487	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
3488	volatile unsigned char *ptr_dst;
3489	unsigned char *ptr_src;
3490	uint32_t *ptr_cmd;
3491	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
3492	amdgpu_va_handle va_dst, va_src, va_cmd;
3493	int i, r;
3494	int bo_size = 16384;
3495	int bo_cmd_size = 4096;
3496	struct amdgpu_cs_request ibs_request = {0};
3497	struct amdgpu_cs_ib_info ib_info= {0};
3498	uint32_t hang_state, hangs;
3499	uint32_t expired;
3500	amdgpu_bo_list_handle bo_list;
3501	struct amdgpu_cs_fence fence_status = {0};
3502
3503	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
3504	CU_ASSERT_EQUAL(r, 0);
3505
3506	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
3507				    AMDGPU_GEM_DOMAIN_GTT, 0,
3508				    &bo_cmd, (void **)&ptr_cmd,
3509				    &mc_address_cmd, &va_cmd);
3510	CU_ASSERT_EQUAL(r, 0);
3511	memset(ptr_cmd, 0, bo_cmd_size);
3512
3513	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
3514					AMDGPU_GEM_DOMAIN_VRAM, 0,
3515					&bo_src, (void **)&ptr_src,
3516					&mc_address_src, &va_src);
3517	CU_ASSERT_EQUAL(r, 0);
3518
3519	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
3520					AMDGPU_GEM_DOMAIN_VRAM, 0,
3521					&bo_dst, (void **)&ptr_dst,
3522					&mc_address_dst, &va_dst);
3523	CU_ASSERT_EQUAL(r, 0);
3524
3525	memset(ptr_src, 0x55, bo_size);
3526
3527	i = 0;
3528	i += amdgpu_draw_init(ptr_cmd + i);
3529
3530	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0);
3531
3532	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0);
3533
3534	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs, 0);
3535
3536	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);
3537
3538	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
3539	ptr_cmd[i++] = 0xc;
3540	ptr_cmd[i++] = mc_address_src >> 8;
3541	ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
3542	ptr_cmd[i++] = 0x7c01f;
3543	ptr_cmd[i++] = 0x90500fac;
3544	ptr_cmd[i++] = 0x3e000;
3545	i += 3;
3546
3547	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3548	ptr_cmd[i++] = 0x14;
3549	ptr_cmd[i++] = 0x92;
3550	i += 3;
3551
3552	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3553	ptr_cmd[i++] = 0x191;
3554	ptr_cmd[i++] = 0;
3555
3556	i += amdgpu_draw_draw(ptr_cmd + i);
3557
3558	while (i & 7)
3559		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
3560
3561	resources[0] = bo_dst;
3562	resources[1] = bo_src;
3563	resources[2] = bo_shader_ps;
3564	resources[3] = bo_shader_vs;
3565	resources[4] = bo_cmd;
3566	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
3567	CU_ASSERT_EQUAL(r, 0);
3568
3569	ib_info.ib_mc_address = mc_address_cmd;
3570	ib_info.size = i;
3571	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
3572	ibs_request.ring = ring;
3573	ibs_request.resources = bo_list;
3574	ibs_request.number_of_ibs = 1;
3575	ibs_request.ibs = &ib_info;
3576	ibs_request.fence_info.handle = NULL;
3577	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
3578	CU_ASSERT_EQUAL(r, 0);
3579
3580	fence_status.ip_type = AMDGPU_HW_IP_GFX;
3581	fence_status.ip_instance = 0;
3582	fence_status.ring = ring;
3583	fence_status.context = context_handle;
3584	fence_status.fence = ibs_request.seq_no;
3585
3586	/* wait for IB accomplished */
3587	r = amdgpu_cs_query_fence_status(&fence_status,
3588					 AMDGPU_TIMEOUT_INFINITE,
3589					 0, &expired);
3590	if (!hang) {
3591		CU_ASSERT_EQUAL(r, 0);
3592		CU_ASSERT_EQUAL(expired, true);
3593
3594		/* verify if memcpy test result meets with expected */
3595		i = 0;
3596		while(i < bo_size) {
3597			CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
3598			i++;
3599		}
3600	} else {
3601		r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
3602		CU_ASSERT_EQUAL(r, 0);
3603		CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
3604	}
3605
3606	r = amdgpu_bo_list_destroy(bo_list);
3607	CU_ASSERT_EQUAL(r, 0);
3608
3609	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
3610	CU_ASSERT_EQUAL(r, 0);
3611	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
3612	CU_ASSERT_EQUAL(r, 0);
3613
3614	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
3615	CU_ASSERT_EQUAL(r, 0);
3616
3617	r = amdgpu_cs_ctx_free(context_handle);
3618	CU_ASSERT_EQUAL(r, 0);
3619}
3620
3621void amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring,
3622			     int hang)
3623{
3624	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
3625	void *ptr_shader_ps;
3626	void *ptr_shader_vs;
3627	uint64_t mc_address_shader_ps, mc_address_shader_vs;
3628	amdgpu_va_handle va_shader_ps, va_shader_vs;
3629	int bo_shader_size = 4096;
3630	enum ps_type ps_type = hang ? PS_HANG : PS_TEX;
3631	int r;
3632
3633	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
3634					AMDGPU_GEM_DOMAIN_VRAM, 0,
3635					&bo_shader_ps, &ptr_shader_ps,
3636					&mc_address_shader_ps, &va_shader_ps);
3637	CU_ASSERT_EQUAL(r, 0);
3638	memset(ptr_shader_ps, 0, bo_shader_size);
3639
3640	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
3641					AMDGPU_GEM_DOMAIN_VRAM, 0,
3642					&bo_shader_vs, &ptr_shader_vs,
3643					&mc_address_shader_vs, &va_shader_vs);
3644	CU_ASSERT_EQUAL(r, 0);
3645	memset(ptr_shader_vs, 0, bo_shader_size);
3646
3647	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, ps_type);
3648	CU_ASSERT_EQUAL(r, 0);
3649
3650	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
3651	CU_ASSERT_EQUAL(r, 0);
3652
3653	amdgpu_memcpy_draw(device_handle, bo_shader_ps, bo_shader_vs,
3654			mc_address_shader_ps, mc_address_shader_vs, ring, hang);
3655
3656	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
3657	CU_ASSERT_EQUAL(r, 0);
3658
3659	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
3660	CU_ASSERT_EQUAL(r, 0);
3661}
3662
3663static void amdgpu_draw_test(void)
3664{
3665	int r;
3666	struct drm_amdgpu_info_hw_ip info;
3667	uint32_t ring_id;
3668
3669	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
3670	CU_ASSERT_EQUAL(r, 0);
3671	if (!info.available_rings)
3672		printf("SKIP ... as there's no graphics ring\n");
3673
3674	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
3675		amdgpu_memset_draw_test(device_handle, ring_id);
3676		amdgpu_memcpy_draw_test(device_handle, ring_id, 0);
3677	}
3678}
3679
3680void amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint32_t ring)
3681{
3682	amdgpu_context_handle context_handle;
3683	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
3684	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
3685	void *ptr_shader_ps;
3686	void *ptr_shader_vs;
3687	volatile unsigned char *ptr_dst;
3688	unsigned char *ptr_src;
3689	uint32_t *ptr_cmd;
3690	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
3691	uint64_t mc_address_shader_ps, mc_address_shader_vs;
3692	amdgpu_va_handle va_shader_ps, va_shader_vs;
3693	amdgpu_va_handle va_dst, va_src, va_cmd;
3694	struct amdgpu_gpu_info gpu_info = {0};
3695	int i, r;
3696	int bo_size = 0x4000000;
3697	int bo_shader_ps_size = 0x400000;
3698	int bo_shader_vs_size = 4096;
3699	int bo_cmd_size = 4096;
3700	struct amdgpu_cs_request ibs_request = {0};
3701	struct amdgpu_cs_ib_info ib_info= {0};
3702	uint32_t hang_state, hangs, expired;
3703	amdgpu_bo_list_handle bo_list;
3704	struct amdgpu_cs_fence fence_status = {0};
3705
3706	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
3707	CU_ASSERT_EQUAL(r, 0);
3708
3709	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
3710	CU_ASSERT_EQUAL(r, 0);
3711
3712	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
3713				    AMDGPU_GEM_DOMAIN_GTT, 0,
3714				    &bo_cmd, (void **)&ptr_cmd,
3715				    &mc_address_cmd, &va_cmd);
3716	CU_ASSERT_EQUAL(r, 0);
3717	memset(ptr_cmd, 0, bo_cmd_size);
3718
3719	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_ps_size, 4096,
3720					AMDGPU_GEM_DOMAIN_VRAM, 0,
3721					&bo_shader_ps, &ptr_shader_ps,
3722					&mc_address_shader_ps, &va_shader_ps);
3723	CU_ASSERT_EQUAL(r, 0);
3724	memset(ptr_shader_ps, 0, bo_shader_ps_size);
3725
3726	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_vs_size, 4096,
3727					AMDGPU_GEM_DOMAIN_VRAM, 0,
3728					&bo_shader_vs, &ptr_shader_vs,
3729					&mc_address_shader_vs, &va_shader_vs);
3730	CU_ASSERT_EQUAL(r, 0);
3731	memset(ptr_shader_vs, 0, bo_shader_vs_size);
3732
3733	r = amdgpu_draw_load_ps_shader_hang_slow(ptr_shader_ps, gpu_info.family_id);
3734	CU_ASSERT_EQUAL(r, 0);
3735
3736	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
3737	CU_ASSERT_EQUAL(r, 0);
3738
3739	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
3740					AMDGPU_GEM_DOMAIN_VRAM, 0,
3741					&bo_src, (void **)&ptr_src,
3742					&mc_address_src, &va_src);
3743	CU_ASSERT_EQUAL(r, 0);
3744
3745	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
3746					AMDGPU_GEM_DOMAIN_VRAM, 0,
3747					&bo_dst, (void **)&ptr_dst,
3748					&mc_address_dst, &va_dst);
3749	CU_ASSERT_EQUAL(r, 0);
3750
3751	memset(ptr_src, 0x55, bo_size);
3752
3753	i = 0;
3754	i += amdgpu_draw_init(ptr_cmd + i);
3755
3756	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 1);
3757
3758	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 1);
3759
3760	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX,
3761							mc_address_shader_vs, 1);
3762
3763	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);
3764
3765	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
3766	ptr_cmd[i++] = 0xc;
3767	ptr_cmd[i++] = mc_address_src >> 8;
3768	ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
3769	ptr_cmd[i++] = 0x1ffc7ff;
3770	ptr_cmd[i++] = 0x90500fac;
3771	ptr_cmd[i++] = 0xffe000;
3772	i += 3;
3773
3774	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3775	ptr_cmd[i++] = 0x14;
3776	ptr_cmd[i++] = 0x92;
3777	i += 3;
3778
3779	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3780	ptr_cmd[i++] = 0x191;
3781	ptr_cmd[i++] = 0;
3782
3783	i += amdgpu_draw_draw(ptr_cmd + i);
3784
3785	while (i & 7)
3786		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
3787
3788	resources[0] = bo_dst;
3789	resources[1] = bo_src;
3790	resources[2] = bo_shader_ps;
3791	resources[3] = bo_shader_vs;
3792	resources[4] = bo_cmd;
3793	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
3794	CU_ASSERT_EQUAL(r, 0);
3795
3796	ib_info.ib_mc_address = mc_address_cmd;
3797	ib_info.size = i;
3798	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
3799	ibs_request.ring = ring;
3800	ibs_request.resources = bo_list;
3801	ibs_request.number_of_ibs = 1;
3802	ibs_request.ibs = &ib_info;
3803	ibs_request.fence_info.handle = NULL;
3804	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
3805	CU_ASSERT_EQUAL(r, 0);
3806
3807	fence_status.ip_type = AMDGPU_HW_IP_GFX;
3808	fence_status.ip_instance = 0;
3809	fence_status.ring = ring;
3810	fence_status.context = context_handle;
3811	fence_status.fence = ibs_request.seq_no;
3812
3813	/* wait for IB accomplished */
3814	r = amdgpu_cs_query_fence_status(&fence_status,
3815					 AMDGPU_TIMEOUT_INFINITE,
3816					 0, &expired);
3817
3818	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
3819	CU_ASSERT_EQUAL(r, 0);
3820	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
3821
3822	r = amdgpu_bo_list_destroy(bo_list);
3823	CU_ASSERT_EQUAL(r, 0);
3824
3825	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
3826	CU_ASSERT_EQUAL(r, 0);
3827	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
3828	CU_ASSERT_EQUAL(r, 0);
3829
3830	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
3831	CU_ASSERT_EQUAL(r, 0);
3832
3833	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_ps_size);
3834	CU_ASSERT_EQUAL(r, 0);
3835	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_vs_size);
3836	CU_ASSERT_EQUAL(r, 0);
3837
3838	r = amdgpu_cs_ctx_free(context_handle);
3839	CU_ASSERT_EQUAL(r, 0);
3840}
3841
3842static void amdgpu_gpu_reset_test(void)
3843{
3844	int r;
3845	char debugfs_path[256], tmp[10];
3846	int fd;
3847	struct stat sbuf;
3848	amdgpu_context_handle context_handle;
3849	uint32_t hang_state, hangs;
3850
3851	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
3852	CU_ASSERT_EQUAL(r, 0);
3853
3854	r = fstat(drm_amdgpu[0], &sbuf);
3855	CU_ASSERT_EQUAL(r, 0);
3856
3857	sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/amdgpu_gpu_recover", minor(sbuf.st_rdev));
3858	fd = open(debugfs_path, O_RDONLY);
3859	CU_ASSERT(fd >= 0);
3860
3861	r = read(fd, tmp, sizeof(tmp)/sizeof(char));
3862	CU_ASSERT(r > 0);
3863
3864	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
3865	CU_ASSERT_EQUAL(r, 0);
3866	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
3867
3868	close(fd);
3869	r = amdgpu_cs_ctx_free(context_handle);
3870	CU_ASSERT_EQUAL(r, 0);
3871
3872	amdgpu_compute_dispatch_test();
3873	amdgpu_gfx_dispatch_test();
3874}
3875