/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <sys/types.h>
#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif
#include <sys/stat.h>
#include <fcntl.h>
#if HAVE_ALLOCA_H
# include <alloca.h>
#endif
#include <sys/wait.h>

#include "CUnit/Basic.h"

#include "amdgpu_test.h"
#include "amdgpu_drm.h"
#include "util_math.h"

static amdgpu_device_handle device_handle;
static uint32_t major_version;
static uint32_t minor_version;
static uint32_t family_id;

static void amdgpu_query_info_test(void);
static void amdgpu_command_submission_gfx(void);
static void amdgpu_command_submission_compute(void);
static void amdgpu_command_submission_multi_fence(void);
static void amdgpu_command_submission_sdma(void);
static void amdgpu_userptr_test(void);
static void amdgpu_semaphore_test(void);
static void amdgpu_sync_dependency_test(void);
static void amdgpu_bo_eviction_test(void);
static void amdgpu_compute_dispatch_test(void);
static void amdgpu_gfx_dispatch_test(void);
static void amdgpu_draw_test(void);
static void amdgpu_gpu_reset_test(void);

static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
				       unsigned ip_type,
				       int instance, int pm4_dw, uint32_t *pm4_src,
				       int res_cnt, amdgpu_bo_handle *resources,
				       struct amdgpu_cs_ib_info *ib_info,
				       struct amdgpu_cs_request *ibs_request);

CU_TestInfo basic_tests[] = {
	{ "Query Info Test", amdgpu_query_info_test },
	{ "Userptr Test", amdgpu_userptr_test },
	{ "bo eviction Test", amdgpu_bo_eviction_test },
	{ "Command submission Test (GFX)", amdgpu_command_submission_gfx },
	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
	{ "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
	{ "SW semaphore Test", amdgpu_semaphore_test },
	{ "Sync dependency Test", amdgpu_sync_dependency_test },
	{ "Dispatch Test (Compute)", amdgpu_compute_dispatch_test },
	{ "Dispatch Test (GFX)", amdgpu_gfx_dispatch_test },
	{ "Draw Test", amdgpu_draw_test },
	{ "GPU reset Test", amdgpu_gpu_reset_test },
	CU_TEST_INFO_NULL,
};

#define BUFFER_SIZE (MAX2(8 * 1024, getpagesize()))
#define SDMA_PKT_HEADER_op_offset 0
#define SDMA_PKT_HEADER_op_mask   0x000000FF
#define SDMA_PKT_HEADER_op_shift  0
#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
#define SDMA_OPCODE_CONSTANT_FILL  11
#       define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)           ((x) << 14)
	/* 0 = byte fill
	 * 2 = DW fill
	 */
#define SDMA_PACKET(op, sub_op, e)	((((e) & 0xFFFF) << 16) |	\
					(((sub_op) & 0xFF) << 8) |	\
					(((op) & 0xFF) << 0))
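/* For example, with the field layout above,
 * SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)
 * evaluates to (0 << 16) | (0 << 8) | (2 << 0) == 0x00000002:
 * the opcode lands in bits [7:0], the sub-opcode in [15:8] and the
 * extra bits in [31:16].
 */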
#define	SDMA_OPCODE_WRITE				  2
#       define SDMA_WRITE_SUB_OPCODE_LINEAR               0
#       define SDMA_WRITE_SUB_OPCODE_TILED                1

#define	SDMA_OPCODE_COPY				  1
#       define SDMA_COPY_SUB_OPCODE_LINEAR                0

#define GFX_COMPUTE_NOP  0xffff1000
#define SDMA_NOP  0x0

/* PM4 */
#define	PACKET_TYPE0	0
#define	PACKET_TYPE1	1
#define	PACKET_TYPE2	2
#define	PACKET_TYPE3	3

#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
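/* Decoding example: for the header 0xC00E1000 (the type-3 NOP that
 * PACKET3(PACKET3_NOP, 14) below builds), CP_PACKET_GET_TYPE() yields 3,
 * CP_PACKET3_GET_OPCODE() yields 0x10 and CP_PACKET_GET_COUNT() yields 14.
 */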
#define PACKET0(reg, n)	((PACKET_TYPE0 << 30) |				\
			 ((reg) & 0xFFFF) |			\
			 ((n) & 0x3FFF) << 16)
#define CP_PACKET2			0x80000000
#define		PACKET2_PAD_SHIFT		0
#define		PACKET2_PAD_MASK		(0x3fffffff << 0)

#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))

#define PACKET3(op, n)	((PACKET_TYPE3 << 30) |				\
			 (((op) & 0xFF) << 8) |				\
			 ((n) & 0x3FFF) << 16)
#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | (1 << 1))
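/* For example, PACKET3(PACKET3_NOP, 14) encodes
 * (3 << 30) | (0x10 << 8) | (14 << 16) == 0xC00E1000, which is exactly
 * the header the compute NOP test below writes in front of 14 padding
 * dwords. PACKET3_COMPUTE() additionally sets bit 1 (the shader-type
 * bit) in the header.
 */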

/* Packet 3 types */
#define	PACKET3_NOP					0x10

#define	PACKET3_WRITE_DATA				0x37
#define		WRITE_DATA_DST_SEL(x)                   ((x) << 8)
		/* 0 - register
		 * 1 - memory (sync - via GRBM)
		 * 2 - gl2
		 * 3 - gds
		 * 4 - reserved
		 * 5 - memory (async - direct)
		 */
#define		WR_ONE_ADDR                             (1 << 16)
#define		WR_CONFIRM                              (1 << 20)
#define		WRITE_DATA_CACHE_POLICY(x)              ((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 */
#define		WRITE_DATA_ENGINE_SEL(x)                ((x) << 30)
		/* 0 - me
		 * 1 - pfp
		 * 2 - ce
		 */

#define	PACKET3_DMA_DATA				0x50
/* 1. header
 * 2. CONTROL
 * 3. SRC_ADDR_LO or DATA [31:0]
 * 4. SRC_ADDR_HI [31:0]
 * 5. DST_ADDR_LO [31:0]
 * 6. DST_ADDR_HI [7:0]
 * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
 */
/* CONTROL */
#              define PACKET3_DMA_DATA_ENGINE(x)     ((x) << 0)
		/* 0 - ME
		 * 1 - PFP
		 */
#              define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#              define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15)
#              define PACKET3_DMA_DATA_DST_SEL(x)  ((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#              define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27)
#              define PACKET3_DMA_DATA_SRC_SEL(x)  ((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_CP_SYNC     (1 << 31)
/* COMMAND */
#              define PACKET3_DMA_DATA_DIS_WC      (1 << 21)
#              define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#              define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#              define PACKET3_DMA_DATA_CMD_SAS     (1 << 26)
		/* 0 - memory
		 * 1 - register
		 */
#              define PACKET3_DMA_DATA_CMD_DAS     (1 << 27)
		/* 0 - memory
		 * 1 - register
		 */
#              define PACKET3_DMA_DATA_CMD_SAIC    (1 << 28)
#              define PACKET3_DMA_DATA_CMD_DAIC    (1 << 29)
#              define PACKET3_DMA_DATA_CMD_RAW_WAIT  (1 << 30)

#define SDMA_PACKET_SI(op, b, t, s, cnt)	((((op) & 0xF) << 28) |	\
						(((b) & 0x1) << 26) |		\
						(((t) & 0x1) << 23) |		\
						(((s) & 0x1) << 22) |		\
						(((cnt) & 0xFFFFF) << 0))
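/* For example, SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0, 1024)
 * encodes (3 << 28) | 1024 == 0x30000400: the SI layout keeps the
 * opcode in bits [31:28] and the count in [19:0].
 */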
#define	SDMA_OPCODE_COPY_SI	3
#define SDMA_OPCODE_CONSTANT_FILL_SI	13
#define SDMA_NOP_SI  0xf
#define GFX_COMPUTE_NOP_SI 0x80000000
#define	PACKET3_DMA_DATA_SI	0x41
#              define PACKET3_DMA_DATA_SI_ENGINE(x)     ((x) << 27)
		/* 0 - ME
		 * 1 - PFP
		 */
#              define PACKET3_DMA_DATA_SI_DST_SEL(x)  ((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_SI_SRC_SEL(x)  ((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_SI_CP_SYNC     (1 << 31)

#define PKT3_CONTEXT_CONTROL                   0x28
#define     CONTEXT_CONTROL_LOAD_ENABLE(x)     (((unsigned)(x) & 0x1) << 31)
#define     CONTEXT_CONTROL_LOAD_CE_RAM(x)     (((unsigned)(x) & 0x1) << 28)
#define     CONTEXT_CONTROL_SHADOW_ENABLE(x)   (((unsigned)(x) & 0x1) << 31)

#define PKT3_CLEAR_STATE                       0x12

#define PKT3_SET_SH_REG                        0x76
#define		PACKET3_SET_SH_REG_START			0x00002c00

#define	PACKET3_DISPATCH_DIRECT				0x15
#define PACKET3_EVENT_WRITE				0x46
#define PACKET3_ACQUIRE_MEM				0x58
#define PACKET3_SET_CONTEXT_REG				0x69
#define PACKET3_SET_UCONFIG_REG				0x79
#define PACKET3_DRAW_INDEX_AUTO				0x2D
/* gfx 8 */
#define mmCOMPUTE_PGM_LO                                                        0x2e0c
#define mmCOMPUTE_PGM_RSRC1                                                     0x2e12
#define mmCOMPUTE_TMPRING_SIZE                                                  0x2e18
#define mmCOMPUTE_USER_DATA_0                                                   0x2e40
#define mmCOMPUTE_USER_DATA_1                                                   0x2e41
#define mmCOMPUTE_RESOURCE_LIMITS                                               0x2e15
#define mmCOMPUTE_NUM_THREAD_X                                                  0x2e07

#define SWAP_32(num) ((((num) & 0xff000000) >> 24) | \
		      (((num) & 0x0000ff00) << 8) | \
		      (((num) & 0x00ff0000) >> 8) | \
		      (((num) & 0x000000ff) << 24))
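
/* For example, SWAP_32(0x12345678) == 0x78563412: the macro reverses the
 * byte order of a 32-bit word, and is applied to every dword of
 * shader_bin below.
 */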

/* Shader code:
 * void main()
 * {
 *	float x = some_input;
 *	for (unsigned i = 0; i < 1000000; i++)
 *		x = sin(x);
 *
 *	u[0] = 42u;
 * }
 */

static uint32_t shader_bin[] = {
	SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
	SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
	SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
	SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
};

#define CODE_OFFSET 512
#define DATA_OFFSET 1024

enum cs_type {
	CS_BUFFERCLEAR,
	CS_BUFFERCOPY,
	CS_HANG,
	CS_HANG_SLOW
};

static const uint32_t bufferclear_cs_shader_gfx9[] = {
	0xD1FD0000, 0x04010C08, 0x7E020204, 0x7E040205,
	0x7E060206, 0x7E080207, 0xE01C2000, 0x80000100,
	0xBF810000
};

static const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = {
	{0x2e12, 0x000C0041},	//{ mmCOMPUTE_PGM_RSRC1,	  0x000C0041 },
	{0x2e13, 0x00000090},	//{ mmCOMPUTE_PGM_RSRC2,	  0x00000090 },
	{0x2e07, 0x00000040},	//{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 },
	{0x2e08, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 },
	{0x2e09, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
};

static const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5;

static const uint32_t buffercopy_cs_shader_gfx9[] = {
	0xD1FD0000, 0x04010C08, 0xE00C2000, 0x80000100,
	0xBF8C0F70, 0xE01C2000, 0x80010100, 0xBF810000
};

static const uint32_t preamblecache_gfx9[] = {
	0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
	0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
	0xc0026900, 0xb4, 0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
	0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
	0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
	0xc0016900, 0x2d5, 0x10000, 0xc0016900, 0x2dc, 0x0,
	0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
	0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
	0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20,
	0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
	0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0,
	0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
	0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
	0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0,
	0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1,
	0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0,
	0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0,
	0xc0017900, 0x24b, 0x0
};

enum ps_type {
	PS_CONST,
	PS_TEX,
	PS_HANG,
	PS_HANG_SLOW
};

static const uint32_t ps_const_shader_gfx9[] = {
	0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
	0xD2960000, 0x00020300, 0xD2960001, 0x00020702,
	0xC4001C0F, 0x00000100, 0xBF810000
};

static const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6;

static const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = {
	{{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 },
	 { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 }
	}
};

static const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = {
	0x00000004
};

static const uint32_t ps_num_sh_registers_gfx9 = 2;

static const uint32_t ps_const_sh_registers_gfx9[][2] = {
	{0x2C0A, 0x000C0040}, //{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 },
	{0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 }
};

static const uint32_t ps_num_context_registers_gfx9 = 7;

static const uint32_t ps_const_context_reg_gfx9[][2] = {
	{0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
	{0xA1B6, 0x00000000}, //{ mmSPI_PS_IN_CONTROL,       0x00000000 },
	{0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
	{0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
	{0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
	{0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
	{0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004 }
};

static const uint32_t ps_tex_shader_gfx9[] = {
	0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000,
	0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00,
	0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000,
	0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F,
	0x00000100, 0xBF810000
};

static const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = {
	0x0000000B
};

static const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6;

static const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = {
	{{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 },
	 { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 }
	}
};

static const uint32_t ps_tex_sh_registers_gfx9[][2] = {
	{0x2C0A, 0x000C0081}, //{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 },
	{0x2C0B, 0x00000018}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 }
};

static const uint32_t ps_tex_context_reg_gfx9[][2] = {
	{0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
	{0xA1B6, 0x00000001}, //{ mmSPI_PS_IN_CONTROL,       0x00000001 },
	{0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
	{0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
	{0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
	{0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
	{0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004 }
};

static const uint32_t vs_RectPosTexFast_shader_gfx9[] = {
	0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100,
	0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206,
	0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080,
	0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003,
	0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101,
	0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903,
	0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100,
	0xC400020F, 0x05060403, 0xBF810000
};

static const uint32_t cached_cmd_gfx9[] = {
	0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0,
	0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
	0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf,
	0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x12,
	0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0,
	0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
	0xc0026900, 0x292, 0x20, 0x60201b8,
	0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
};

unsigned int memcpy_ps_hang[] = {
	0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100,
	0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001,
	0xC80D0101, 0xBF8C007F, 0xF0800F00, 0x00010002,
	0xBEFE040C, 0xBF8C0F70, 0xBF800000, 0xBF800000,
	0xF800180F, 0x03020100, 0xBF810000
};

struct amdgpu_test_shader {
	uint32_t *shader;
	uint32_t header_length;
	uint32_t body_length;
	uint32_t foot_length;
};

unsigned int memcpy_cs_hang_slow_ai_codes[] = {
	0xd1fd0000, 0x04010c08, 0xe00c2000, 0x80000100,
	0xbf8c0f70, 0xe01c2000, 0x80010100, 0xbf810000
};

struct amdgpu_test_shader memcpy_cs_hang_slow_ai = {
	memcpy_cs_hang_slow_ai_codes,
	4,
	3,
	1
};

unsigned int memcpy_cs_hang_slow_rv_codes[] = {
	0x8e00860c, 0x32000000, 0xe00c2000, 0x80010100,
	0xbf8c0f70, 0xe01c2000, 0x80020100, 0xbf810000
};

struct amdgpu_test_shader memcpy_cs_hang_slow_rv = {
	memcpy_cs_hang_slow_rv_codes,
	4,
	3,
	1
};

unsigned int memcpy_ps_hang_slow_ai_codes[] = {
	0xbefc000c, 0xbe8e017e, 0xbefe077e, 0xd4080000,
	0xd4090001, 0xd40c0100, 0xd40d0101, 0xf0800f00,
	0x00400002, 0xbefe010e, 0xbf8c0f70, 0xbf800000,
	0xbf800000, 0xbf800000, 0xbf800000, 0xc400180f,
	0x03020100, 0xbf810000
};

struct amdgpu_test_shader memcpy_ps_hang_slow_ai = {
	memcpy_ps_hang_slow_ai_codes,
	7,
	2,
	9
};

int amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size,
			unsigned alignment, unsigned heap, uint64_t alloc_flags,
			uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu,
			uint64_t *mc_address,
			amdgpu_va_handle *va_handle)
{
	struct amdgpu_bo_alloc_request request = {};
	amdgpu_bo_handle buf_handle;
	amdgpu_va_handle handle;
	uint64_t vmc_addr;
	int r;

	request.alloc_size = size;
	request.phys_alignment = alignment;
	request.preferred_heap = heap;
	request.flags = alloc_flags;

	r = amdgpu_bo_alloc(dev, &request, &buf_handle);
	if (r)
		return r;

	r = amdgpu_va_range_alloc(dev,
				  amdgpu_gpu_va_range_general,
				  size, alignment, 0, &vmc_addr,
				  &handle, 0);
	if (r)
		goto error_va_alloc;

	r = amdgpu_bo_va_op_raw(dev, buf_handle, 0, ALIGN(size, getpagesize()), vmc_addr,
				   AMDGPU_VM_PAGE_READABLE |
				   AMDGPU_VM_PAGE_WRITEABLE |
				   AMDGPU_VM_PAGE_EXECUTABLE |
				   mapping_flags,
				   AMDGPU_VA_OP_MAP);
	if (r)
		goto error_va_map;

	r = amdgpu_bo_cpu_map(buf_handle, cpu);
	if (r)
		goto error_cpu_map;

	*bo = buf_handle;
	*mc_address = vmc_addr;
	*va_handle = handle;

	return 0;

 error_cpu_map:
	amdgpu_bo_cpu_unmap(buf_handle);

 error_va_map:
	amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);

 error_va_alloc:
	amdgpu_bo_free(buf_handle);
	return r;
}
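
/* Illustrative usage sketch (the values are hypothetical, mirroring how the
 * tests below allocate their IBs): grab one page of GTT, write through the
 * CPU pointer, then tear everything down with amdgpu_bo_unmap_and_free():
 *
 *	amdgpu_bo_handle bo;
 *	amdgpu_va_handle va;
 *	void *cpu;
 *	uint64_t mc;
 *
 *	if (!amdgpu_bo_alloc_and_map_raw(device_handle, 4096, 4096,
 *					 AMDGPU_GEM_DOMAIN_GTT, 0, 0,
 *					 &bo, &cpu, &mc, &va)) {
 *		memset(cpu, 0, 4096);
 *		amdgpu_bo_unmap_and_free(bo, va, mc, 4096);
 *	}
 */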

int suite_basic_tests_init(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	int r;

	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
				     &minor_version, &device_handle);

	if (r) {
		if ((r == -EACCES) && (errno == EACCES))
			printf("\n\nError: %s. "
			       "Hint: Try to run this test program as root.",
			       strerror(errno));
		return CUE_SINIT_FAILED;
	}

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	if (r)
		return CUE_SINIT_FAILED;

	family_id = gpu_info.family_id;

	return CUE_SUCCESS;
}

int suite_basic_tests_clean(void)
{
	int r = amdgpu_device_deinitialize(device_handle);

	if (r == 0)
		return CUE_SUCCESS;
	else
		return CUE_SCLEAN_FAILED;
}

static void amdgpu_query_info_test(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	uint32_t version, feature;
	int r;

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
					  0, &version, &feature);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_separate_ibs(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_shared_ib(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
			       &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	ptr = (uint32_t *)ib_result_cpu + 4;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_bo_eviction_test(void)
{
	const int sdma_write_length = 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo1_mc, bo2_mc;
	volatile unsigned char *bo1_cpu, *bo2_cpu;
	int i, j, r, loop1, loop2;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
	struct amdgpu_heap_info vram_info, gtt_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resources */
	resources = calloc(4, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
				   0, &vram_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
				   0, &gtt_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	loop1 = loop2 = 0;
	/* run 4 loops to test all combinations of the two GTT mapping flags */
	while(loop1 < 2) {
		while(loop2 < 2) {
			/* allocate UC bo1 for SDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop1], &bo1,
						    (void**)&bo1_cpu, &bo1_mc,
						    &bo1_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* set bo1 */
			memset((void*)bo1_cpu, 0xaa, sdma_write_length);

			/* allocate UC bo2 for SDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop2], &bo2,
						    (void**)&bo2_cpu, &bo2_mc,
						    &bo2_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo2 */
			memset((void*)bo2_cpu, 0, sdma_write_length);

			resources[0] = bo1;
			resources[1] = bo2;
			resources[2] = vram_max[loop2];
			resources[3] = gtt_max[loop2];

			/* fill the PM4 stream: test SDMA copy linear */
			i = j = 0;
			if (family_id == AMDGPU_FAMILY_SI) {
				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
							  sdma_write_length);
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
			} else {
				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else
					pm4[i++] = sdma_write_length;
				pm4[i++] = 0;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   AMDGPU_HW_IP_DMA, 0,
						   i, pm4,
						   4, resources,
						   ib_info, ibs_request);

			/* verify that the SDMA copy produced the expected data */
			i = 0;
			while(i < sdma_write_length) {
				CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
			}
			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			loop2++;
		}
		loop2 = 0;
		loop1++;
	}
	amdgpu_bo_free(vram_max[0]);
	amdgpu_bo_free(vram_max[1]);
	amdgpu_bo_free(gtt_max[0]);
	amdgpu_bo_free(gtt_max[1]);
	/* clean up resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx(void)
{
	/* write data using the CP */
	amdgpu_command_submission_gfx_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_gfx_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_gfx_cp_copy_data();
	/* separate IB buffers for multi-IB submission */
	amdgpu_command_submission_gfx_separate_ibs();
	/* shared IB buffer for multi-IB submission */
	amdgpu_command_submission_gfx_shared_ib();
}

static void amdgpu_semaphore_test(void)
{
	amdgpu_context_handle context_handle[2];
	amdgpu_semaphore_handle sem;
	amdgpu_bo_handle ib_result_handle[2];
	void *ib_result_cpu[2];
	uint64_t ib_result_mc_address[2];
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2] = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	uint32_t sdma_nop, gfx_nop;
	amdgpu_bo_list_handle bo_list[2];
	amdgpu_va_handle va_handle[2];
	int r, i;

	if (family_id == AMDGPU_FAMILY_SI) {
		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
		gfx_nop = GFX_COMPUTE_NOP_SI;
	} else {
		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
		gfx_nop = GFX_COMPUTE_NOP;
	}

	r = amdgpu_cs_create_semaphore(&sem);
	CU_ASSERT_EQUAL(r, 0);
	for (i = 0; i < 2; i++) {
		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle[i], &ib_result_cpu[i],
					    &ib_result_mc_address[i], &va_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
				       NULL, &bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	/* 1. same context, different engines */
	ptr = ib_result_cpu[0];
	ptr[0] = sdma_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[0];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* 2. same engine, different contexts */
	ptr = ib_result_cpu[0];
	ptr[0] = gfx_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[1], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[1];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	for (i = 0; i < 2; i++) {
		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
					     ib_result_mc_address[i], 4096);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_cs_ctx_free(context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_destroy_semaphore(sem);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_compute_nop(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request;
	struct amdgpu_cs_ib_info ib_info;
	struct amdgpu_cs_fence fence_status;
	uint32_t *ptr;
	uint32_t expired;
	int r, instance;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip info;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	for (instance = 0; (1 << instance) & info.available_rings; instance++) {
		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle, &ib_result_cpu,
					    &ib_result_mc_address, &va_handle);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
				       &bo_list);
		CU_ASSERT_EQUAL(r, 0);

		ptr = ib_result_cpu;
		memset(ptr, 0, 16);
		ptr[0] = PACKET3(PACKET3_NOP, 14);

		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
		ib_info.ib_mc_address = ib_result_mc_address;
		ib_info.size = 16;

		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
		ibs_request.ring = instance;
		ibs_request.number_of_ibs = 1;
		ibs_request.ibs = &ib_info;
		ibs_request.resources = bo_list;
		ibs_request.fence_info.handle = NULL;

		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
		r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
		CU_ASSERT_EQUAL(r, 0);

		fence_status.context = context_handle;
		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
		fence_status.ip_instance = 0;
		fence_status.ring = instance;
		fence_status.fence = ibs_request.seq_no;

		r = amdgpu_cs_query_fence_status(&fence_status,
						 AMDGPU_TIMEOUT_INFINITE,
						 0, &expired);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
					     ib_result_mc_address, 4096);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_compute_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute(void)
{
	/* write data using the CP */
	amdgpu_command_submission_compute_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_compute_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_compute_cp_copy_data();
	/* nop test */
	amdgpu_command_submission_compute_nop();
}

/*
 * The caller must create and release:
 * pm4_src, resources, ib_info and ibs_request.
 * Submits the command stream described in ibs_request and waits until
 * the IB has completed.
 */
static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
				       unsigned ip_type,
				       int instance, int pm4_dw, uint32_t *pm4_src,
				       int res_cnt, amdgpu_bo_handle *resources,
				       struct amdgpu_cs_ib_info *ib_info,
				       struct amdgpu_cs_request *ibs_request)
{
	int r;
	uint32_t expired;
	uint32_t *ring_ptr;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_fence fence_status = {0};
	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
	amdgpu_va_handle va_handle;

	/* prepare CS */
	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
	CU_ASSERT_NOT_EQUAL(resources, NULL);
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
	CU_ASSERT_TRUE(pm4_dw <= 1024);

	/* allocate IB */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* copy the caller's PM4 packets into the IB */
	ring_ptr = ib_result_cpu;
	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));

	ib_info->ib_mc_address = ib_result_mc_address;
	ib_info->size = pm4_dw;

	ibs_request->ip_type = ip_type;
	ibs_request->ring = instance;
	ibs_request->number_of_ibs = 1;
	ibs_request->ibs = ib_info;
	ibs_request->fence_info.handle = NULL;

	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
	all_res[res_cnt] = ib_result_handle;

	r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res,
				  NULL, &ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ibs_request->ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request->seq_no;

	/* wait until the IB has completed */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
{
	const int sdma_write_length = 128;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	int i, j, r, loop, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resources */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop = 0;
		while(loop < 2) {
			/* allocate UC bo for SDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length * sizeof(uint32_t),
						    4096, AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop], &bo, (void**)&bo_cpu,
						    &bo_mc, &va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo */
			memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));

			resources[0] = bo;

			/* fill the PM4 stream: test SDMA write linear */
			i = j = 0;
			if (ip_type == AMDGPU_HW_IP_DMA) {
				if (family_id == AMDGPU_FAMILY_SI)
					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
								  sdma_write_length);
				else
					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
							       SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
				pm4[i++] = 0xffffffff & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else if (family_id != AMDGPU_FAMILY_SI)
					pm4[i++] = sdma_write_length;
				while(j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
				pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				while(j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   ip_type, ring_id,
						   i, pm4,
						   1, resources,
						   ib_info, ibs_request);

			/* verify that the write produced the expected pattern */
			i = 0;
			while(i < sdma_write_length) {
				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
			}

			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
						     sdma_write_length * sizeof(uint32_t));
			CU_ASSERT_EQUAL(r, 0);
			loop++;
		}
	}
	/* clean up resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_sdma_write_linear(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
}

static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
{
	const int sdma_write_length = 1024 * 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	int i, j, r, loop, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resources */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop = 0;
		while(loop < 2) {
			/* allocate UC bo for SDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop], &bo, (void**)&bo_cpu,
						    &bo_mc, &va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo */
			memset((void*)bo_cpu, 0, sdma_write_length);

			resources[0] = bo;

			/* fill the PM4 stream: test SDMA const fill */
			i = j = 0;
			if (ip_type == AMDGPU_HW_IP_DMA) {
				if (family_id == AMDGPU_FAMILY_SI) {
					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
								  0, 0, 0,
								  sdma_write_length / 4);
					pm4[i++] = 0xfffffffc & bo_mc;
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
				} else {
					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
							       SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
					pm4[i++] = 0xffffffff & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = 0xdeadbeaf;
					if (family_id >= AMDGPU_FAMILY_AI)
						pm4[i++] = sdma_write_length - 1;
					else
						pm4[i++] = sdma_write_length;
				}
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				if (family_id == AMDGPU_FAMILY_SI) {
					pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
						   PACKET3_DMA_DATA_SI_DST_SEL(0) |
						   PACKET3_DMA_DATA_SI_SRC_SEL(2) |
						   PACKET3_DMA_DATA_SI_CP_SYNC;
					pm4[i++] = 0xffffffff & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = sdma_write_length;
				} else {
					pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
					pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
						   PACKET3_DMA_DATA_DST_SEL(0) |
						   PACKET3_DMA_DATA_SRC_SEL(2) |
						   PACKET3_DMA_DATA_CP_SYNC;
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = 0;
					pm4[i++] = 0xfffffffc & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = sdma_write_length;
				}
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   ip_type, ring_id,
						   i, pm4,
						   1, resources,
						   ib_info, ibs_request);

			/* verify that the fill produced the expected pattern */
			i = 0;
			while(i < (sdma_write_length / 4)) {
				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
			}

			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			loop++;
		}
	}
	/* clean up resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_sdma_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
}

1559static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
1560{
1561	const int sdma_write_length = 1024;
1562	const int pm4_dw = 256;
1563	amdgpu_context_handle context_handle;
1564	amdgpu_bo_handle bo1, bo2;
1565	amdgpu_bo_handle *resources;
1566	uint32_t *pm4;
1567	struct amdgpu_cs_ib_info *ib_info;
1568	struct amdgpu_cs_request *ibs_request;
1569	uint64_t bo1_mc, bo2_mc;
1570	volatile unsigned char *bo1_cpu, *bo2_cpu;
1571	int i, j, r, loop1, loop2, ring_id;
1572	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1573	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
1574	struct drm_amdgpu_info_hw_ip hw_ip_info;
1575
1576	pm4 = calloc(pm4_dw, sizeof(*pm4));
1577	CU_ASSERT_NOT_EQUAL(pm4, NULL);
1578
1579	ib_info = calloc(1, sizeof(*ib_info));
1580	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1581
1582	ibs_request = calloc(1, sizeof(*ibs_request));
1583	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1584
1585	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1586	CU_ASSERT_EQUAL(r, 0);
1587
1588	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1589	CU_ASSERT_EQUAL(r, 0);
1590
1591	/* prepare resource */
1592	resources = calloc(2, sizeof(amdgpu_bo_handle));
1593	CU_ASSERT_NOT_EQUAL(resources, NULL);
1594
1595	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1596		loop1 = loop2 = 0;
1597		/* run 9 circle to test all mapping combination */
1598		while(loop1 < 2) {
1599			while(loop2 < 2) {
1600				/* allocate UC bo1for sDMA use */
1601				r = amdgpu_bo_alloc_and_map(device_handle,
1602							    sdma_write_length, 4096,
1603							    AMDGPU_GEM_DOMAIN_GTT,
1604							    gtt_flags[loop1], &bo1,
1605							    (void**)&bo1_cpu, &bo1_mc,
1606							    &bo1_va_handle);
1607				CU_ASSERT_EQUAL(r, 0);
1608
1609				/* set bo1 */
1610				memset((void*)bo1_cpu, 0xaa, sdma_write_length);
1611
1612				/* allocate UC bo2 for sDMA use */
1613				r = amdgpu_bo_alloc_and_map(device_handle,
1614							    sdma_write_length, 4096,
1615							    AMDGPU_GEM_DOMAIN_GTT,
1616							    gtt_flags[loop2], &bo2,
1617							    (void**)&bo2_cpu, &bo2_mc,
1618							    &bo2_va_handle);
1619				CU_ASSERT_EQUAL(r, 0);
1620
1621				/* clear bo2 */
1622				memset((void*)bo2_cpu, 0, sdma_write_length);
1623
1624				resources[0] = bo1;
1625				resources[1] = bo2;
1626
1627				/* fulfill PM4: test DMA copy linear */
1628				i = j = 0;
1629				if (ip_type == AMDGPU_HW_IP_DMA) {
1630					if (family_id == AMDGPU_FAMILY_SI) {
1631						pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
1632									  0, 0, 0,
1633									  sdma_write_length);
1634						pm4[i++] = 0xffffffff & bo2_mc;
1635						pm4[i++] = 0xffffffff & bo1_mc;
1636						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1637						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1638					} else {
1639						pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
1640								       SDMA_COPY_SUB_OPCODE_LINEAR,
1641								       0);
1642						if (family_id >= AMDGPU_FAMILY_AI)
1643							pm4[i++] = sdma_write_length - 1;
1644						else
1645							pm4[i++] = sdma_write_length;
1646						pm4[i++] = 0;
1647						pm4[i++] = 0xffffffff & bo1_mc;
1648						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1649						pm4[i++] = 0xffffffff & bo2_mc;
1650						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1651					}
1652				} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1653					   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1654					if (family_id == AMDGPU_FAMILY_SI) {
1655						pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
1656						pm4[i++] = 0xfffffffc & bo1_mc;
1657						pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
1658							   PACKET3_DMA_DATA_SI_DST_SEL(0) |
1659							   PACKET3_DMA_DATA_SI_SRC_SEL(0) |
1660							   PACKET3_DMA_DATA_SI_CP_SYNC |
1661							   (0xffff00000000 & bo1_mc) >> 32;
1662						pm4[i++] = 0xfffffffc & bo2_mc;
1663						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1664						pm4[i++] = sdma_write_length;
1665					} else {
1666						pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
1667						pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
1668							   PACKET3_DMA_DATA_DST_SEL(0) |
1669							   PACKET3_DMA_DATA_SRC_SEL(0) |
1670							   PACKET3_DMA_DATA_CP_SYNC;
1671						pm4[i++] = 0xfffffffc & bo1_mc;
1672						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1673						pm4[i++] = 0xfffffffc & bo2_mc;
1674						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1675						pm4[i++] = sdma_write_length;
1676					}
1677				}
1678
1679				amdgpu_test_exec_cs_helper(context_handle,
1680							   ip_type, ring_id,
1681							   i, pm4,
1682							   2, resources,
1683							   ib_info, ibs_request);
1684
1685				/* verify the copy result matches the source pattern */
1686				i = 0;
1687				while (i < sdma_write_length) {
1688					CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
1689				}
1690				r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
1691							     sdma_write_length);
1692				CU_ASSERT_EQUAL(r, 0);
1693				r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
1694							     sdma_write_length);
1695				CU_ASSERT_EQUAL(r, 0);
1696				loop2++;
1697			}
1698			loop1++;
1699		}
1700	}
1701	/* clean resources */
1702	free(resources);
1703	free(ibs_request);
1704	free(ib_info);
1705	free(pm4);
1706
1707	/* end of test */
1708	r = amdgpu_cs_ctx_free(context_handle);
1709	CU_ASSERT_EQUAL(r, 0);
1710}
1711
1712static void amdgpu_command_submission_sdma_copy_linear(void)
1713{
1714	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
1715}
1716
1717static void amdgpu_command_submission_sdma(void)
1718{
1719	amdgpu_command_submission_sdma_write_linear();
1720	amdgpu_command_submission_sdma_const_fill();
1721	amdgpu_command_submission_sdma_copy_linear();
1722}
1723
1724static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
1725{
1726	amdgpu_context_handle context_handle;
1727	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
1728	void *ib_result_cpu, *ib_result_ce_cpu;
1729	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
1730	struct amdgpu_cs_request ibs_request[2] = {0};
1731	struct amdgpu_cs_ib_info ib_info[2];
1732	struct amdgpu_cs_fence fence_status[2] = {0};
1733	uint32_t *ptr;
1734	uint32_t expired;
1735	amdgpu_bo_list_handle bo_list;
1736	amdgpu_va_handle va_handle, va_handle_ce;
1737	int r;
1738	int i = 0, ib_cs_num = 2;
1739
1740	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1741	CU_ASSERT_EQUAL(r, 0);
1742
1743	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1744				    AMDGPU_GEM_DOMAIN_GTT, 0,
1745				    &ib_result_handle, &ib_result_cpu,
1746				    &ib_result_mc_address, &va_handle);
1747	CU_ASSERT_EQUAL(r, 0);
1748
1749	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1750				    AMDGPU_GEM_DOMAIN_GTT, 0,
1751				    &ib_result_ce_handle, &ib_result_ce_cpu,
1752				    &ib_result_ce_mc_address, &va_handle_ce);
1753	CU_ASSERT_EQUAL(r, 0);
1754
1755	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
1756			       ib_result_ce_handle, &bo_list);
1757	CU_ASSERT_EQUAL(r, 0);
1758
1759	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
1760
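	/*
	 * Test sketch: the CE IB initializes the CE counter and the DE IB
	 * waits on it; the same two-IB pair is submitted twice (ib_cs_num
	 * requests) so that two fences are produced for the wait-all /
	 * wait-any checks below.
	 */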
1761	/* IT_SET_CE_DE_COUNTERS */
1762	ptr = ib_result_ce_cpu;
1763	if (family_id != AMDGPU_FAMILY_SI) {
1764		ptr[i++] = 0xc0008900;
1765		ptr[i++] = 0;
1766	}
1767	ptr[i++] = 0xc0008400;
1768	ptr[i++] = 1;
1769	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
1770	ib_info[0].size = i;
1771	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
1772
1773	/* IT_WAIT_ON_CE_COUNTER */
1774	ptr = ib_result_cpu;
1775	ptr[0] = 0xc0008600;
1776	ptr[1] = 0x00000001;
1777	ib_info[1].ib_mc_address = ib_result_mc_address;
1778	ib_info[1].size = 2;
1779
1780	for (i = 0; i < ib_cs_num; i++) {
1781		ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
1782		ibs_request[i].number_of_ibs = 2;
1783		ibs_request[i].ibs = ib_info;
1784		ibs_request[i].resources = bo_list;
1785		ibs_request[i].fence_info.handle = NULL;
1786	}
1787
1788	r = amdgpu_cs_submit(context_handle, 0, ibs_request, ib_cs_num);
1789
1790	CU_ASSERT_EQUAL(r, 0);
1791
1792	for (i = 0; i < ib_cs_num; i++) {
1793		fence_status[i].context = context_handle;
1794		fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
1795		fence_status[i].fence = ibs_request[i].seq_no;
1796	}
1797
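	/*
	 * wait_all selects the amdgpu_cs_wait_fences behaviour: true waits
	 * until every fence in the array has signaled, false returns once
	 * at least one of them has.
	 */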
1798	r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
1799				AMDGPU_TIMEOUT_INFINITE,
1800				&expired, NULL);
1801	CU_ASSERT_EQUAL(r, 0);
1802
1803	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1804				     ib_result_mc_address, 4096);
1805	CU_ASSERT_EQUAL(r, 0);
1806
1807	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
1808				     ib_result_ce_mc_address, 4096);
1809	CU_ASSERT_EQUAL(r, 0);
1810
1811	r = amdgpu_bo_list_destroy(bo_list);
1812	CU_ASSERT_EQUAL(r, 0);
1813
1814	r = amdgpu_cs_ctx_free(context_handle);
1815	CU_ASSERT_EQUAL(r, 0);
1816}
1817
1818static void amdgpu_command_submission_multi_fence(void)
1819{
1820	amdgpu_command_submission_multi_fence_wait_all(true);
1821	amdgpu_command_submission_multi_fence_wait_all(false);
1822}
1823
1824static void amdgpu_userptr_test(void)
1825{
1826	int i, r, j;
1827	uint32_t *pm4 = NULL;
1828	uint64_t bo_mc;
1829	void *ptr = NULL;
1830	int pm4_dw = 256;
1831	int sdma_write_length = 4;
1832	amdgpu_bo_handle handle;
1833	amdgpu_context_handle context_handle;
1834	struct amdgpu_cs_ib_info *ib_info;
1835	struct amdgpu_cs_request *ibs_request;
1836	amdgpu_bo_handle buf_handle;
1837	amdgpu_va_handle va_handle;
1838
1839	pm4 = calloc(pm4_dw, sizeof(*pm4));
1840	CU_ASSERT_NOT_EQUAL(pm4, NULL);
1841
1842	ib_info = calloc(1, sizeof(*ib_info));
1843	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1844
1845	ibs_request = calloc(1, sizeof(*ibs_request));
1846	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1847
1848	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1849	CU_ASSERT_EQUAL(r, 0);
1850
1851	r = posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
1852	CU_ASSERT_EQUAL(r, 0);
1853	memset(ptr, 0, BUFFER_SIZE);
1854
1855	r = amdgpu_create_bo_from_user_mem(device_handle,
1856					   ptr, BUFFER_SIZE, &buf_handle);
1857	CU_ASSERT_EQUAL(r, 0);
1858
1859	r = amdgpu_va_range_alloc(device_handle,
1860				  amdgpu_gpu_va_range_general,
1861				  BUFFER_SIZE, 1, 0, &bo_mc,
1862				  &va_handle, 0);
1863	CU_ASSERT_EQUAL(r, 0);
1864
1865	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
1866	CU_ASSERT_EQUAL(r, 0);
1867
1868	handle = buf_handle;
1869
1870	j = i = 0;
1871
1872	if (family_id == AMDGPU_FAMILY_SI)
1873		pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
1874				sdma_write_length);
1875	else
1876		pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
1877				SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
1878	pm4[i++] = 0xffffffff & bo_mc;
1879	pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1880	if (family_id >= AMDGPU_FAMILY_AI)
1881		pm4[i++] = sdma_write_length - 1;
1882	else if (family_id != AMDGPU_FAMILY_SI)
1883		pm4[i++] = sdma_write_length;
1884
1885	while (j++ < sdma_write_length)
1886		pm4[i++] = 0xdeadbeaf;
1887
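	/*
	 * Fork a child that overwrites the first packet and exits.  The
	 * write only hits the child's copy-on-write pages, so the parent's
	 * submission below should still execute the original SDMA write;
	 * this exercises the userptr vs. fork() interaction.
	 */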
1888	if (!fork()) {
1889		pm4[0] = 0x0;
1890		exit(0);
1891	}
1892
1893	amdgpu_test_exec_cs_helper(context_handle,
1894				   AMDGPU_HW_IP_DMA, 0,
1895				   i, pm4,
1896				   1, &handle,
1897				   ib_info, ibs_request);
1898	i = 0;
1899	while (i < sdma_write_length) {
1900		CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
1901	}
1902	free(ibs_request);
1903	free(ib_info);
1904	free(pm4);
1905
1906	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
1907	CU_ASSERT_EQUAL(r, 0);
1908	r = amdgpu_va_range_free(va_handle);
1909	CU_ASSERT_EQUAL(r, 0);
1910	r = amdgpu_bo_free(buf_handle);
1911	CU_ASSERT_EQUAL(r, 0);
1912	free(ptr);
1913
1914	r = amdgpu_cs_ctx_free(context_handle);
1915	CU_ASSERT_EQUAL(r, 0);
1916
1917	wait(NULL);
1918}
1919
1920static void amdgpu_sync_dependency_test(void)
1921{
1922	amdgpu_context_handle context_handle[2];
1923	amdgpu_bo_handle ib_result_handle;
1924	void *ib_result_cpu;
1925	uint64_t ib_result_mc_address;
1926	struct amdgpu_cs_request ibs_request;
1927	struct amdgpu_cs_ib_info ib_info;
1928	struct amdgpu_cs_fence fence_status;
1929	uint32_t expired;
1930	int i, j, r;
1931	amdgpu_bo_list_handle bo_list;
1932	amdgpu_va_handle va_handle;
1933	static uint32_t *ptr;
1934	uint64_t seq_no;
1935
1936	r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
1937	CU_ASSERT_EQUAL(r, 0);
1938	r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
1939	CU_ASSERT_EQUAL(r, 0);
1940
1941	r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
1942			AMDGPU_GEM_DOMAIN_GTT, 0,
1943						    &ib_result_handle, &ib_result_cpu,
1944						    &ib_result_mc_address, &va_handle);
1945	CU_ASSERT_EQUAL(r, 0);
1946
1947	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
1948			       &bo_list);
1949	CU_ASSERT_EQUAL(r, 0);
1950
1951	ptr = ib_result_cpu;
1952	i = 0;
1953
1954	memcpy(ptr + CODE_OFFSET, shader_bin, sizeof(shader_bin));
1955
1956	/* Dispatch minimal init config and verify it's executed */
1957	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
1958	ptr[i++] = 0x80000000;
1959	ptr[i++] = 0x80000000;
1960
1961	ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
1962	ptr[i++] = 0x80000000;
1963
1964
1965	/* Program compute regs */
1966	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
1967	ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1968	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
1969	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;
1970
1971
1972	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
1973	ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
1974	/*
1975	 * 002c0040         COMPUTE_PGM_RSRC1 <- VGPRS = 0
1976	                                      SGPRS = 1
1977	                                      PRIORITY = 0
1978	                                      FLOAT_MODE = 192 (0xc0)
1979	                                      PRIV = 0
1980	                                      DX10_CLAMP = 1
1981	                                      DEBUG_MODE = 0
1982	                                      IEEE_MODE = 0
1983	                                      BULKY = 0
1984	                                      CDBG_USER = 0
1985	 *
1986	 */
1987	ptr[i++] = 0x002c0040;
1988
1989
1990	/*
1991	 * 00000010         COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
1992	                                      USER_SGPR = 8
1993	                                      TRAP_PRESENT = 0
1994	                                      TGID_X_EN = 0
1995	                                      TGID_Y_EN = 0
1996	                                      TGID_Z_EN = 0
1997	                                      TG_SIZE_EN = 0
1998	                                      TIDIG_COMP_CNT = 0
1999	                                      EXCP_EN_MSB = 0
2000	                                      LDS_SIZE = 0
2001	                                      EXCP_EN = 0
2002	 *
2003	 */
2004	ptr[i++] = 0x00000010;
2005
2006
2007	/*
2008	 * 00000100         COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
2009	                                         WAVESIZE = 0
2010	 *
2011	 */
2012	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2013	ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
2014	ptr[i++] = 0x00000100;
2015
2016	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2017	ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
2018	ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
2019	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
2020
2021	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2022	ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
2023	ptr[i++] = 0;
2024
2025	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
2026	ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
2027	ptr[i++] = 1;
2028	ptr[i++] = 1;
2029	ptr[i++] = 1;
2030
2031
2032	/* Dispatch */
2033	ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
2034	ptr[i++] = 1;
2035	ptr[i++] = 1;
2036	ptr[i++] = 1;
2037	ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */
2038
2039
2040	while (i & 7)
2041		ptr[i++] =  0xffff1000; /* type3 nop packet */
2042
2043	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
2044	ib_info.ib_mc_address = ib_result_mc_address;
2045	ib_info.size = i;
2046
2047	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
2048	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
2049	ibs_request.ring = 0;
2050	ibs_request.number_of_ibs = 1;
2051	ibs_request.ibs = &ib_info;
2052	ibs_request.resources = bo_list;
2053	ibs_request.fence_info.handle = NULL;
2054
2055	r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request, 1);
2056	CU_ASSERT_EQUAL(r, 0);
2057	seq_no = ibs_request.seq_no;
2058
2059
2060
2061	/* Prepare second command with dependency on the first */
2062	j = i;
2063	ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
2064	ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
2065	ptr[i++] =          0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
2066	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
2067	ptr[i++] = 99;
2068
2069	while (i & 7)
2070		ptr[i++] =  0xffff1000; /* type3 nop packet */
2071
2072	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
2073	ib_info.ib_mc_address = ib_result_mc_address + j * 4;
2074	ib_info.size = i - j;
2075
2076	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
2077	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
2078	ibs_request.ring = 0;
2079	ibs_request.number_of_ibs = 1;
2080	ibs_request.ibs = &ib_info;
2081	ibs_request.resources = bo_list;
2082	ibs_request.fence_info.handle = NULL;
2083
2084	ibs_request.number_of_dependencies = 1;
2085
2086	ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
2087	ibs_request.dependencies[0].context = context_handle[1];
2088	ibs_request.dependencies[0].ip_instance = 0;
2089	ibs_request.dependencies[0].ring = 0;
2090	ibs_request.dependencies[0].fence = seq_no;
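	/*
	 * Each dependency entry names a (context, ip, instance, ring, fence)
	 * tuple that must have signaled before this submission may run, so
	 * the second IB's WRITE_DATA of 99 can only execute after the shader
	 * submitted on context_handle[1] has completed.
	 */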
2091
2092
2093	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request, 1);
2094	CU_ASSERT_EQUAL(r, 0);
2095
2096
2097	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
2098	fence_status.context = context_handle[0];
2099	fence_status.ip_type = AMDGPU_HW_IP_GFX;
2100	fence_status.ip_instance = 0;
2101	fence_status.ring = 0;
2102	fence_status.fence = ibs_request.seq_no;
2103
2104	r = amdgpu_cs_query_fence_status(&fence_status,
2105		       AMDGPU_TIMEOUT_INFINITE, 0, &expired);
2106	CU_ASSERT_EQUAL(r, 0);
2107
2108	/* Expect the second command to wait for shader to complete */
2109	CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);
2110
2111	r = amdgpu_bo_list_destroy(bo_list);
2112	CU_ASSERT_EQUAL(r, 0);
2113
2114	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
2115				     ib_result_mc_address, 4096);
2116	CU_ASSERT_EQUAL(r, 0);
2117
2118	r = amdgpu_cs_ctx_free(context_handle[0]);
2119	CU_ASSERT_EQUAL(r, 0);
2120	r = amdgpu_cs_ctx_free(context_handle[1]);
2121	CU_ASSERT_EQUAL(r, 0);
2122
2123	free(ibs_request.dependencies);
2124}
2125
2126static int amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t *ptr, int family)
2127{
2128	struct amdgpu_test_shader *shader;
2129	int i, loop = 0x10000;
2130
2131	switch (family) {
2132		case AMDGPU_FAMILY_AI:
2133			shader = &memcpy_cs_hang_slow_ai;
2134			break;
2135		case AMDGPU_FAMILY_RV:
2136			shader = &memcpy_cs_hang_slow_rv;
2137			break;
2138		default:
2139			return -1;
2140			break;
2141	}
2142
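	/*
	 * Assemble an intentionally long-running shader: header once, the
	 * body repeated 'loop' times, then the footer, so that the dispatch
	 * takes long enough to trigger a GPU hang/timeout ("slow hang").
	 */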
2143	memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
2144
2145	for (i = 0; i < loop; i++)
2146		memcpy(ptr + shader->header_length + shader->body_length * i,
2147			shader->shader + shader->header_length,
2148			shader->body_length * sizeof(uint32_t));
2149
2150	memcpy(ptr + shader->header_length + shader->body_length * loop,
2151		shader->shader + shader->header_length + shader->body_length,
2152		shader->foot_length * sizeof(uint32_t));
2153
2154	return 0;
2155}
2156
2157static int amdgpu_dispatch_load_cs_shader(uint8_t *ptr,
2158					   int cs_type)
2159{
2160	uint32_t shader_size;
2161	const uint32_t *shader;
2162
2163	switch (cs_type) {
2164		case CS_BUFFERCLEAR:
2165			shader = bufferclear_cs_shader_gfx9;
2166			shader_size = sizeof(bufferclear_cs_shader_gfx9);
2167			break;
2168		case CS_BUFFERCOPY:
2169			shader = buffercopy_cs_shader_gfx9;
2170			shader_size = sizeof(buffercopy_cs_shader_gfx9);
2171			break;
2172		case CS_HANG:
2173			shader = memcpy_ps_hang;
2174			shader_size = sizeof(memcpy_ps_hang);
2175			break;
2176		default:
2177			return -1;
2178			break;
2179	}
2180
2181	memcpy(ptr, shader, shader_size);
2182	return 0;
2183}
2184
2185static int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type)
2186{
2187	int i = 0;
2188
2189	/* Write context control and load shadowing register if necessary */
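	/*
	 * Note: the SET_SH_REG payload offsets used in this and the helper
	 * functions below (0x204, 0x216, 0x218, ...) are dword offsets
	 * relative to the GFX9 SH register base 0x2c00, matching the
	 * "- 0x2c00" adjustment in amdgpu_dispatch_write2hw().
	 */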
2190	if (ip_type == AMDGPU_HW_IP_GFX) {
2191		ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
2192		ptr[i++] = 0x80000000;
2193		ptr[i++] = 0x80000000;
2194	}
2195
2196	/* Issue commands to set default compute state. */
2197	/* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */
2198	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 3);
2199	ptr[i++] = 0x204;
2200	i += 3;
2201
2202	/* clear mmCOMPUTE_TMPRING_SIZE */
2203	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2204	ptr[i++] = 0x218;
2205	ptr[i++] = 0;
2206
2207	return i;
2208}
2209
2210static int amdgpu_dispatch_write_cumask(uint32_t *ptr)
2211{
2212	int i = 0;
2213
2214	/*  Issue commands to set cu mask used in current dispatch */
2215	/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */
2216	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2217	ptr[i++] = 0x216;
2218	ptr[i++] = 0xffffffff;
2219	ptr[i++] = 0xffffffff;
2220	/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */
2221	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2222	ptr[i++] = 0x219;
2223	ptr[i++] = 0xffffffff;
2224	ptr[i++] = 0xffffffff;
2225
2226	return i;
2227}
2228
2229static int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr)
2230{
2231	int i, j;
2232
2233	i = 0;
2234
2235	/* Writes shader state to HW */
2236	/* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */
2237	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2238	ptr[i++] = 0x20c;
2239	ptr[i++] = (shader_addr >> 8);
2240	ptr[i++] = (shader_addr >> 40);
2241	/* write sh regs */
2242	for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) {
2243		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2244		/* - Gfx9ShRegBase */
2245		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00;
2246		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1];
2247	}
2248
2249	return i;
2250}
2251
2252static void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
2253					 uint32_t ip_type,
2254					 uint32_t ring)
2255{
2256	amdgpu_context_handle context_handle;
2257	amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3];
2258	volatile unsigned char *ptr_dst;
2259	void *ptr_shader;
2260	uint32_t *ptr_cmd;
2261	uint64_t mc_address_dst, mc_address_shader, mc_address_cmd;
2262	amdgpu_va_handle va_dst, va_shader, va_cmd;
2263	int i, r;
2264	int bo_dst_size = 16384;
2265	int bo_shader_size = 4096;
2266	int bo_cmd_size = 4096;
2267	struct amdgpu_cs_request ibs_request = {0};
2268	struct amdgpu_cs_ib_info ib_info = {0};
2269	amdgpu_bo_list_handle bo_list;
2270	struct amdgpu_cs_fence fence_status = {0};
2271	uint32_t expired;
2272
2273	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2274	CU_ASSERT_EQUAL(r, 0);
2275
2276	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
2277					AMDGPU_GEM_DOMAIN_GTT, 0,
2278					&bo_cmd, (void **)&ptr_cmd,
2279					&mc_address_cmd, &va_cmd);
2280	CU_ASSERT_EQUAL(r, 0);
2281	memset(ptr_cmd, 0, bo_cmd_size);
2282
2283	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
2284					AMDGPU_GEM_DOMAIN_VRAM, 0,
2285					&bo_shader, &ptr_shader,
2286					&mc_address_shader, &va_shader);
2287	CU_ASSERT_EQUAL(r, 0);
2288	memset(ptr_shader, 0, bo_shader_size);
2289
2290	r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR);
2291	CU_ASSERT_EQUAL(r, 0);
2292
2293	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2294					AMDGPU_GEM_DOMAIN_VRAM, 0,
2295					&bo_dst, (void **)&ptr_dst,
2296					&mc_address_dst, &va_dst);
2297	CU_ASSERT_EQUAL(r, 0);
2298
2299	i = 0;
2300	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);
2301
2302	/*  Issue commands to set cu mask used in current dispatch */
2303	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);
2304
2305	/* Writes shader state to HW */
2306	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);
2307
2308	/* Write constant data */
2309	/* Writes the UAV constant data to the SGPRs. */
2310	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2311	ptr_cmd[i++] = 0x240;
2312	ptr_cmd[i++] = mc_address_dst;
2313	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
2314	ptr_cmd[i++] = 0x400;
2315	ptr_cmd[i++] = 0x74fac;
2316
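	/*
	 * The next four dwords are the clear value consumed by the
	 * bufferclear shader; the verification loop below expects every
	 * destination byte to read back as 0x22.
	 */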
2317	/* Sets a range of pixel shader constants */
2318	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2319	ptr_cmd[i++] = 0x244;
2320	ptr_cmd[i++] = 0x22222222;
2321	ptr_cmd[i++] = 0x22222222;
2322	ptr_cmd[i++] = 0x22222222;
2323	ptr_cmd[i++] = 0x22222222;
2324
2325	/* clear mmCOMPUTE_RESOURCE_LIMITS */
2326	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2327	ptr_cmd[i++] = 0x215;
2328	ptr_cmd[i++] = 0;
2329
2330	/* dispatch direct command */
2331	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
2332	ptr_cmd[i++] = 0x10;
2333	ptr_cmd[i++] = 1;
2334	ptr_cmd[i++] = 1;
2335	ptr_cmd[i++] = 1;
2336
2337	while (i & 7)
2338		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
2339
2340	resources[0] = bo_dst;
2341	resources[1] = bo_shader;
2342	resources[2] = bo_cmd;
2343	r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list);
2344	CU_ASSERT_EQUAL(r, 0);
2345
2346	ib_info.ib_mc_address = mc_address_cmd;
2347	ib_info.size = i;
2348	ibs_request.ip_type = ip_type;
2349	ibs_request.ring = ring;
2350	ibs_request.resources = bo_list;
2351	ibs_request.number_of_ibs = 1;
2352	ibs_request.ibs = &ib_info;
2353	ibs_request.fence_info.handle = NULL;
2354
2355	/* submit CS */
2356	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
2357	CU_ASSERT_EQUAL(r, 0);
2358
2359	r = amdgpu_bo_list_destroy(bo_list);
2360	CU_ASSERT_EQUAL(r, 0);
2361
2362	fence_status.ip_type = ip_type;
2363	fence_status.ip_instance = 0;
2364	fence_status.ring = ring;
2365	fence_status.context = context_handle;
2366	fence_status.fence = ibs_request.seq_no;
2367
2368	/* wait for the IB to complete */
2369	r = amdgpu_cs_query_fence_status(&fence_status,
2370					 AMDGPU_TIMEOUT_INFINITE,
2371					 0, &expired);
2372	CU_ASSERT_EQUAL(r, 0);
2373	CU_ASSERT_EQUAL(expired, true);
2374
2375	/* verify the memset result matches the expected value */
2376	i = 0;
2377	while (i < bo_dst_size) {
2378		CU_ASSERT_EQUAL(ptr_dst[i++], 0x22);
2379	}
2380
2381	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
2382	CU_ASSERT_EQUAL(r, 0);
2383
2384	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
2385	CU_ASSERT_EQUAL(r, 0);
2386
2387	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
2388	CU_ASSERT_EQUAL(r, 0);
2389
2390	r = amdgpu_cs_ctx_free(context_handle);
2391	CU_ASSERT_EQUAL(r, 0);
2392}
2393
2394static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
2395					uint32_t ip_type,
2396					uint32_t ring,
2397					int hang)
2398{
2399	amdgpu_context_handle context_handle;
2400	amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
2401	volatile unsigned char *ptr_dst;
2402	void *ptr_shader;
2403	unsigned char *ptr_src;
2404	uint32_t *ptr_cmd;
2405	uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
2406	amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
2407	int i, r;
2408	int bo_dst_size = 16384;
2409	int bo_shader_size = 4096;
2410	int bo_cmd_size = 4096;
2411	struct amdgpu_cs_request ibs_request = {0};
2412	struct amdgpu_cs_ib_info ib_info = {0};
2413	uint32_t expired, hang_state, hangs;
2414	enum cs_type cs_type;
2415	amdgpu_bo_list_handle bo_list;
2416	struct amdgpu_cs_fence fence_status = {0};
2417
2418	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2419	CU_ASSERT_EQUAL(r, 0);
2420
2421	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
2422				    AMDGPU_GEM_DOMAIN_GTT, 0,
2423				    &bo_cmd, (void **)&ptr_cmd,
2424				    &mc_address_cmd, &va_cmd);
2425	CU_ASSERT_EQUAL(r, 0);
2426	memset(ptr_cmd, 0, bo_cmd_size);
2427
2428	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
2429					AMDGPU_GEM_DOMAIN_VRAM, 0,
2430					&bo_shader, &ptr_shader,
2431					&mc_address_shader, &va_shader);
2432	CU_ASSERT_EQUAL(r, 0);
2433	memset(ptr_shader, 0, bo_shader_size);
2434
2435	cs_type = hang ? CS_HANG : CS_BUFFERCOPY;
2436	r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type);
2437	CU_ASSERT_EQUAL(r, 0);
2438
2439	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2440					AMDGPU_GEM_DOMAIN_VRAM, 0,
2441					&bo_src, (void **)&ptr_src,
2442					&mc_address_src, &va_src);
2443	CU_ASSERT_EQUAL(r, 0);
2444
2445	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2446					AMDGPU_GEM_DOMAIN_VRAM, 0,
2447					&bo_dst, (void **)&ptr_dst,
2448					&mc_address_dst, &va_dst);
2449	CU_ASSERT_EQUAL(r, 0);
2450
2451	memset(ptr_src, 0x55, bo_dst_size);
2452
2453	i = 0;
2454	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);
2455
2456	/*  Issue commands to set cu mask used in current dispatch */
2457	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);
2458
2459	/* Writes shader state to HW */
2460	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);
2461
2462	/* Write constant data */
2463	/* Writes the texture resource constants data to the SGPRs */
2464	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2465	ptr_cmd[i++] = 0x240;
2466	ptr_cmd[i++] = mc_address_src;
2467	ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
2468	ptr_cmd[i++] = 0x400;
2469	ptr_cmd[i++] = 0x74fac;
2470
2471	/* Writes the UAV constant data to the SGPRs. */
2472	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2473	ptr_cmd[i++] = 0x244;
2474	ptr_cmd[i++] = mc_address_dst;
2475	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
2476	ptr_cmd[i++] = 0x400;
2477	ptr_cmd[i++] = 0x74fac;
2478
2479	/* clear mmCOMPUTE_RESOURCE_LIMITS */
2480	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2481	ptr_cmd[i++] = 0x215;
2482	ptr_cmd[i++] = 0;
2483
2484	/* dispatch direct command */
2485	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
2486	ptr_cmd[i++] = 0x10;
2487	ptr_cmd[i++] = 1;
2488	ptr_cmd[i++] = 1;
2489	ptr_cmd[i++] = 1;
2490
2491	while (i & 7)
2492		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
2493
2494	resources[0] = bo_shader;
2495	resources[1] = bo_src;
2496	resources[2] = bo_dst;
2497	resources[3] = bo_cmd;
2498	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
2499	CU_ASSERT_EQUAL(r, 0);
2500
2501	ib_info.ib_mc_address = mc_address_cmd;
2502	ib_info.size = i;
2503	ibs_request.ip_type = ip_type;
2504	ibs_request.ring = ring;
2505	ibs_request.resources = bo_list;
2506	ibs_request.number_of_ibs = 1;
2507	ibs_request.ibs = &ib_info;
2508	ibs_request.fence_info.handle = NULL;
2509	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
2510	CU_ASSERT_EQUAL(r, 0);
2511
2512	fence_status.ip_type = ip_type;
2513	fence_status.ip_instance = 0;
2514	fence_status.ring = ring;
2515	fence_status.context = context_handle;
2516	fence_status.fence = ibs_request.seq_no;
2517
2518	/* wait for the IB to complete */
2519	r = amdgpu_cs_query_fence_status(&fence_status,
2520					 AMDGPU_TIMEOUT_INFINITE,
2521					 0, &expired);
2522
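	/*
	 * For a normal run the fence must signal and the copied data must
	 * match; with the hang shader the context is instead expected to
	 * report a GPU reset.
	 */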
2523	if (!hang) {
2524		CU_ASSERT_EQUAL(r, 0);
2525		CU_ASSERT_EQUAL(expired, true);
2526
2527		/* verify the memcpy result matches the source data */
2528		i = 0;
2529		while (i < bo_dst_size) {
2530			CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
2531			i++;
2532		}
2533	} else {
2534		r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
2535		CU_ASSERT_EQUAL(r, 0);
2536		CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
2537	}
2538
2539	r = amdgpu_bo_list_destroy(bo_list);
2540	CU_ASSERT_EQUAL(r, 0);
2541
2542	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
2543	CU_ASSERT_EQUAL(r, 0);
2544	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
2545	CU_ASSERT_EQUAL(r, 0);
2546
2547	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
2548	CU_ASSERT_EQUAL(r, 0);
2549
2550	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
2551	CU_ASSERT_EQUAL(r, 0);
2552
2553	r = amdgpu_cs_ctx_free(context_handle);
2554	CU_ASSERT_EQUAL(r, 0);
2555}
2556
2557static void amdgpu_compute_dispatch_test(void)
2558{
2559	int r;
2560	struct drm_amdgpu_info_hw_ip info;
2561	uint32_t ring_id;
2562
2563	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
2564	CU_ASSERT_EQUAL(r, 0);
2565	if (!info.available_rings)
2566		printf("SKIP ... as there's no compute ring\n");
2567
2568	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2569		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id);
2570		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, 0);
2571	}
2572}
2573
2574static void amdgpu_gfx_dispatch_test(void)
2575{
2576	int r;
2577	struct drm_amdgpu_info_hw_ip info;
2578	uint32_t ring_id;
2579
2580	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
2581	CU_ASSERT_EQUAL(r, 0);
2582	if (!info.available_rings)
2583		printf("SKIP ... as there's no graphics ring\n");
2584
2585	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2586		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id);
2587		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, 0);
2588	}
2589}
2590
2591void amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
2592{
2593	int r;
2594	struct drm_amdgpu_info_hw_ip info;
2595	uint32_t ring_id;
2596
2597	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
2598	CU_ASSERT_EQUAL(r, 0);
2599	if (!info.available_rings)
2600		printf("SKIP ... as there's no ring for ip %d\n", ip_type);
2601
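	/*
	 * Sandwich the hanging dispatch between two good ones so that each
	 * ring is also shown to work again after the GPU reset.
	 */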
2602	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2603		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
2604		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 1);
2605		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
2606	}
2607}
2608
2609static void amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_handle,
2610						  uint32_t ip_type, uint32_t ring)
2611{
2612	amdgpu_context_handle context_handle;
2613	amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
2614	volatile unsigned char *ptr_dst;
2615	void *ptr_shader;
2616	unsigned char *ptr_src;
2617	uint32_t *ptr_cmd;
2618	uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
2619	amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
2620	int i, r;
2621	int bo_dst_size = 0x4000000;
2622	int bo_shader_size = 0x400000;
2623	int bo_cmd_size = 4096;
2624	struct amdgpu_cs_request ibs_request = {0};
2625	struct amdgpu_cs_ib_info ib_info = {0};
2626	uint32_t hang_state, hangs, expired;
2627	struct amdgpu_gpu_info gpu_info = {0};
2628	amdgpu_bo_list_handle bo_list;
2629	struct amdgpu_cs_fence fence_status = {0};
2630
2631	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
2632	CU_ASSERT_EQUAL(r, 0);
2633
2634	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2635	CU_ASSERT_EQUAL(r, 0);
2636
2637	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
2638				    AMDGPU_GEM_DOMAIN_GTT, 0,
2639				    &bo_cmd, (void **)&ptr_cmd,
2640				    &mc_address_cmd, &va_cmd);
2641	CU_ASSERT_EQUAL(r, 0);
2642	memset(ptr_cmd, 0, bo_cmd_size);
2643
2644	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
2645					AMDGPU_GEM_DOMAIN_VRAM, 0,
2646					&bo_shader, &ptr_shader,
2647					&mc_address_shader, &va_shader);
2648	CU_ASSERT_EQUAL(r, 0);
2649	memset(ptr_shader, 0, bo_shader_size);
2650
2651	r = amdgpu_dispatch_load_cs_shader_hang_slow(ptr_shader, gpu_info.family_id);
2652	CU_ASSERT_EQUAL(r, 0);
2653
2654	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2655					AMDGPU_GEM_DOMAIN_VRAM, 0,
2656					&bo_src, (void **)&ptr_src,
2657					&mc_address_src, &va_src);
2658	CU_ASSERT_EQUAL(r, 0);
2659
2660	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2661					AMDGPU_GEM_DOMAIN_VRAM, 0,
2662					&bo_dst, (void **)&ptr_dst,
2663					&mc_address_dst, &va_dst);
2664	CU_ASSERT_EQUAL(r, 0);
2665
2666	memset(ptr_src, 0x55, bo_dst_size);
2667
2668	i = 0;
2669	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);
2670
2671	/*  Issue commands to set cu mask used in current dispatch */
2672	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);
2673
2674	/* Writes shader state to HW */
2675	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);
2676
2677	/* Write constant data */
2678	/* Writes the texture resource constants data to the SGPRs */
2679	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2680	ptr_cmd[i++] = 0x240;
2681	ptr_cmd[i++] = mc_address_src;
2682	ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
2683	ptr_cmd[i++] = 0x400000;
2684	ptr_cmd[i++] = 0x74fac;
2685
2686	/* Writes the UAV constant data to the SGPRs. */
2687	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2688	ptr_cmd[i++] = 0x244;
2689	ptr_cmd[i++] = mc_address_dst;
2690	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
2691	ptr_cmd[i++] = 0x400000;
2692	ptr_cmd[i++] = 0x74fac;
2693
2694	/* clear mmCOMPUTE_RESOURCE_LIMITS */
2695	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2696	ptr_cmd[i++] = 0x215;
2697	ptr_cmd[i++] = 0;
2698
2699	/* dispatch direct command */
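	/*
	 * Dispatch 0x10000 thread groups over the 64 MiB buffers so the job
	 * runs long enough to exceed the GPU timeout (the "slow hang" case).
	 */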
2700	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
2701	ptr_cmd[i++] = 0x10000;
2702	ptr_cmd[i++] = 1;
2703	ptr_cmd[i++] = 1;
2704	ptr_cmd[i++] = 1;
2705
2706	while (i & 7)
2707		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
2708
2709	resources[0] = bo_shader;
2710	resources[1] = bo_src;
2711	resources[2] = bo_dst;
2712	resources[3] = bo_cmd;
2713	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
2714	CU_ASSERT_EQUAL(r, 0);
2715
2716	ib_info.ib_mc_address = mc_address_cmd;
2717	ib_info.size = i;
2718	ibs_request.ip_type = ip_type;
2719	ibs_request.ring = ring;
2720	ibs_request.resources = bo_list;
2721	ibs_request.number_of_ibs = 1;
2722	ibs_request.ibs = &ib_info;
2723	ibs_request.fence_info.handle = NULL;
2724	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
2725	CU_ASSERT_EQUAL(r, 0);
2726
2727	fence_status.ip_type = ip_type;
2728	fence_status.ip_instance = 0;
2729	fence_status.ring = ring;
2730	fence_status.context = context_handle;
2731	fence_status.fence = ibs_request.seq_no;
2732
2733	/* wait for the IB to complete */
2734	r = amdgpu_cs_query_fence_status(&fence_status,
2735					 AMDGPU_TIMEOUT_INFINITE,
2736					 0, &expired);
2737
2738	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
2739	CU_ASSERT_EQUAL(r, 0);
2740	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
2741
2742	r = amdgpu_bo_list_destroy(bo_list);
2743	CU_ASSERT_EQUAL(r, 0);
2744
2745	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
2746	CU_ASSERT_EQUAL(r, 0);
2747	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
2748	CU_ASSERT_EQUAL(r, 0);
2749
2750	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
2751	CU_ASSERT_EQUAL(r, 0);
2752
2753	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
2754	CU_ASSERT_EQUAL(r, 0);
2755
2756	r = amdgpu_cs_ctx_free(context_handle);
2757	CU_ASSERT_EQUAL(r, 0);
2758}
2759
2760void amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
2761{
2762	int r;
2763	struct drm_amdgpu_info_hw_ip info;
2764	uint32_t ring_id;
2765
2766	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
2767	CU_ASSERT_EQUAL(r, 0);
2768	if (!info.available_rings)
2769		printf("SKIP ... as there's no ring for ip %d\n", ip_type);
2770
2771	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2772		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
2773		amdgpu_memcpy_dispatch_hang_slow_test(device_handle, ip_type, ring_id);
2774		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
2775	}
2776}
2777
2778static int amdgpu_draw_load_ps_shader_hang_slow(uint32_t *ptr, int family)
2779{
2780	struct amdgpu_test_shader *shader;
2781	int i, loop = 0x40000;
2782
2783	switch (family) {
2784		case AMDGPU_FAMILY_AI:
2785		case AMDGPU_FAMILY_RV:
2786			shader = &memcpy_ps_hang_slow_ai;
2787			break;
2788		default:
2789			return -1;
2790			break;
2791	}
2792
2793	memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
2794
2795	for (i = 0; i < loop; i++)
2796		memcpy(ptr + shader->header_length + shader->body_length * i,
2797			shader->shader + shader->header_length,
2798			shader->body_length * sizeof(uint32_t));
2799
2800	memcpy(ptr + shader->header_length + shader->body_length * loop,
2801		shader->shader + shader->header_length + shader->body_length,
2802		shader->foot_length * sizeof(uint32_t));
2803
2804	return 0;
2805}
2806
2807static int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type)
2808{
2809	int i;
2810	uint32_t shader_offset = 256;
2811	uint32_t mem_offset, patch_code_offset;
2812	uint32_t shader_size, patchinfo_code_size;
2813	const uint32_t *shader;
2814	const uint32_t *patchinfo_code;
2815	const uint32_t *patchcode_offset;
2816
2817	switch (ps_type) {
2818		case PS_CONST:
2819			shader = ps_const_shader_gfx9;
2820			shader_size = sizeof(ps_const_shader_gfx9);
2821			patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9;
2822			patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9;
2823			patchcode_offset = ps_const_shader_patchinfo_offset_gfx9;
2824			break;
2825		case PS_TEX:
2826			shader = ps_tex_shader_gfx9;
2827			shader_size = sizeof(ps_tex_shader_gfx9);
2828			patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx9;
2829			patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9;
2830			patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9;
2831			break;
2832		case PS_HANG:
2833			shader = memcpy_ps_hang;
2834			shader_size = sizeof(memcpy_ps_hang);
2835
2836			memcpy(ptr, shader, shader_size);
2837			return 0;
2838		default:
2839			return -1;
2840			break;
2841	}
2842
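	/*
	 * Layout sketch: ten copies of the pixel shader are written 256
	 * bytes apart, and each copy then has one variant of the patch-info
	 * code spliced in at patchcode_offset; amdgpu_draw_ps_write2hw()
	 * later points SPI_SHADER_PGM_LO_PS at one of these slots.
	 */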
2843	/* write main shader program */
2844	for (i = 0; i < 10; i++) {
2845		mem_offset = i * shader_offset;
2846		memcpy(ptr + mem_offset, shader, shader_size);
2847	}
2848
2849	/* overwrite patch codes */
2850	for (i = 0; i < 10; i++) {
2851		mem_offset = i * shader_offset + patchcode_offset[0] * sizeof(uint32_t);
2852		patch_code_offset = i * patchinfo_code_size;
2853		memcpy(ptr + mem_offset,
2854			patchinfo_code + patch_code_offset,
2855			patchinfo_code_size * sizeof(uint32_t));
2856	}
2857
2858	return 0;
2859}
2860
2861/* load RectPosTexFast_VS */
2862static int amdgpu_draw_load_vs_shader(uint8_t *ptr)
2863{
2864	const uint32_t *shader;
2865	uint32_t shader_size;
2866
2867	shader = vs_RectPosTexFast_shader_gfx9;
2868	shader_size = sizeof(vs_RectPosTexFast_shader_gfx9);
2869
2870	memcpy(ptr, shader, shader_size);
2871
2872	return 0;
2873}
2874
2875static int amdgpu_draw_init(uint32_t *ptr)
2876{
2877	int i = 0;
2878	const uint32_t *preamblecache_ptr;
2879	uint32_t preamblecache_size;
2880
2881	/* Write context control and load shadowing register if necessary */
2882	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
2883	ptr[i++] = 0x80000000;
2884	ptr[i++] = 0x80000000;
2885
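	/*
	 * preamblecache_gfx9 appears to be a pre-built block of GFX9 state
	 * initialization packets; it is copied into the IB verbatim rather
	 * than being assembled packet by packet here.
	 */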
2886	preamblecache_ptr = preamblecache_gfx9;
2887	preamblecache_size = sizeof(preamblecache_gfx9);
2888
2889	memcpy(ptr + i, preamblecache_ptr, preamblecache_size);
2890	return i + preamblecache_size/sizeof(uint32_t);
2891}
2892
2893static int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr,
2894							 uint64_t dst_addr,
2895							 int hang_slow)
2896{
2897	int i = 0;
2898
2899	/* setup color buffer */
2900	/* offset   reg
2901	   0xA318   CB_COLOR0_BASE
2902	   0xA319   CB_COLOR0_BASE_EXT
2903	   0xA31A   CB_COLOR0_ATTRIB2
2904	   0xA31B   CB_COLOR0_VIEW
2905	   0xA31C   CB_COLOR0_INFO
2906	   0xA31D   CB_COLOR0_ATTRIB
2907	   0xA31E   CB_COLOR0_DCC_CONTROL
2908	   0xA31F   CB_COLOR0_CMASK
2909	   0xA320   CB_COLOR0_CMASK_BASE_EXT
2910	   0xA321   CB_COLOR0_FMASK
2911	   0xA322   CB_COLOR0_FMASK_BASE_EXT
2912	   0xA323   CB_COLOR0_CLEAR_WORD0
2913	   0xA324   CB_COLOR0_CLEAR_WORD1
2914	   0xA325   CB_COLOR0_DCC_BASE
2915	   0xA326   CB_COLOR0_DCC_BASE_EXT */
2916	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15);
2917	ptr[i++] = 0x318;
2918	ptr[i++] = dst_addr >> 8;
2919	ptr[i++] = dst_addr >> 40;
2920	ptr[i++] = hang_slow ? 0x1ffc7ff : 0x7c01f;
2921	ptr[i++] = 0;
2922	ptr[i++] = 0x50438;
2923	ptr[i++] = 0x10140000;
2924	i += 9;
2925
2926	/* mmCB_MRT0_EPITCH */
2927	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
2928	ptr[i++] = 0x1e8;
2929	ptr[i++] = hang_slow ? 0x7ff : 0x1f;
2930
2931	/* 0xA32B   CB_COLOR1_BASE */
2932	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
2933	ptr[i++] = 0x32b;
2934	ptr[i++] = 0;
2935
2936	/* 0xA33A   CB_COLOR1_BASE */
2937	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
2938	ptr[i++] = 0x33a;
2939	ptr[i++] = 0;
2940
2941	/* SPI_SHADER_COL_FORMAT */
2942	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
2943	ptr[i++] = 0x1c5;
2944	ptr[i++] = 9;
2945
2946	/* Setup depth buffer */
2947	/* mmDB_Z_INFO */
2948	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
2949	ptr[i++] = 0xe;
2950	i += 2;
2951
2952	return i;
2953}
2954
2955static int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr, int hang_slow)
2956{
2957	int i = 0;
2958	const uint32_t *cached_cmd_ptr;
2959	uint32_t cached_cmd_size;
2960
2961	/* mmPA_SC_TILE_STEERING_OVERRIDE */
2962	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
2963	ptr[i++] = 0xd7;
2964	ptr[i++] = 0;
2965
2966	ptr[i++] = 0xffff1000;
2967	ptr[i++] = 0xc0021000;
2968
2969	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
2970	ptr[i++] = 0xd7;
2971	ptr[i++] = 1;
2972
2973	/* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
2974	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16);
2975	ptr[i++] = 0x2fe;
2976	i += 16;
2977
2978	/* mmPA_SC_CENTROID_PRIORITY_0 */
2979	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
2980	ptr[i++] = 0x2f5;
2981	i += 2;
2982
2983	cached_cmd_ptr = cached_cmd_gfx9;
2984	cached_cmd_size = sizeof(cached_cmd_gfx9);
2985
2986	memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size);
2987	if (hang_slow)
2988		*(ptr + i + 12) = 0x8000800;
2989	i += cached_cmd_size/sizeof(uint32_t);
2990
2991	return i;
2992}
2993
2994static int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr,
2995						  int ps_type,
2996						  uint64_t shader_addr,
2997						  int hang_slow)
2998{
2999	int i = 0;
3000
3001	/* mmPA_CL_VS_OUT_CNTL */
3002	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3003	ptr[i++] = 0x207;
3004	ptr[i++] = 0;
3005
3006	/* mmSPI_SHADER_PGM_RSRC3_VS */
3007	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3008	ptr[i++] = 0x46;
3009	ptr[i++] = 0xffff;
3010
3011	/* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */
3012	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
3013	ptr[i++] = 0x48;
3014	ptr[i++] = shader_addr >> 8;
3015	ptr[i++] = shader_addr >> 40;
3016
3017	/* mmSPI_SHADER_PGM_RSRC1_VS */
3018	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3019	ptr[i++] = 0x4a;
3020	ptr[i++] = 0xc0081;
3021	/* mmSPI_SHADER_PGM_RSRC2_VS */
3022	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3023	ptr[i++] = 0x4b;
3024	ptr[i++] = 0x18;
3025
3026	/* mmSPI_VS_OUT_CONFIG */
3027	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3028	ptr[i++] = 0x1b1;
3029	ptr[i++] = 2;
3030
3031	/* mmSPI_SHADER_POS_FORMAT */
3032	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3033	ptr[i++] = 0x1c3;
3034	ptr[i++] = 4;
3035
3036	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3037	ptr[i++] = 0x4c;
3038	i += 2;
3039	ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;
3040	ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;
3041
3042	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3043	ptr[i++] = 0x50;
3044	i += 2;
3045	if (ps_type == PS_CONST) {
3046		i += 2;
3047	} else if (ps_type == PS_TEX) {
3048		ptr[i++] = 0x3f800000;
3049		ptr[i++] = 0x3f800000;
3050	}
3051
3052	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3053	ptr[i++] = 0x54;
3054	i += 4;
3055
3056	return i;
3057}
3058
3059static int amdgpu_draw_ps_write2hw(uint32_t *ptr,
3060				   int ps_type,
3061				   uint64_t shader_addr)
3062{
3063	int i, j;
3064	const uint32_t *sh_registers;
3065	const uint32_t *context_registers;
3066	uint32_t num_sh_reg, num_context_reg;
3067
3068	if (ps_type == PS_CONST) {
3069		sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9;
3070		context_registers = (const uint32_t *)ps_const_context_reg_gfx9;
3071		num_sh_reg = ps_num_sh_registers_gfx9;
3072		num_context_reg = ps_num_context_registers_gfx9;
3073	} else if (ps_type == PS_TEX) {
3074		sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9;
3075		context_registers = (const uint32_t *)ps_tex_context_reg_gfx9;
3076		num_sh_reg = ps_num_sh_registers_gfx9;
3077		num_context_reg = ps_num_context_registers_gfx9;
3078	}
3079
3080	i = 0;
3081
3082	/* 0x2c07   SPI_SHADER_PGM_RSRC3_PS
3083	   0x2c08   SPI_SHADER_PGM_LO_PS
3084	   0x2c09   SPI_SHADER_PGM_HI_PS */
3085	shader_addr += 256 * 9;
3086	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
3087	ptr[i++] = 0x7;
3088	ptr[i++] = 0xffff;
3089	ptr[i++] = shader_addr >> 8;
3090	ptr[i++] = shader_addr >> 40;
3091
3092	for (j = 0; j < num_sh_reg; j++) {
3093		ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3094		ptr[i++] = sh_registers[j * 2] - 0x2c00;
3095		ptr[i++] = sh_registers[j * 2 + 1];
3096	}
3097
3098	for (j = 0; j < num_context_reg; j++) {
3099		if (context_registers[j * 2] != 0xA1C5) {
3100			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3101			ptr[i++] = context_registers[j * 2] - 0xa000;
3102			ptr[i++] = context_registers[j * 2 + 1];
3103		}
3104
3105		if (context_registers[j * 2] == 0xA1B4) {
3106			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3107			ptr[i++] = 0x1b3;
3108			ptr[i++] = 2;
3109		}
3110	}
3111
3112	return i;
3113}
3114
3115static int amdgpu_draw_draw(uint32_t *ptr)
3116{
3117	int i = 0;
3118
3119	/* mmIA_MULTI_VGT_PARAM */
3120	ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3121	ptr[i++] = 0x40000258;
3122	ptr[i++] = 0xd00ff;
3123
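	/*
	 * Primitive type 0x11 below is RECTLIST, so the three-vertex
	 * DRAW_INDEX_AUTO that follows emits a single rectangle.
	 */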
3124	/* mmVGT_PRIMITIVE_TYPE */
3125	ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3126	ptr[i++] = 0x10000242;
3127	ptr[i++] = 0x11;
3128
3129	ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1);
3130	ptr[i++] = 3;
3131	ptr[i++] = 2;
3132
3133	return i;
3134}
3135
3136void amdgpu_memset_draw(amdgpu_device_handle device_handle,
3137			amdgpu_bo_handle bo_shader_ps,
3138			amdgpu_bo_handle bo_shader_vs,
3139			uint64_t mc_address_shader_ps,
3140			uint64_t mc_address_shader_vs,
3141			uint32_t ring_id)
3142{
3143	amdgpu_context_handle context_handle;
3144	amdgpu_bo_handle bo_dst, bo_cmd, resources[4];
3145	volatile unsigned char *ptr_dst;
3146	uint32_t *ptr_cmd;
3147	uint64_t mc_address_dst, mc_address_cmd;
3148	amdgpu_va_handle va_dst, va_cmd;
3149	int i, r;
3150	int bo_dst_size = 16384;
3151	int bo_cmd_size = 4096;
3152	struct amdgpu_cs_request ibs_request = {0};
3153	struct amdgpu_cs_ib_info ib_info = {0};
3154	struct amdgpu_cs_fence fence_status = {0};
3155	uint32_t expired;
3156	amdgpu_bo_list_handle bo_list;
3157
3158	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
3159	CU_ASSERT_EQUAL(r, 0);
3160
3161	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
3162					AMDGPU_GEM_DOMAIN_GTT, 0,
3163					&bo_cmd, (void **)&ptr_cmd,
3164					&mc_address_cmd, &va_cmd);
3165	CU_ASSERT_EQUAL(r, 0);
3166	memset(ptr_cmd, 0, bo_cmd_size);
3167
3168	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
3169					AMDGPU_GEM_DOMAIN_VRAM, 0,
3170					&bo_dst, (void **)&ptr_dst,
3171					&mc_address_dst, &va_dst);
3172	CU_ASSERT_EQUAL(r, 0);
3173
3174	i = 0;
3175	i += amdgpu_draw_init(ptr_cmd + i);
3176
3177	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0);
3178
3179	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0);
3180
3181	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs, 0);
3182
3183	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps);
3184
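	/*
	 * Load the clear color into the PS user SGPRs; the verification
	 * loop below expects every destination byte to read back as 0x33.
	 */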
3185	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3186	ptr_cmd[i++] = 0xc;
3187	ptr_cmd[i++] = 0x33333333;
3188	ptr_cmd[i++] = 0x33333333;
3189	ptr_cmd[i++] = 0x33333333;
3190	ptr_cmd[i++] = 0x33333333;
3191
3192	i += amdgpu_draw_draw(ptr_cmd + i);
3193
3194	while (i & 7)
3195		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
3196
3197	resources[0] = bo_dst;
3198	resources[1] = bo_shader_ps;
3199	resources[2] = bo_shader_vs;
3200	resources[3] = bo_cmd;
3201	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
3202	CU_ASSERT_EQUAL(r, 0);
3203
3204	ib_info.ib_mc_address = mc_address_cmd;
3205	ib_info.size = i;
3206	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
3207	ibs_request.ring = ring_id;
3208	ibs_request.resources = bo_list;
3209	ibs_request.number_of_ibs = 1;
3210	ibs_request.ibs = &ib_info;
3211	ibs_request.fence_info.handle = NULL;
3212
3213	/* submit CS */
3214	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
3215	CU_ASSERT_EQUAL(r, 0);
3216
3217	r = amdgpu_bo_list_destroy(bo_list);
3218	CU_ASSERT_EQUAL(r, 0);
3219
3220	fence_status.ip_type = AMDGPU_HW_IP_GFX;
3221	fence_status.ip_instance = 0;
3222	fence_status.ring = ring_id;
3223	fence_status.context = context_handle;
3224	fence_status.fence = ibs_request.seq_no;
3225
3226	/* wait for the IB to complete */
3227	r = amdgpu_cs_query_fence_status(&fence_status,
3228					 AMDGPU_TIMEOUT_INFINITE,
3229					 0, &expired);
3230	CU_ASSERT_EQUAL(r, 0);
3231	CU_ASSERT_EQUAL(expired, true);
3232
3233	/* verify the memset result matches the expected value */
3234	i = 0;
3235	while (i < bo_dst_size) {
3236		CU_ASSERT_EQUAL(ptr_dst[i++], 0x33);
3237	}
3238
3239	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
3240	CU_ASSERT_EQUAL(r, 0);
3241
3242	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
3243	CU_ASSERT_EQUAL(r, 0);
3244
3245	r = amdgpu_cs_ctx_free(context_handle);
3246	CU_ASSERT_EQUAL(r, 0);
3247}
3248
3249static void amdgpu_memset_draw_test(amdgpu_device_handle device_handle,
3250				    uint32_t ring)
3251{
3252	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
3253	void *ptr_shader_ps;
3254	void *ptr_shader_vs;
3255	uint64_t mc_address_shader_ps, mc_address_shader_vs;
3256	amdgpu_va_handle va_shader_ps, va_shader_vs;
3257	int r;
3258	int bo_shader_size = 4096;
3259
3260	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
3261					AMDGPU_GEM_DOMAIN_VRAM, 0,
3262					&bo_shader_ps, &ptr_shader_ps,
3263					&mc_address_shader_ps, &va_shader_ps);
3264	CU_ASSERT_EQUAL(r, 0);
3265	memset(ptr_shader_ps, 0, bo_shader_size);
3266
3267	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
3268					AMDGPU_GEM_DOMAIN_VRAM, 0,
3269					&bo_shader_vs, &ptr_shader_vs,
3270					&mc_address_shader_vs, &va_shader_vs);
3271	CU_ASSERT_EQUAL(r, 0);
3272	memset(ptr_shader_vs, 0, bo_shader_size);
3273
3274	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_CONST);
3275	CU_ASSERT_EQUAL(r, 0);
3276
3277	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
3278	CU_ASSERT_EQUAL(r, 0);
3279
3280	amdgpu_memset_draw(device_handle, bo_shader_ps, bo_shader_vs,
3281			mc_address_shader_ps, mc_address_shader_vs, ring);
3282
3283	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
3284	CU_ASSERT_EQUAL(r, 0);
3285
3286	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
3287	CU_ASSERT_EQUAL(r, 0);
3288}
3289
3290static void amdgpu_memcpy_draw(amdgpu_device_handle device_handle,
3291			       amdgpu_bo_handle bo_shader_ps,
3292			       amdgpu_bo_handle bo_shader_vs,
3293			       uint64_t mc_address_shader_ps,
3294			       uint64_t mc_address_shader_vs,
3295			       uint32_t ring, int hang)
3296{
3297	amdgpu_context_handle context_handle;
3298	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
3299	volatile unsigned char *ptr_dst;
3300	unsigned char *ptr_src;
3301	uint32_t *ptr_cmd;
3302	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
3303	amdgpu_va_handle va_dst, va_src, va_cmd;
3304	int i, r;
3305	int bo_size = 16384;
3306	int bo_cmd_size = 4096;
3307	struct amdgpu_cs_request ibs_request = {0};
3308	struct amdgpu_cs_ib_info ib_info = {0};
3309	uint32_t hang_state, hangs;
3310	uint32_t expired;
3311	amdgpu_bo_list_handle bo_list;
3312	struct amdgpu_cs_fence fence_status = {0};
3313
3314	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
3315	CU_ASSERT_EQUAL(r, 0);
3316
3317	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
3318				    AMDGPU_GEM_DOMAIN_GTT, 0,
3319				    &bo_cmd, (void **)&ptr_cmd,
3320				    &mc_address_cmd, &va_cmd);
3321	CU_ASSERT_EQUAL(r, 0);
3322	memset(ptr_cmd, 0, bo_cmd_size);
3323
3324	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
3325					AMDGPU_GEM_DOMAIN_VRAM, 0,
3326					&bo_src, (void **)&ptr_src,
3327					&mc_address_src, &va_src);
3328	CU_ASSERT_EQUAL(r, 0);
3329
3330	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
3331					AMDGPU_GEM_DOMAIN_VRAM, 0,
3332					&bo_dst, (void **)&ptr_dst,
3333					&mc_address_dst, &va_dst);
3334	CU_ASSERT_EQUAL(r, 0);
3335
3336	memset(ptr_src, 0x55, bo_size);
3337
3338	i = 0;
3339	i += amdgpu_draw_init(ptr_cmd + i);
3340
3341	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0);
3342
3343	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0);
3344
3345	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs, 0);
3346
3347	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);
3348
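	/*
	 * These SGPR writes appear to set up the source-image resource and
	 * sampler descriptors consumed by the PS_TEX copy shader; the
	 * constants are magic values specific to this test.
	 */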
3349	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
3350	ptr_cmd[i++] = 0xc;
3351	ptr_cmd[i++] = mc_address_src >> 8;
3352	ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
3353	ptr_cmd[i++] = 0x7c01f;
3354	ptr_cmd[i++] = 0x90500fac;
3355	ptr_cmd[i++] = 0x3e000;
3356	i += 3;
3357
3358	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3359	ptr_cmd[i++] = 0x14;
3360	ptr_cmd[i++] = 0x92;
3361	i += 3;
3362
3363	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3364	ptr_cmd[i++] = 0x191;
3365	ptr_cmd[i++] = 0;
3366
3367	i += amdgpu_draw_draw(ptr_cmd + i);
3368
3369	while (i & 7)
3370		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
3371
3372	resources[0] = bo_dst;
3373	resources[1] = bo_src;
3374	resources[2] = bo_shader_ps;
3375	resources[3] = bo_shader_vs;
3376	resources[4] = bo_cmd;
3377	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
3378	CU_ASSERT_EQUAL(r, 0);
3379
3380	ib_info.ib_mc_address = mc_address_cmd;
3381	ib_info.size = i;
3382	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
3383	ibs_request.ring = ring;
3384	ibs_request.resources = bo_list;
3385	ibs_request.number_of_ibs = 1;
3386	ibs_request.ibs = &ib_info;
3387	ibs_request.fence_info.handle = NULL;
3388	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
3389	CU_ASSERT_EQUAL(r, 0);
3390
3391	fence_status.ip_type = AMDGPU_HW_IP_GFX;
3392	fence_status.ip_instance = 0;
3393	fence_status.ring = ring;
3394	fence_status.context = context_handle;
3395	fence_status.fence = ibs_request.seq_no;
3396
3397	/* wait for the IB to complete */
3398	r = amdgpu_cs_query_fence_status(&fence_status,
3399					 AMDGPU_TIMEOUT_INFINITE,
3400					 0, &expired);
3401	if (!hang) {
3402		CU_ASSERT_EQUAL(r, 0);
3403		CU_ASSERT_EQUAL(expired, true);
3404
3405		/* verify the memcpy result matches the source data */
3406		i = 0;
3407		while (i < bo_size) {
3408			CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
3409			i++;
3410		}
3411	} else {
3412		r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
3413		CU_ASSERT_EQUAL(r, 0);
3414		CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
3415	}
3416
	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

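/*
 * Copy bo_src to bo_dst with a textured draw on the given GFX ring.
 * With hang set, a hanging pixel shader is loaded instead and the
 * context is expected to report a GPU reset afterwards.
 */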
void amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring,
			     int hang)
{
	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
	void *ptr_shader_ps;
	void *ptr_shader_vs;
	uint64_t mc_address_shader_ps, mc_address_shader_vs;
	amdgpu_va_handle va_shader_ps, va_shader_vs;
	int bo_shader_size = 4096;
	enum ps_type ps_type = hang ? PS_HANG : PS_TEX;
	int r;

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader_ps, &ptr_shader_ps,
					&mc_address_shader_ps, &va_shader_ps);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_ps, 0, bo_shader_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader_vs, &ptr_shader_vs,
					&mc_address_shader_vs, &va_shader_vs);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_vs, 0, bo_shader_size);

	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, ps_type);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
	CU_ASSERT_EQUAL(r, 0);

	amdgpu_memcpy_draw(device_handle, bo_shader_ps, bo_shader_vs,
			mc_address_shader_ps, mc_address_shader_vs, ring, hang);

	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);
}

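/* CUnit entry point: run the memset and memcpy draw tests on every
 * available GFX ring. */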
static void amdgpu_draw_test(void)
{
	int r;
	struct drm_amdgpu_info_hw_ip info;
	uint32_t ring_id;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
	CU_ASSERT_EQUAL(r, 0);
	if (!info.available_rings) {
		printf("SKIP ... as there's no graphics ring\n");
		return;
	}

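	/* available_rings is a bitmask; the loop assumes the ring bits are
	 * allocated contiguously from bit 0 */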
	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
		amdgpu_memset_draw_test(device_handle, ring_id);
		amdgpu_memcpy_draw_test(device_handle, ring_id, 0);
	}
}

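/*
 * Hang the GPU with a deliberately slow pixel shader working on a large
 * (0x4000000-byte) copy, then verify that the context reports a reset.
 */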
void amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint32_t ring)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
	void *ptr_shader_ps;
	void *ptr_shader_vs;
	volatile unsigned char *ptr_dst;
	unsigned char *ptr_src;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
	uint64_t mc_address_shader_ps, mc_address_shader_vs;
	amdgpu_va_handle va_shader_ps, va_shader_vs;
	amdgpu_va_handle va_dst, va_src, va_cmd;
	struct amdgpu_gpu_info gpu_info = {0};
	int i, r;
	int bo_size = 0x4000000;
	int bo_shader_ps_size = 0x400000;
	int bo_shader_vs_size = 4096;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info = {0};
	uint32_t hang_state, hangs, expired;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_ps_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader_ps, &ptr_shader_ps,
					&mc_address_shader_ps, &va_shader_ps);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_ps, 0, bo_shader_ps_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_vs_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader_vs, &ptr_shader_vs,
					&mc_address_shader_vs, &va_shader_vs);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_vs, 0, bo_shader_vs_size);

	r = amdgpu_draw_load_ps_shader_hang_slow(ptr_shader_ps, gpu_info.family_id);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_src, (void **)&ptr_src,
					&mc_address_src, &va_src);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_dst, (void **)&ptr_dst,
					&mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	memset(ptr_src, 0x55, bo_size);

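	/* Build the same draw stream as amdgpu_memcpy_draw above, adjusted
	 * for the larger surface and the slow shader. */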
	i = 0;
	i += amdgpu_draw_init(ptr_cmd + i);

	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 1);

	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 1);

	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX,
							mc_address_shader_vs, 1);

	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);

	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
	ptr_cmd[i++] = 0xc;
	ptr_cmd[i++] = mc_address_src >> 8;
	ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
	ptr_cmd[i++] = 0x1ffc7ff;
	ptr_cmd[i++] = 0x90500fac;
	ptr_cmd[i++] = 0xffe000;
	i += 3;

	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x14;
	ptr_cmd[i++] = 0x92;
	i += 3;

	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr_cmd[i++] = 0x191;
	ptr_cmd[i++] = 0;

	i += amdgpu_draw_draw(ptr_cmd + i);

	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_dst;
	resources[1] = bo_src;
	resources[2] = bo_shader_ps;
	resources[3] = bo_shader_vs;
	resources[4] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for the IB to complete */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);

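	/* The slow shader is expected to hang the GPU, so the wait result is
	 * not asserted here (presumably it can fail once the reset kicks in);
	 * what matters is that the context reports the reset below. */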
	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_ps_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_vs_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

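/*
 * Force a GPU reset through debugfs, verify that an existing context
 * observes it, then confirm the GPU is usable again by re-running the
 * dispatch tests.
 */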
static void amdgpu_gpu_reset_test(void)
{
	int r;
	char debugfs_path[256], tmp[10];
	int fd;
	struct stat sbuf;
	amdgpu_context_handle context_handle;
	uint32_t hang_state, hangs;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = fstat(drm_amdgpu[0], &sbuf);
	CU_ASSERT_EQUAL(r, 0);

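	/* reading the amdgpu_gpu_recover debugfs entry triggers a GPU reset;
	 * the DRM minor from fstat() selects the right device node */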
	snprintf(debugfs_path, sizeof(debugfs_path),
		 "/sys/kernel/debug/dri/%d/amdgpu_gpu_recover", minor(sbuf.st_rdev));
	fd = open(debugfs_path, O_RDONLY);
	CU_ASSERT(fd >= 0);

	r = read(fd, tmp, sizeof(tmp));
	CU_ASSERT(r > 0);

	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);

	close(fd);
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);

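	/* the GPU should be functional again after the recovery */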
	amdgpu_compute_dispatch_test();
	amdgpu_gfx_dispatch_test();
}
