basic_tests.c revision 4babd585
1/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22*/
23
24#include <stdio.h>
25#include <stdlib.h>
26#include <unistd.h>
27#include <sys/types.h>
28#ifdef MAJOR_IN_SYSMACROS
29#include <sys/sysmacros.h>
30#endif
31#include <sys/stat.h>
32#include <fcntl.h>
33#if HAVE_ALLOCA_H
34# include <alloca.h>
35#endif
36#include <sys/wait.h>
37
38#include "CUnit/Basic.h"
39
40#include "amdgpu_test.h"
41#include "amdgpu_drm.h"
42#include "amdgpu_internal.h"
43#include "util_math.h"
44
45static  amdgpu_device_handle device_handle;
46static  uint32_t  major_version;
47static  uint32_t  minor_version;
48static  uint32_t  family_id;
49static  uint32_t  chip_id;
50static  uint32_t  chip_rev;
51
52static void amdgpu_query_info_test(void);
53static void amdgpu_command_submission_gfx(void);
54static void amdgpu_command_submission_compute(void);
55static void amdgpu_command_submission_multi_fence(void);
56static void amdgpu_command_submission_sdma(void);
57static void amdgpu_userptr_test(void);
58static void amdgpu_semaphore_test(void);
59static void amdgpu_sync_dependency_test(void);
60static void amdgpu_bo_eviction_test(void);
61static void amdgpu_compute_dispatch_test(void);
62static void amdgpu_gfx_dispatch_test(void);
63static void amdgpu_draw_test(void);
64static void amdgpu_gpu_reset_test(void);
65
66static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
67static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
68static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
69static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
70				       unsigned ip_type,
71				       int instance, int pm4_dw, uint32_t *pm4_src,
72				       int res_cnt, amdgpu_bo_handle *resources,
73				       struct amdgpu_cs_ib_info *ib_info,
74				       struct amdgpu_cs_request *ibs_request);
75
/* Registry of all basic test cases: each entry pairs a human-readable test
 * name with its implementation.  The names are referenced verbatim by
 * amdgpu_set_test_active() in suite_basic_tests_enable(), so they must stay
 * in sync.  CU_TEST_INFO_NULL terminates the list as CUnit requires. */
CU_TestInfo basic_tests[] = {
	{ "Query Info Test",  amdgpu_query_info_test },
	{ "Userptr Test",  amdgpu_userptr_test },
	{ "bo eviction Test",  amdgpu_bo_eviction_test },
	{ "Command submission Test (GFX)",  amdgpu_command_submission_gfx },
	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
	{ "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
	{ "SW semaphore Test",  amdgpu_semaphore_test },
	{ "Sync dependency Test",  amdgpu_sync_dependency_test },
	{ "Dispatch Test (Compute)",  amdgpu_compute_dispatch_test },
	{ "Dispatch Test (GFX)",  amdgpu_gfx_dispatch_test },
	{ "Draw Test",  amdgpu_draw_test },
	{ "GPU reset Test", amdgpu_gpu_reset_test },
	CU_TEST_INFO_NULL,
};
92#define BUFFER_SIZE (MAX2(8 * 1024, getpagesize()))
93#define SDMA_PKT_HEADER_op_offset 0
94#define SDMA_PKT_HEADER_op_mask   0x000000FF
95#define SDMA_PKT_HEADER_op_shift  0
96#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
97#define SDMA_OPCODE_CONSTANT_FILL  11
98#       define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)           ((x) << 14)
99	/* 0 = byte fill
100	 * 2 = DW fill
101	 */
102#define SDMA_PACKET(op, sub_op, e)	((((e) & 0xFFFF) << 16) |	\
103					(((sub_op) & 0xFF) << 8) |	\
104					(((op) & 0xFF) << 0))
105#define	SDMA_OPCODE_WRITE				  2
106#       define SDMA_WRITE_SUB_OPCODE_LINEAR               0
107#       define SDMA_WRTIE_SUB_OPCODE_TILED                1
108
109#define	SDMA_OPCODE_COPY				  1
110#       define SDMA_COPY_SUB_OPCODE_LINEAR                0
111
112#define	SDMA_OPCODE_ATOMIC				  10
113#		define SDMA_ATOMIC_LOOP(x)               ((x) << 0)
114        /* 0 - single_pass_atomic.
115         * 1 - loop_until_compare_satisfied.
116         */
117#		define SDMA_ATOMIC_TMZ(x)                ((x) << 2)
118		/* 0 - non-TMZ.
119		 * 1 - TMZ.
120	     */
121#		define SDMA_ATOMIC_OPCODE(x)             ((x) << 9)
122		/* TC_OP_ATOMIC_CMPSWAP_RTN_32 0x00000008
123		 * same as Packet 3
124		 */
125
126#define GFX_COMPUTE_NOP  0xffff1000
127#define SDMA_NOP  0x0
128
129/* PM4 */
130#define	PACKET_TYPE0	0
131#define	PACKET_TYPE1	1
132#define	PACKET_TYPE2	2
133#define	PACKET_TYPE3	3
134
135#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
136#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
137#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
138#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
139#define PACKET0(reg, n)	((PACKET_TYPE0 << 30) |				\
140			 ((reg) & 0xFFFF) |			\
141			 ((n) & 0x3FFF) << 16)
142#define CP_PACKET2			0x80000000
143#define		PACKET2_PAD_SHIFT		0
144#define		PACKET2_PAD_MASK		(0x3fffffff << 0)
145
146#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
147
148#define PACKET3(op, n)	((PACKET_TYPE3 << 30) |				\
149			 (((op) & 0xFF) << 8) |				\
150			 ((n) & 0x3FFF) << 16)
151#define PACKET3_COMPUTE(op, n) PACKET3(op, n) | (1 << 1)
152
153/* Packet 3 types */
154#define	PACKET3_NOP					0x10
155
156#define	PACKET3_WRITE_DATA				0x37
157#define		WRITE_DATA_DST_SEL(x)                   ((x) << 8)
158		/* 0 - register
159		 * 1 - memory (sync - via GRBM)
160		 * 2 - gl2
161		 * 3 - gds
162		 * 4 - reserved
163		 * 5 - memory (async - direct)
164		 */
165#define		WR_ONE_ADDR                             (1 << 16)
166#define		WR_CONFIRM                              (1 << 20)
167#define		WRITE_DATA_CACHE_POLICY(x)              ((x) << 25)
168		/* 0 - LRU
169		 * 1 - Stream
170		 */
171#define		WRITE_DATA_ENGINE_SEL(x)                ((x) << 30)
172		/* 0 - me
173		 * 1 - pfp
174		 * 2 - ce
175		 */
176
177#define	PACKET3_ATOMIC_MEM				0x1E
178#define     TC_OP_ATOMIC_CMPSWAP_RTN_32          0x00000008
179#define     ATOMIC_MEM_COMMAND(x)               ((x) << 8)
180            /* 0 - single_pass_atomic.
181             * 1 - loop_until_compare_satisfied.
182             */
183#define     ATOMIC_MEM_CACHEPOLICAY(x)          ((x) << 25)
184            /* 0 - lru.
185             * 1 - stream.
186             */
187#define     ATOMIC_MEM_ENGINESEL(x)             ((x) << 30)
188            /* 0 - micro_engine.
189			 */
190
191#define	PACKET3_DMA_DATA				0x50
192/* 1. header
193 * 2. CONTROL
194 * 3. SRC_ADDR_LO or DATA [31:0]
195 * 4. SRC_ADDR_HI [31:0]
196 * 5. DST_ADDR_LO [31:0]
197 * 6. DST_ADDR_HI [7:0]
198 * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
199 */
200/* CONTROL */
201#              define PACKET3_DMA_DATA_ENGINE(x)     ((x) << 0)
202		/* 0 - ME
203		 * 1 - PFP
204		 */
205#              define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
206		/* 0 - LRU
207		 * 1 - Stream
208		 * 2 - Bypass
209		 */
210#              define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15)
211#              define PACKET3_DMA_DATA_DST_SEL(x)  ((x) << 20)
212		/* 0 - DST_ADDR using DAS
213		 * 1 - GDS
214		 * 3 - DST_ADDR using L2
215		 */
216#              define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
217		/* 0 - LRU
218		 * 1 - Stream
219		 * 2 - Bypass
220		 */
221#              define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27)
222#              define PACKET3_DMA_DATA_SRC_SEL(x)  ((x) << 29)
223		/* 0 - SRC_ADDR using SAS
224		 * 1 - GDS
225		 * 2 - DATA
226		 * 3 - SRC_ADDR using L2
227		 */
228#              define PACKET3_DMA_DATA_CP_SYNC     (1 << 31)
229/* COMMAND */
230#              define PACKET3_DMA_DATA_DIS_WC      (1 << 21)
231#              define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22)
232		/* 0 - none
233		 * 1 - 8 in 16
234		 * 2 - 8 in 32
235		 * 3 - 8 in 64
236		 */
237#              define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24)
238		/* 0 - none
239		 * 1 - 8 in 16
240		 * 2 - 8 in 32
241		 * 3 - 8 in 64
242		 */
243#              define PACKET3_DMA_DATA_CMD_SAS     (1 << 26)
244		/* 0 - memory
245		 * 1 - register
246		 */
247#              define PACKET3_DMA_DATA_CMD_DAS     (1 << 27)
248		/* 0 - memory
249		 * 1 - register
250		 */
251#              define PACKET3_DMA_DATA_CMD_SAIC    (1 << 28)
252#              define PACKET3_DMA_DATA_CMD_DAIC    (1 << 29)
253#              define PACKET3_DMA_DATA_CMD_RAW_WAIT  (1 << 30)
254
255#define SDMA_PACKET_SI(op, b, t, s, cnt)	((((op) & 0xF) << 28) |	\
256						(((b) & 0x1) << 26) |		\
257						(((t) & 0x1) << 23) |		\
258						(((s) & 0x1) << 22) |		\
259						(((cnt) & 0xFFFFF) << 0))
260#define	SDMA_OPCODE_COPY_SI	3
261#define SDMA_OPCODE_CONSTANT_FILL_SI	13
262#define SDMA_NOP_SI  0xf
263#define GFX_COMPUTE_NOP_SI 0x80000000
264#define	PACKET3_DMA_DATA_SI	0x41
265#              define PACKET3_DMA_DATA_SI_ENGINE(x)     ((x) << 27)
266		/* 0 - ME
267		 * 1 - PFP
268		 */
269#              define PACKET3_DMA_DATA_SI_DST_SEL(x)  ((x) << 20)
270		/* 0 - DST_ADDR using DAS
271		 * 1 - GDS
272		 * 3 - DST_ADDR using L2
273		 */
274#              define PACKET3_DMA_DATA_SI_SRC_SEL(x)  ((x) << 29)
275		/* 0 - SRC_ADDR using SAS
276		 * 1 - GDS
277		 * 2 - DATA
278		 * 3 - SRC_ADDR using L2
279		 */
280#              define PACKET3_DMA_DATA_SI_CP_SYNC     (1 << 31)
281
282
283#define PKT3_CONTEXT_CONTROL                   0x28
284#define     CONTEXT_CONTROL_LOAD_ENABLE(x)     (((unsigned)(x) & 0x1) << 31)
285#define     CONTEXT_CONTROL_LOAD_CE_RAM(x)     (((unsigned)(x) & 0x1) << 28)
286#define     CONTEXT_CONTROL_SHADOW_ENABLE(x)   (((unsigned)(x) & 0x1) << 31)
287
288#define PKT3_CLEAR_STATE                       0x12
289
290#define PKT3_SET_SH_REG                        0x76
291#define		PACKET3_SET_SH_REG_START			0x00002c00
292
293#define	PACKET3_DISPATCH_DIRECT				0x15
294#define PACKET3_EVENT_WRITE				0x46
295#define PACKET3_ACQUIRE_MEM				0x58
296#define PACKET3_SET_CONTEXT_REG				0x69
297#define PACKET3_SET_UCONFIG_REG				0x79
298#define PACKET3_DRAW_INDEX_AUTO				0x2D
299/* gfx 8 */
300#define mmCOMPUTE_PGM_LO                                                        0x2e0c
301#define mmCOMPUTE_PGM_RSRC1                                                     0x2e12
302#define mmCOMPUTE_TMPRING_SIZE                                                  0x2e18
303#define mmCOMPUTE_USER_DATA_0                                                   0x2e40
304#define mmCOMPUTE_USER_DATA_1                                                   0x2e41
305#define mmCOMPUTE_RESOURCE_LIMITS                                               0x2e15
306#define mmCOMPUTE_NUM_THREAD_X                                                  0x2e07
307
308
309
310#define SWAP_32(num) (((num & 0xff000000) >> 24) | \
311		      ((num & 0x0000ff00) << 8) | \
312		      ((num & 0x00ff0000) >> 8) | \
313		      ((num & 0x000000ff) << 24))
314
315
316/* Shader code
317 * void main()
318{
319
320	float x = some_input;
321		for (unsigned i = 0; i < 1000000; i++)
322  	x = sin(x);
323
324	u[0] = 42u;
325}
326*/
327
328static  uint32_t shader_bin[] = {
329	SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
330	SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
331	SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
332	SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
333};
334
335#define CODE_OFFSET 512
336#define DATA_OFFSET 1024
337
338enum cs_type {
339	CS_BUFFERCLEAR,
340	CS_BUFFERCOPY,
341	CS_HANG,
342	CS_HANG_SLOW
343};
344
345static const uint32_t bufferclear_cs_shader_gfx9[] = {
346    0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08,
347    0x7e020280, 0x7e040204, 0x7e060205, 0x7e080206,
348    0x7e0a0207, 0xe01c2000, 0x80000200, 0xbf8c0000,
349    0xbf810000
350};
351
352static const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = {
353	{0x2e12, 0x000C0041},	//{ mmCOMPUTE_PGM_RSRC1,	  0x000C0041 },
354	{0x2e13, 0x00000090},	//{ mmCOMPUTE_PGM_RSRC2,	  0x00000090 },
355	{0x2e07, 0x00000040},	//{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 },
356	{0x2e08, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 },
357	{0x2e09, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
358};
359
360static const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5;
361
362static const uint32_t buffercopy_cs_shader_gfx9[] = {
363    0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08,
364    0x7e020280, 0xe00c2000, 0x80000200, 0xbf8c0f70,
365    0xe01c2000, 0x80010200, 0xbf810000
366};
367
368static const uint32_t preamblecache_gfx9[] = {
369	0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
370	0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
371	0xc0026900, 0xb4,  0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
372	0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
373	0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
374	0xc0016900, 0x2d5, 0x10000, 0xc0016900,  0x2dc, 0x0,
375	0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
376	0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
377	0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20,
378	0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
379	0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0,
380	0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
381	0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
382	0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
383	0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
384	0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0,
385	0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1,
386	0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0,
387	0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0,
388	0xc0017900, 0x24b, 0x0
389};
390
391enum ps_type {
392	PS_CONST,
393	PS_TEX,
394	PS_HANG,
395	PS_HANG_SLOW
396};
397
398static const uint32_t ps_const_shader_gfx9[] = {
399    0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
400    0xD2960000, 0x00020300, 0xD2960001, 0x00020702,
401    0xC4001C0F, 0x00000100, 0xBF810000
402};
403
404static const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6;
405
406static const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = {
407    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
408     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 },
409     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 },
410     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 },
411     { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 },
412     { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 },
413     { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 },
414     { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 },
415     { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 },
416     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 }
417    }
418};
419
420static const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = {
421    0x00000004
422};
423
424static const uint32_t ps_num_sh_registers_gfx9 = 2;
425
426static const uint32_t ps_const_sh_registers_gfx9[][2] = {
427    {0x2C0A, 0x000C0040},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 },
428    {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 }
429};
430
431static const uint32_t ps_num_context_registers_gfx9 = 7;
432
433static const uint32_t ps_const_context_reg_gfx9[][2] = {
434    {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
435    {0xA1B6, 0x00000000}, //{ mmSPI_PS_IN_CONTROL,       0x00000000 },
436    {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
437    {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
438    {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
439    {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
440    {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004 }
441};
442
443static const uint32_t ps_tex_shader_gfx9[] = {
444    0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000,
445    0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00,
446    0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000,
447    0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F,
448    0x00000100, 0xBF810000
449};
450
451static const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = {
452    0x0000000B
453};
454
455static const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6;
456
457static const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = {
458    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
459     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 },
460     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 },
461     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 },
462     { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 },
463     { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 },
464     { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 },
465     { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 },
466     { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 },
467     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 }
468    }
469};
470
471static const uint32_t ps_tex_sh_registers_gfx9[][2] = {
472    {0x2C0A, 0x000C0081},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 },
473    {0x2C0B, 0x00000018}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 }
474};
475
476static const uint32_t ps_tex_context_reg_gfx9[][2] = {
477    {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
478    {0xA1B6, 0x00000001}, //{ mmSPI_PS_IN_CONTROL,       0x00000001 },
479    {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
480    {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
481    {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
482    {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
483    {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004  }
484};
485
486static const uint32_t vs_RectPosTexFast_shader_gfx9[] = {
487    0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100,
488    0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206,
489    0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080,
490    0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003,
491    0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101,
492    0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903,
493    0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100,
494    0xC400020F, 0x05060403, 0xBF810000
495};
496
497static const uint32_t cached_cmd_gfx9[] = {
498	0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0,
499	0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
500	0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf,
501	0xc0056900, 0x105, 0x0, 0x0,  0x0, 0x0, 0x12,
502	0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0,
503	0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
504	0xc0026900, 0x292, 0x20, 0x60201b8,
505	0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
506};
507
508unsigned int memcpy_ps_hang[] = {
509        0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100,
510        0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001,
511        0xC80D0101, 0xBF8C007F, 0xF0800F00, 0x00010002,
512        0xBEFE040C, 0xBF8C0F70, 0xBF800000, 0xBF800000,
513        0xF800180F, 0x03020100, 0xBF810000
514};
515
/* A test shader binary described as three consecutive dword sections.
 * NOTE(review): judging from the initializers below (e.g. memcpy_cs_hang_slow_ai
 * = {codes, 4, 3, 1}), the lengths appear to partition the code stream into a
 * prologue, a repeatable body, and an epilogue — confirm against the consumers
 * of this struct elsewhere in the file. */
struct amdgpu_test_shader {
	uint32_t *shader;	/* dword stream of shader machine code */
	uint32_t header_length;	/* dwords in the leading section */
	uint32_t body_length;	/* dwords in the middle (body) section */
	uint32_t foot_length;	/* dwords in the trailing section */
};
522
523unsigned int memcpy_cs_hang_slow_ai_codes[] = {
524    0xd1fd0000, 0x04010c08, 0xe00c2000, 0x80000100,
525    0xbf8c0f70, 0xe01c2000, 0x80010100, 0xbf810000
526};
527
528struct amdgpu_test_shader memcpy_cs_hang_slow_ai = {
529        memcpy_cs_hang_slow_ai_codes,
530        4,
531        3,
532        1
533};
534
535unsigned int memcpy_cs_hang_slow_rv_codes[] = {
536    0x8e00860c, 0x32000000, 0xe00c2000, 0x80010100,
537    0xbf8c0f70, 0xe01c2000, 0x80020100, 0xbf810000
538};
539
540struct amdgpu_test_shader memcpy_cs_hang_slow_rv = {
541        memcpy_cs_hang_slow_rv_codes,
542        4,
543        3,
544        1
545};
546
547unsigned int memcpy_ps_hang_slow_ai_codes[] = {
548        0xbefc000c, 0xbe8e017e, 0xbefe077e, 0xd4080000,
549        0xd4090001, 0xd40c0100, 0xd40d0101, 0xf0800f00,
550        0x00400002, 0xbefe010e, 0xbf8c0f70, 0xbf800000,
551        0xbf800000, 0xbf800000, 0xbf800000, 0xc400180f,
552        0x03020100, 0xbf810000
553};
554
555struct amdgpu_test_shader memcpy_ps_hang_slow_ai = {
556        memcpy_ps_hang_slow_ai_codes,
557        7,
558        2,
559        9
560};
561
562int amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size,
563			unsigned alignment, unsigned heap, uint64_t alloc_flags,
564			uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu,
565			uint64_t *mc_address,
566			amdgpu_va_handle *va_handle)
567{
568	struct amdgpu_bo_alloc_request request = {};
569	amdgpu_bo_handle buf_handle;
570	amdgpu_va_handle handle;
571	uint64_t vmc_addr;
572	int r;
573
574	request.alloc_size = size;
575	request.phys_alignment = alignment;
576	request.preferred_heap = heap;
577	request.flags = alloc_flags;
578
579	r = amdgpu_bo_alloc(dev, &request, &buf_handle);
580	if (r)
581		return r;
582
583	r = amdgpu_va_range_alloc(dev,
584				  amdgpu_gpu_va_range_general,
585				  size, alignment, 0, &vmc_addr,
586				  &handle, 0);
587	if (r)
588		goto error_va_alloc;
589
590	r = amdgpu_bo_va_op_raw(dev, buf_handle, 0,  ALIGN(size, getpagesize()), vmc_addr,
591				   AMDGPU_VM_PAGE_READABLE |
592				   AMDGPU_VM_PAGE_WRITEABLE |
593				   AMDGPU_VM_PAGE_EXECUTABLE |
594				   mapping_flags,
595				   AMDGPU_VA_OP_MAP);
596	if (r)
597		goto error_va_map;
598
599	r = amdgpu_bo_cpu_map(buf_handle, cpu);
600	if (r)
601		goto error_cpu_map;
602
603	*bo = buf_handle;
604	*mc_address = vmc_addr;
605	*va_handle = handle;
606
607	return 0;
608
609 error_cpu_map:
610	amdgpu_bo_cpu_unmap(buf_handle);
611
612 error_va_map:
613	amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);
614
615 error_va_alloc:
616	amdgpu_bo_free(buf_handle);
617	return r;
618}
619
620
621
622CU_BOOL suite_basic_tests_enable(void)
623{
624
625	if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
626					     &minor_version, &device_handle))
627		return CU_FALSE;
628
629
630	family_id = device_handle->info.family_id;
631	chip_id = device_handle->info.chip_external_rev;
632	chip_rev = device_handle->info.chip_rev;
633
634	if (amdgpu_device_deinitialize(device_handle))
635		return CU_FALSE;
636
637	/* disable gfx engine basic test cases for some asics have no CPG */
638	if (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) {
639		if (amdgpu_set_test_active("Basic Tests",
640					"Command submission Test (GFX)",
641					CU_FALSE))
642			fprintf(stderr, "test deactivation failed - %s\n",
643				CU_get_error_msg());
644
645		if (amdgpu_set_test_active("Basic Tests",
646					"Command submission Test (Multi-Fence)",
647					CU_FALSE))
648			fprintf(stderr, "test deactivation failed - %s\n",
649				CU_get_error_msg());
650
651		if (amdgpu_set_test_active("Basic Tests",
652					"Sync dependency Test",
653					CU_FALSE))
654			fprintf(stderr, "test deactivation failed - %s\n",
655				CU_get_error_msg());
656	}
657
658	return CU_TRUE;
659}
660
661int suite_basic_tests_init(void)
662{
663	struct amdgpu_gpu_info gpu_info = {0};
664	int r;
665
666	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
667				   &minor_version, &device_handle);
668
669	if (r) {
670		if ((r == -EACCES) && (errno == EACCES))
671			printf("\n\nError:%s. "
672				"Hint:Try to run this test program as root.",
673				strerror(errno));
674		return CUE_SINIT_FAILED;
675	}
676
677	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
678	if (r)
679		return CUE_SINIT_FAILED;
680
681	family_id = gpu_info.family_id;
682
683	return CUE_SUCCESS;
684}
685
686int suite_basic_tests_clean(void)
687{
688	int r = amdgpu_device_deinitialize(device_handle);
689
690	if (r == 0)
691		return CUE_SUCCESS;
692	else
693		return CUE_SCLEAN_FAILED;
694}
695
696static void amdgpu_query_info_test(void)
697{
698	struct amdgpu_gpu_info gpu_info = {0};
699	uint32_t version, feature;
700	int r;
701
702	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
703	CU_ASSERT_EQUAL(r, 0);
704
705	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
706					  0, &version, &feature);
707	CU_ASSERT_EQUAL(r, 0);
708}
709
/* Submit one request containing two IBs held in two separate BOs on the
 * GFX ring: a constant-engine (CE) IB followed by a DE IB that waits on
 * the CE counter, then block on the resulting fence and free everything.
 * NOTE(review): the raw dwords below look like IT_SET_CE_DE_COUNTERS /
 * IT_INCREMENT_CE_COUNTER and IT_WAIT_ON_CE_COUNTER packet headers —
 * confirm the encodings against the PM4 packet spec for each family. */
static void amdgpu_command_submission_gfx_separate_ibs(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* One GTT page for the DE IB, one for the CE IB. */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		/* SI has no CE counters packet, so the CE IB is shorter there. */
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = i;		/* dword count written so far */
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	/* Both IBs go into a single request so the CE IB is scheduled
	 * ahead of the DE IB that waits on it. */
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);

	CU_ASSERT_EQUAL(r, 0);

	/* Wait for the submission to retire before freeing its IBs. */
	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);

}
800
/* Same CE + DE submission as amdgpu_command_submission_gfx_separate_ibs(),
 * but with both IBs carved out of a single shared BO: the CE IB starts at
 * offset 0 and the DE IB at byte offset 16 (4 dwords) of the same buffer. */
static void amdgpu_command_submission_gfx_shared_ib(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* Single GTT page shared by both IBs. */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
			       &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		/* SI has no CE counters packet, so the CE IB is shorter there. */
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_mc_address;
	ib_info[0].size = i;		/* dword count written so far */
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* DE IB (IT_WAIT_ON_CE_COUNTER) lives 4 dwords / 16 bytes into
	 * the same BO, so its GPU address is the BO base + 16. */
	ptr = (uint32_t *)ib_result_cpu + 4;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);

	CU_ASSERT_EQUAL(r, 0);

	/* Wait for the submission to retire before freeing the shared IB BO. */
	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
879
/* Exercise CP WRITE_DATA on the GFX ring via the shared linear-write helper. */
static void amdgpu_command_submission_gfx_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
}
884
/* Exercise CP constant-fill on the GFX ring via the shared fill helper. */
static void amdgpu_command_submission_gfx_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
}
889
/* Exercise CP linear copy on the GFX ring via the shared copy helper. */
static void amdgpu_command_submission_gfx_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
}
894
/*
 * Buffer-eviction stress test.
 *
 * Allocates two max_allocation-sized BOs in VRAM and two in GTT
 * (presumably more than either heap can keep resident at once — the
 * point is to force the kernel to evict/migrate BOs between domains),
 * then repeatedly submits a small SDMA linear copy between two GTT
 * buffers while listing one huge VRAM BO and one huge GTT BO as
 * resources of each submission.  The copy result is verified from the
 * CPU after every submission.
 */
static void amdgpu_bo_eviction_test(void)
{
	const int sdma_write_length = 1024;	/* bytes copied per pass */
	const int pm4_dw = 256;			/* PM4 buffer capacity, dwords */
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo1_mc, bo2_mc;
	volatile unsigned char *bo1_cpu, *bo2_cpu;
	int i, j, r, loop1, loop2;
	/* [0]: cached GTT, [1]: write-combined (USWC) GTT mapping */
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
	struct amdgpu_heap_info vram_info, gtt_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(4, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
				   0, &vram_info);
	CU_ASSERT_EQUAL(r, 0);

	/* pin VRAM with two max-sized BOs so touching one evicts the other */
	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
				   0, &gtt_info);
	CU_ASSERT_EQUAL(r, 0);

	/* same trick for the GTT heap */
	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
	CU_ASSERT_EQUAL(r, 0);



	loop1 = loop2 = 0;
	/* 2x2 loop: exercise every cached/USWC flag combination for bo1/bo2 */
	while(loop1 < 2) {
		while(loop2 < 2) {
			/* allocate UC bo1 for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop1], &bo1,
						    (void**)&bo1_cpu, &bo1_mc,
						    &bo1_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* set bo1 (copy source pattern) */
			memset((void*)bo1_cpu, 0xaa, sdma_write_length);

			/* allocate UC bo2 for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop2], &bo2,
						    (void**)&bo2_cpu, &bo2_mc,
						    &bo2_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo2 (copy destination) */
			memset((void*)bo2_cpu, 0, sdma_write_length);

			resources[0] = bo1;
			resources[1] = bo2;
			resources[2] = vram_max[loop2];
			resources[3] = gtt_max[loop2];

			/* fulfill PM4: test DMA copy linear (bo1 -> bo2) */
			i = j = 0;
			if (family_id == AMDGPU_FAMILY_SI) {
				/* SI layout: dst lo, src lo, dst hi, src hi */
				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
							  sdma_write_length);
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
			} else {
				/* CIK+ layout: count, 0, src lo/hi, dst lo/hi;
				 * AI and later encode byte count minus one */
				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else
					pm4[i++] = sdma_write_length;
				pm4[i++] = 0;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   AMDGPU_HW_IP_DMA, 0,
						   i, pm4,
						   4, resources,
						   ib_info, ibs_request);

			/* verify if SDMA test result meets with expected */
			i = 0;
			while(i < sdma_write_length) {
				CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
			}
			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			loop2++;
		}
		loop2 = 0;
		loop1++;
	}
	amdgpu_bo_free(vram_max[0]);
	amdgpu_bo_free(vram_max[1]);
	amdgpu_bo_free(gtt_max[0]);
	amdgpu_bo_free(gtt_max[1]);
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
1043
1044
/* Top-level GFX command-submission test: runs each CP sub-test in turn. */
static void amdgpu_command_submission_gfx(void)
{
	/* write data using the CP */
	amdgpu_command_submission_gfx_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_gfx_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_gfx_cp_copy_data();
	/* separate IB buffers for multi-IB submission */
	amdgpu_command_submission_gfx_separate_ibs();
	/* shared IB buffer for multi-IB submission */
	amdgpu_command_submission_gfx_shared_ib();
}
1058
/*
 * Semaphore test: signal on one queue, wait on another, in two setups:
 *   1) same context, different engines (SDMA signals, GFX/compute waits);
 *   2) same engine, different contexts.
 * Each half submits a single NOP IB on the signalling queue, then a
 * single NOP IB on the waiting queue, and checks that the second fence
 * expires within 500 ms.  GFX is replaced by compute when the ASIC has
 * no graphics pipe (asic_is_gfx_pipe_removed()).
 */
static void amdgpu_semaphore_test(void)
{
	amdgpu_context_handle context_handle[2];
	amdgpu_semaphore_handle sem;
	amdgpu_bo_handle ib_result_handle[2];
	void *ib_result_cpu[2];
	uint64_t ib_result_mc_address[2];
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2] = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	uint32_t sdma_nop, gfx_nop;
	amdgpu_bo_list_handle bo_list[2];
	amdgpu_va_handle va_handle[2];
	int r, i;
	struct amdgpu_gpu_info gpu_info = {0};
	unsigned gc_ip_type;

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	/* fall back to the compute ring on ASICs without a GFX pipe */
	gc_ip_type = (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) ?
			AMDGPU_HW_IP_COMPUTE : AMDGPU_HW_IP_GFX;

	/* NOP encodings differ between SI and later families */
	if (family_id == AMDGPU_FAMILY_SI) {
		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
		gfx_nop = GFX_COMPUTE_NOP_SI;
	} else {
		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
		gfx_nop = GFX_COMPUTE_NOP;
	}

	r = amdgpu_cs_create_semaphore(&sem);
	CU_ASSERT_EQUAL(r, 0);
	/* two contexts, each with its own single-dword IB and BO list */
	for (i = 0; i < 2; i++) {
		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle[i], &ib_result_cpu[i],
					    &ib_result_mc_address[i], &va_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
				       NULL, &bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	/* 1. same context different engine */
	ptr = ib_result_cpu[0];
	ptr[0] = sdma_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[0], gc_ip_type, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = gc_ip_type;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	/* the waiting submission must complete within the 500 ms timeout */
	fence_status.context = context_handle[0];
	fence_status.ip_type = gc_ip_type;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* 2. same engine different context */
	ptr = ib_result_cpu[0];
	ptr[0] = gfx_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = gc_ip_type;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], gc_ip_type, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	/* wait in context 1 for the semaphore signalled by context 0 */
	r = amdgpu_cs_wait_semaphore(context_handle[1], gc_ip_type, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = gc_ip_type;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request[1], 1);

	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[1];
	fence_status.ip_type = gc_ip_type;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* tear down both contexts and their buffers */
	for (i = 0; i < 2; i++) {
		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
					     ib_result_mc_address[i], 4096);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_cs_ctx_free(context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_destroy_semaphore(sem);
	CU_ASSERT_EQUAL(r, 0);
}
1206
/*
 * Submit a single PACKET3_NOP IB on every available compute ring and
 * wait for each fence, verifying basic submission/fence plumbing on the
 * compute IP without doing any actual work.
 */
static void amdgpu_command_submission_compute_nop(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request;
	struct amdgpu_cs_ib_info ib_info;
	struct amdgpu_cs_fence fence_status;
	uint32_t *ptr;
	uint32_t expired;
	int r, instance;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip info;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* available_rings is a bitmask; iterate each exposed compute ring */
	for (instance = 0; (1 << instance) & info.available_rings; instance++) {
		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle, &ib_result_cpu,
					    &ib_result_mc_address, &va_handle);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
				       &bo_list);
		CU_ASSERT_EQUAL(r, 0);

		ptr = ib_result_cpu;
		/* NOTE(review): clears only 16 bytes (4 dwords) of the
		 * 16-dword IB; the NOP packet's payload dwords are ignored
		 * by the CP, so the remaining garbage is harmless */
		memset(ptr, 0, 16);
		/* one NOP covering the whole 16-dword IB */
		ptr[0]=PACKET3(PACKET3_NOP, 14);

		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
		ib_info.ib_mc_address = ib_result_mc_address;
		ib_info.size = 16;

		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
		ibs_request.ring = instance;
		ibs_request.number_of_ibs = 1;
		ibs_request.ibs = &ib_info;
		ibs_request.resources = bo_list;
		ibs_request.fence_info.handle = NULL;

		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
		r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
		CU_ASSERT_EQUAL(r, 0);

		fence_status.context = context_handle;
		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
		fence_status.ip_instance = 0;
		fence_status.ring = instance;
		fence_status.fence = ibs_request.seq_no;

		/* block until this ring's submission retires */
		r = amdgpu_cs_query_fence_status(&fence_status,
						 AMDGPU_TIMEOUT_INFINITE,
						 0, &expired);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
					     ib_result_mc_address, 4096);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
1282
/* Submit a CP WRITE_DATA packet stream on the compute IP. */
static void amdgpu_command_submission_compute_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
}
1287
/* Submit a CP constant-fill (DMA_DATA) packet stream on the compute IP. */
static void amdgpu_command_submission_compute_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
}
1292
/* Submit a CP buffer-to-buffer copy packet stream on the compute IP. */
static void amdgpu_command_submission_compute_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
}
1297
/* Top-level compute command-submission test: runs each sub-test in turn. */
static void amdgpu_command_submission_compute(void)
{
	/* write data using the CP */
	amdgpu_command_submission_compute_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_compute_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_compute_cp_copy_data();
	/* nop test */
	amdgpu_command_submission_compute_nop();
}
1309
1310/*
1311 * caller need create/release:
1312 * pm4_src, resources, ib_info, and ibs_request
1313 * submit command stream described in ibs_request and wait for this IB accomplished
1314 */
1315void
1316amdgpu_test_exec_cs_helper_raw(amdgpu_device_handle device_handle,
1317			       amdgpu_context_handle context_handle,
1318			       unsigned ip_type, int instance, int pm4_dw,
1319			       uint32_t *pm4_src, int res_cnt,
1320			       amdgpu_bo_handle *resources,
1321			       struct amdgpu_cs_ib_info *ib_info,
1322			       struct amdgpu_cs_request *ibs_request,
1323			       bool secure)
1324{
1325	int r;
1326	uint32_t expired;
1327	uint32_t *ring_ptr;
1328	amdgpu_bo_handle ib_result_handle;
1329	void *ib_result_cpu;
1330	uint64_t ib_result_mc_address;
1331	struct amdgpu_cs_fence fence_status = {0};
1332	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
1333	amdgpu_va_handle va_handle;
1334
1335	/* prepare CS */
1336	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
1337	CU_ASSERT_NOT_EQUAL(resources, NULL);
1338	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1339	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1340	CU_ASSERT_TRUE(pm4_dw <= 1024);
1341
1342	/* allocate IB */
1343	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1344				    AMDGPU_GEM_DOMAIN_GTT, 0,
1345				    &ib_result_handle, &ib_result_cpu,
1346				    &ib_result_mc_address, &va_handle);
1347	CU_ASSERT_EQUAL(r, 0);
1348
1349	/* copy PM4 packet to ring from caller */
1350	ring_ptr = ib_result_cpu;
1351	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));
1352
1353	ib_info->ib_mc_address = ib_result_mc_address;
1354	ib_info->size = pm4_dw;
1355	if (secure)
1356		ib_info->flags |= AMDGPU_IB_FLAGS_SECURE;
1357
1358	ibs_request->ip_type = ip_type;
1359	ibs_request->ring = instance;
1360	ibs_request->number_of_ibs = 1;
1361	ibs_request->ibs = ib_info;
1362	ibs_request->fence_info.handle = NULL;
1363
1364	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
1365	all_res[res_cnt] = ib_result_handle;
1366
1367	r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res,
1368				  NULL, &ibs_request->resources);
1369	CU_ASSERT_EQUAL(r, 0);
1370
1371	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1372
1373	/* submit CS */
1374	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
1375	CU_ASSERT_EQUAL(r, 0);
1376
1377	r = amdgpu_bo_list_destroy(ibs_request->resources);
1378	CU_ASSERT_EQUAL(r, 0);
1379
1380	fence_status.ip_type = ip_type;
1381	fence_status.ip_instance = 0;
1382	fence_status.ring = ibs_request->ring;
1383	fence_status.context = context_handle;
1384	fence_status.fence = ibs_request->seq_no;
1385
1386	/* wait for IB accomplished */
1387	r = amdgpu_cs_query_fence_status(&fence_status,
1388					 AMDGPU_TIMEOUT_INFINITE,
1389					 0, &expired);
1390	CU_ASSERT_EQUAL(r, 0);
1391	CU_ASSERT_EQUAL(expired, true);
1392
1393	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1394				     ib_result_mc_address, 4096);
1395	CU_ASSERT_EQUAL(r, 0);
1396}
1397
/*
 * Convenience wrapper around amdgpu_test_exec_cs_helper_raw() using the
 * file-global device handle and a non-secure (non-TMZ) submission.
 */
static void
amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
			   unsigned ip_type, int instance, int pm4_dw,
			   uint32_t *pm4_src, int res_cnt,
			   amdgpu_bo_handle *resources,
			   struct amdgpu_cs_ib_info *ib_info,
			   struct amdgpu_cs_request *ibs_request)
{
	amdgpu_test_exec_cs_helper_raw(device_handle, context_handle,
				       ip_type, instance, pm4_dw, pm4_src,
				       res_cnt, resources, ib_info,
				       ibs_request, false);
}
1411
/*
 * Write-linear test shared by SDMA, GFX and compute.
 *
 * For every available ring of 'ip_type', and for both cached and USWC
 * GTT mappings, writes a 0xdeadbeaf pattern into a BO with the IP's
 * native write packet and verifies the result.
 *
 * When 'secure' is set, the BO is allocated encrypted (TMZ) and the CPU
 * cannot read the pattern back directly; instead, a GPU atomic
 * compare-and-swap against the protected buffer is used to infer
 * whether the write landed (see the inline comments below).
 */
void
amdgpu_command_submission_write_linear_helper_with_secure(amdgpu_device_handle
							  device, unsigned
							  ip_type, bool secure)
{
	const int sdma_write_length = 128;	/* dwords written per pass */
	const int pm4_dw = 256;			/* PM4 buffer capacity, dwords */
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	uint32_t bo_cpu_origin;
	int i, j, r, loop, ring_id;
	/* [0]: cached GTT, [1]: write-combined (USWC) GTT mapping */
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	/* for TMZ runs every BO must be allocated encrypted */
	for (i = 0; secure && (i < 2); i++)
		gtt_flags[i] |= AMDGPU_GEM_CREATE_ENCRYPTED;

	r = amdgpu_cs_ctx_create(device, &context_handle);

	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	/* available_rings is a bitmask; iterate each exposed ring */
	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop = 0;
		while(loop < 2) {
			/* allocate UC bo for sDMA use */
			r = amdgpu_bo_alloc_and_map(device,
						    sdma_write_length * sizeof(uint32_t),
						    4096, AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop], &bo, (void**)&bo_cpu,
						    &bo_mc, &va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo */
			memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));

			resources[0] = bo;

			/* fulfill PM4: test DMA write-linear */
			i = j = 0;
			if (ip_type == AMDGPU_HW_IP_DMA) {
				if (family_id == AMDGPU_FAMILY_SI)
					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
								  sdma_write_length);
				else
					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
							       SDMA_WRITE_SUB_OPCODE_LINEAR,
							       secure ? SDMA_ATOMIC_TMZ(1) : 0);
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				/* AI+ encodes dword count minus one; SI carries
				 * the count in the packet header instead */
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else if (family_id != AMDGPU_FAMILY_SI)
					pm4[i++] = sdma_write_length;
				while(j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				    (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
				pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				while(j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			}

			amdgpu_test_exec_cs_helper_raw(device, context_handle,
						       ip_type, ring_id, i, pm4,
						       1, resources, ib_info,
						       ibs_request, secure);

			/* verify if SDMA test result meets with expected */
			i = 0;
			if (!secure) {
				/* plain BO: read the pattern back directly */
				while(i < sdma_write_length) {
					CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
				}
			} else if (ip_type == AMDGPU_HW_IP_GFX) {
				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
				pm4[i++] = PACKET3(PACKET3_ATOMIC_MEM, 7);
				/* atomic opcode for 32b w/ RTN and ATOMIC_SWAPCMP_RTN
				 * command, 1-loop_until_compare_satisfied.
				 * single_pass_atomic, 0-lru
				 * engine_sel, 0-micro_engine
				 */
				pm4[i++] = (TC_OP_ATOMIC_CMPSWAP_RTN_32 |
							ATOMIC_MEM_COMMAND(1) |
							ATOMIC_MEM_CACHEPOLICAY(0) |
							ATOMIC_MEM_ENGINESEL(0));
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				pm4[i++] = 0x12345678;	/* swap data */
				pm4[i++] = 0x0;
				pm4[i++] = 0xdeadbeaf;	/* compare data */
				pm4[i++] = 0x0;
				pm4[i++] = 0x100;	/* loop interval */
				amdgpu_test_exec_cs_helper_raw(device, context_handle,
							ip_type, ring_id, i, pm4,
							1, resources, ib_info,
							ibs_request, true);
			} else if (ip_type == AMDGPU_HW_IP_DMA) {
				/* restore the bo_cpu to compare */
				bo_cpu_origin = bo_cpu[0];
				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
				/* atomic opcode for 32b w/ RTN and ATOMIC_SWAPCMP_RTN
				 * loop, 1-loop_until_compare_satisfied.
				 * single_pass_atomic, 0-lru
				 */
				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
							       0,
							       SDMA_ATOMIC_LOOP(1) |
							       SDMA_ATOMIC_TMZ(1) |
							       SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				pm4[i++] = 0x12345678;	/* swap data */
				pm4[i++] = 0x0;
				pm4[i++] = 0xdeadbeaf;	/* compare data: must match */
				pm4[i++] = 0x0;
				pm4[i++] = 0x100;	/* loop interval */
				amdgpu_test_exec_cs_helper_raw(device, context_handle,
							ip_type, ring_id, i, pm4,
							1, resources, ib_info,
							ibs_request, true);
				/* DMA's atomic behavior is unlike GFX.
				 * If the comparing data is not equal to destination data,
				 * For GFX, loop again till gfx timeout(system hang).
				 * For DMA, loop again till timer expired and then send interrupt.
				 * So testcase can't use interrupt mechanism.
				 * We take another way to verify. When the comparing data is not
				 * equal to destination data, overwrite the source data to the destination
				 * buffer. Otherwise, original destination data unchanged.
				 * So if the bo_cpu data is overwritten, the result is passed.
				 */
				CU_ASSERT_NOT_EQUAL(bo_cpu[0], bo_cpu_origin);

				/* compare again for the case of dest_data != cmp_data */
				i = 0;
				/* restore again, here dest_data should be */
				bo_cpu_origin = bo_cpu[0];
				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
							       0,
							       SDMA_ATOMIC_LOOP(1) |
							       SDMA_ATOMIC_TMZ(1) |
							       SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				pm4[i++] = 0x87654321;	/* swap data */
				pm4[i++] = 0x0;
				pm4[i++] = 0xdeadbeaf;	/* compare data: must NOT match now */
				pm4[i++] = 0x0;
				pm4[i++] = 0x100;	/* loop interval */
				amdgpu_test_exec_cs_helper_raw(device, context_handle,
							ip_type, ring_id, i, pm4,
							1, resources, ib_info,
							ibs_request, true);
				/* here bo_cpu[0] should be unchanged, still is 0x12345678, otherwise failed*/
				CU_ASSERT_EQUAL(bo_cpu[0], bo_cpu_origin);
			}

			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
						     sdma_write_length * sizeof(uint32_t));
			CU_ASSERT_EQUAL(r, 0);
			loop++;
		}
	}
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
1611
/* Non-secure (non-TMZ) write-linear test on the global device handle. */
static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
{
	amdgpu_command_submission_write_linear_helper_with_secure(device_handle,
								  ip_type,
								  false);
}
1618
/* Write-linear test on the SDMA IP. */
static void amdgpu_command_submission_sdma_write_linear(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
}
1623
/*
 * Constant-fill test shared by SDMA, GFX and compute.
 *
 * For every available ring of 'ip_type', and for both cached and USWC
 * GTT mappings, fills a 1 MiB BO with 0xdeadbeaf using the IP's native
 * fill packet (SDMA CONSTANT_FILL or CP DMA_DATA with a constant
 * source) and verifies the whole buffer from the CPU.
 */
static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
{
	const int sdma_write_length = 1024 * 1024;	/* fill size, bytes */
	const int pm4_dw = 256;				/* PM4 capacity, dwords */
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	int i, j, r, loop, ring_id;
	/* [0]: cached GTT, [1]: write-combined (USWC) GTT mapping */
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	/* available_rings is a bitmask; iterate each exposed ring */
	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop = 0;
		while(loop < 2) {
			/* allocate UC bo for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop], &bo, (void**)&bo_cpu,
						    &bo_mc, &va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo */
			memset((void*)bo_cpu, 0, sdma_write_length);

			resources[0] = bo;

			/* fulfill PM4: test DMA const fill */
			i = j = 0;
			if (ip_type == AMDGPU_HW_IP_DMA) {
				if (family_id == AMDGPU_FAMILY_SI) {
					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
								  0, 0, 0,
								  sdma_write_length / 4);
					pm4[i++] = 0xfffffffc & bo_mc;
					pm4[i++] = 0xdeadbeaf;
					/* SI carries the high address bits at
					 * a 16-bit offset in this dword */
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
				} else {
					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
							       SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
					pm4[i++] = 0xffffffff & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = 0xdeadbeaf;
					/* AI+ encodes byte count minus one */
					if (family_id >= AMDGPU_FAMILY_AI)
						pm4[i++] = sdma_write_length - 1;
					else
						pm4[i++] = sdma_write_length;
				}
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				/* CP path: DMA_DATA with SRC_SEL(2) = the
				 * literal data dword as fill source */
				if (family_id == AMDGPU_FAMILY_SI) {
					pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
						   PACKET3_DMA_DATA_SI_DST_SEL(0) |
						   PACKET3_DMA_DATA_SI_SRC_SEL(2) |
						   PACKET3_DMA_DATA_SI_CP_SYNC;
					pm4[i++] = 0xffffffff & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = sdma_write_length;
				} else {
					pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
					pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
						   PACKET3_DMA_DATA_DST_SEL(0) |
						   PACKET3_DMA_DATA_SRC_SEL(2) |
						   PACKET3_DMA_DATA_CP_SYNC;
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = 0;
					pm4[i++] = 0xfffffffc & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = sdma_write_length;
				}
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   ip_type, ring_id,
						   i, pm4,
						   1, resources,
						   ib_info, ibs_request);

			/* verify if SDMA test result meets with expected */
			i = 0;
			while(i < (sdma_write_length / 4)) {
				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
			}

			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			loop++;
		}
	}
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
1751
/* Constant-fill test on the SDMA IP. */
static void amdgpu_command_submission_sdma_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
}
1756
1757static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
1758{
1759	const int sdma_write_length = 1024;
1760	const int pm4_dw = 256;
1761	amdgpu_context_handle context_handle;
1762	amdgpu_bo_handle bo1, bo2;
1763	amdgpu_bo_handle *resources;
1764	uint32_t *pm4;
1765	struct amdgpu_cs_ib_info *ib_info;
1766	struct amdgpu_cs_request *ibs_request;
1767	uint64_t bo1_mc, bo2_mc;
1768	volatile unsigned char *bo1_cpu, *bo2_cpu;
1769	int i, j, r, loop1, loop2, ring_id;
1770	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1771	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
1772	struct drm_amdgpu_info_hw_ip hw_ip_info;
1773
1774	pm4 = calloc(pm4_dw, sizeof(*pm4));
1775	CU_ASSERT_NOT_EQUAL(pm4, NULL);
1776
1777	ib_info = calloc(1, sizeof(*ib_info));
1778	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1779
1780	ibs_request = calloc(1, sizeof(*ibs_request));
1781	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1782
1783	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1784	CU_ASSERT_EQUAL(r, 0);
1785
1786	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1787	CU_ASSERT_EQUAL(r, 0);
1788
1789	/* prepare resource */
1790	resources = calloc(2, sizeof(amdgpu_bo_handle));
1791	CU_ASSERT_NOT_EQUAL(resources, NULL);
1792
1793	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1794		loop1 = loop2 = 0;
1795		/* run 9 circle to test all mapping combination */
1796		while(loop1 < 2) {
1797			while(loop2 < 2) {
1798				/* allocate UC bo1for sDMA use */
1799				r = amdgpu_bo_alloc_and_map(device_handle,
1800							    sdma_write_length, 4096,
1801							    AMDGPU_GEM_DOMAIN_GTT,
1802							    gtt_flags[loop1], &bo1,
1803							    (void**)&bo1_cpu, &bo1_mc,
1804							    &bo1_va_handle);
1805				CU_ASSERT_EQUAL(r, 0);
1806
1807				/* set bo1 */
1808				memset((void*)bo1_cpu, 0xaa, sdma_write_length);
1809
1810				/* allocate UC bo2 for sDMA use */
1811				r = amdgpu_bo_alloc_and_map(device_handle,
1812							    sdma_write_length, 4096,
1813							    AMDGPU_GEM_DOMAIN_GTT,
1814							    gtt_flags[loop2], &bo2,
1815							    (void**)&bo2_cpu, &bo2_mc,
1816							    &bo2_va_handle);
1817				CU_ASSERT_EQUAL(r, 0);
1818
1819				/* clear bo2 */
1820				memset((void*)bo2_cpu, 0, sdma_write_length);
1821
1822				resources[0] = bo1;
1823				resources[1] = bo2;
1824
1825				/* fulfill PM4: test DMA copy linear */
1826				i = j = 0;
1827				if (ip_type == AMDGPU_HW_IP_DMA) {
1828					if (family_id == AMDGPU_FAMILY_SI) {
1829						pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
1830									  0, 0, 0,
1831									  sdma_write_length);
1832						pm4[i++] = 0xffffffff & bo2_mc;
1833						pm4[i++] = 0xffffffff & bo1_mc;
1834						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1835						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1836					} else {
1837						pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
1838								       SDMA_COPY_SUB_OPCODE_LINEAR,
1839								       0);
1840						if (family_id >= AMDGPU_FAMILY_AI)
1841							pm4[i++] = sdma_write_length - 1;
1842						else
1843							pm4[i++] = sdma_write_length;
1844						pm4[i++] = 0;
1845						pm4[i++] = 0xffffffff & bo1_mc;
1846						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1847						pm4[i++] = 0xffffffff & bo2_mc;
1848						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1849					}
1850				} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1851					   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1852					if (family_id == AMDGPU_FAMILY_SI) {
1853						pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
1854						pm4[i++] = 0xfffffffc & bo1_mc;
1855						pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
1856							   PACKET3_DMA_DATA_SI_DST_SEL(0) |
1857							   PACKET3_DMA_DATA_SI_SRC_SEL(0) |
1858							   PACKET3_DMA_DATA_SI_CP_SYNC |
1859							   (0xffff00000000 & bo1_mc) >> 32;
1860						pm4[i++] = 0xfffffffc & bo2_mc;
1861						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1862						pm4[i++] = sdma_write_length;
1863					} else {
1864						pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
1865						pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
1866							   PACKET3_DMA_DATA_DST_SEL(0) |
1867							   PACKET3_DMA_DATA_SRC_SEL(0) |
1868							   PACKET3_DMA_DATA_CP_SYNC;
1869						pm4[i++] = 0xfffffffc & bo1_mc;
1870						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1871						pm4[i++] = 0xfffffffc & bo2_mc;
1872						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1873						pm4[i++] = sdma_write_length;
1874					}
1875				}
1876
1877				amdgpu_test_exec_cs_helper(context_handle,
1878							   ip_type, ring_id,
1879							   i, pm4,
1880							   2, resources,
1881							   ib_info, ibs_request);
1882
1883				/* verify if SDMA test result meets with expected */
1884				i = 0;
1885				while(i < sdma_write_length) {
1886					CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
1887				}
1888				r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
1889							     sdma_write_length);
1890				CU_ASSERT_EQUAL(r, 0);
1891				r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
1892							     sdma_write_length);
1893				CU_ASSERT_EQUAL(r, 0);
1894				loop2++;
1895			}
1896			loop1++;
1897		}
1898	}
1899	/* clean resources */
1900	free(resources);
1901	free(ibs_request);
1902	free(ib_info);
1903	free(pm4);
1904
1905	/* end of test */
1906	r = amdgpu_cs_ctx_free(context_handle);
1907	CU_ASSERT_EQUAL(r, 0);
1908}
1909
/* CUnit entry point: run the copy-linear helper on the SDMA (DMA) IP. */
static void amdgpu_command_submission_sdma_copy_linear(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
}
1914
/* CUnit entry point: run all three SDMA sub-tests (write, fill, copy). */
static void amdgpu_command_submission_sdma(void)
{
	amdgpu_command_submission_sdma_write_linear();
	amdgpu_command_submission_sdma_const_fill();
	amdgpu_command_submission_sdma_copy_linear();
}
1921
/*
 * Submit two identical GFX jobs, each made of a constant-engine (CE) IB
 * plus a DE IB that waits on the CE counter, then wait on both resulting
 * fences with a single amdgpu_cs_wait_fences() call.
 *
 * wait_all: forwarded to amdgpu_cs_wait_fences(); true waits for all
 *           fences, false returns as soon as one signals.
 */
static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status[2] = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r;
	int i = 0, ib_cs_num = 2;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* One GTT BO for the DE IB, one for the CE IB. */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		/* packet 0xc0008900 only exists on post-SI parts */
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	/* Both requests share the same two IBs and the same BO list. */
	for (i = 0; i < ib_cs_num; i++) {
		ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
		ibs_request[i].number_of_ibs = 2;
		ibs_request[i].ibs = ib_info;
		ibs_request[i].resources = bo_list;
		ibs_request[i].fence_info.handle = NULL;
	}

	r = amdgpu_cs_submit(context_handle, 0,ibs_request, ib_cs_num);

	CU_ASSERT_EQUAL(r, 0);

	/* Collect one fence per submitted request. */
	for (i = 0; i < ib_cs_num; i++) {
		fence_status[i].context = context_handle;
		fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
		fence_status[i].fence = ibs_request[i].seq_no;
	}

	/* The call under test: wait on both fences at once. */
	r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
				AMDGPU_TIMEOUT_INFINITE,
				&expired, NULL);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
2015
2016static void amdgpu_command_submission_multi_fence(void)
2017{
2018	amdgpu_command_submission_multi_fence_wait_all(true);
2019	amdgpu_command_submission_multi_fence_wait_all(false);
2020}
2021
2022static void amdgpu_userptr_test(void)
2023{
2024	int i, r, j;
2025	uint32_t *pm4 = NULL;
2026	uint64_t bo_mc;
2027	void *ptr = NULL;
2028	int pm4_dw = 256;
2029	int sdma_write_length = 4;
2030	amdgpu_bo_handle handle;
2031	amdgpu_context_handle context_handle;
2032	struct amdgpu_cs_ib_info *ib_info;
2033	struct amdgpu_cs_request *ibs_request;
2034	amdgpu_bo_handle buf_handle;
2035	amdgpu_va_handle va_handle;
2036
2037	pm4 = calloc(pm4_dw, sizeof(*pm4));
2038	CU_ASSERT_NOT_EQUAL(pm4, NULL);
2039
2040	ib_info = calloc(1, sizeof(*ib_info));
2041	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
2042
2043	ibs_request = calloc(1, sizeof(*ibs_request));
2044	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
2045
2046	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2047	CU_ASSERT_EQUAL(r, 0);
2048
2049	posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
2050	CU_ASSERT_NOT_EQUAL(ptr, NULL);
2051	memset(ptr, 0, BUFFER_SIZE);
2052
2053	r = amdgpu_create_bo_from_user_mem(device_handle,
2054					   ptr, BUFFER_SIZE, &buf_handle);
2055	CU_ASSERT_EQUAL(r, 0);
2056
2057	r = amdgpu_va_range_alloc(device_handle,
2058				  amdgpu_gpu_va_range_general,
2059				  BUFFER_SIZE, 1, 0, &bo_mc,
2060				  &va_handle, 0);
2061	CU_ASSERT_EQUAL(r, 0);
2062
2063	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
2064	CU_ASSERT_EQUAL(r, 0);
2065
2066	handle = buf_handle;
2067
2068	j = i = 0;
2069
2070	if (family_id == AMDGPU_FAMILY_SI)
2071		pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
2072				sdma_write_length);
2073	else
2074		pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
2075				SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
2076	pm4[i++] = 0xffffffff & bo_mc;
2077	pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
2078	if (family_id >= AMDGPU_FAMILY_AI)
2079		pm4[i++] = sdma_write_length - 1;
2080	else if (family_id != AMDGPU_FAMILY_SI)
2081		pm4[i++] = sdma_write_length;
2082
2083	while (j++ < sdma_write_length)
2084		pm4[i++] = 0xdeadbeaf;
2085
2086	if (!fork()) {
2087		pm4[0] = 0x0;
2088		exit(0);
2089	}
2090
2091	amdgpu_test_exec_cs_helper(context_handle,
2092				   AMDGPU_HW_IP_DMA, 0,
2093				   i, pm4,
2094				   1, &handle,
2095				   ib_info, ibs_request);
2096	i = 0;
2097	while (i < sdma_write_length) {
2098		CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
2099	}
2100	free(ibs_request);
2101	free(ib_info);
2102	free(pm4);
2103
2104	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
2105	CU_ASSERT_EQUAL(r, 0);
2106	r = amdgpu_va_range_free(va_handle);
2107	CU_ASSERT_EQUAL(r, 0);
2108	r = amdgpu_bo_free(buf_handle);
2109	CU_ASSERT_EQUAL(r, 0);
2110	free(ptr);
2111
2112	r = amdgpu_cs_ctx_free(context_handle);
2113	CU_ASSERT_EQUAL(r, 0);
2114
2115	wait(NULL);
2116}
2117
2118static void amdgpu_sync_dependency_test(void)
2119{
2120	amdgpu_context_handle context_handle[2];
2121	amdgpu_bo_handle ib_result_handle;
2122	void *ib_result_cpu;
2123	uint64_t ib_result_mc_address;
2124	struct amdgpu_cs_request ibs_request;
2125	struct amdgpu_cs_ib_info ib_info;
2126	struct amdgpu_cs_fence fence_status;
2127	uint32_t expired;
2128	int i, j, r;
2129	amdgpu_bo_list_handle bo_list;
2130	amdgpu_va_handle va_handle;
2131	static uint32_t *ptr;
2132	uint64_t seq_no;
2133
2134	r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
2135	CU_ASSERT_EQUAL(r, 0);
2136	r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
2137	CU_ASSERT_EQUAL(r, 0);
2138
2139	r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
2140			AMDGPU_GEM_DOMAIN_GTT, 0,
2141						    &ib_result_handle, &ib_result_cpu,
2142						    &ib_result_mc_address, &va_handle);
2143	CU_ASSERT_EQUAL(r, 0);
2144
2145	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
2146			       &bo_list);
2147	CU_ASSERT_EQUAL(r, 0);
2148
2149	ptr = ib_result_cpu;
2150	i = 0;
2151
2152	memcpy(ptr + CODE_OFFSET , shader_bin, sizeof(shader_bin));
2153
2154	/* Dispatch minimal init config and verify it's executed */
2155	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
2156	ptr[i++] = 0x80000000;
2157	ptr[i++] = 0x80000000;
2158
2159	ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
2160	ptr[i++] = 0x80000000;
2161
2162
2163	/* Program compute regs */
2164	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2165	ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
2166	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
2167	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;
2168
2169
2170	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2171	ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
2172	/*
2173	 * 002c0040         COMPUTE_PGM_RSRC1 <- VGPRS = 0
2174	                                      SGPRS = 1
2175	                                      PRIORITY = 0
2176	                                      FLOAT_MODE = 192 (0xc0)
2177	                                      PRIV = 0
2178	                                      DX10_CLAMP = 1
2179	                                      DEBUG_MODE = 0
2180	                                      IEEE_MODE = 0
2181	                                      BULKY = 0
2182	                                      CDBG_USER = 0
2183	 *
2184	 */
2185	ptr[i++] = 0x002c0040;
2186
2187
2188	/*
2189	 * 00000010         COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
2190	                                      USER_SGPR = 8
2191	                                      TRAP_PRESENT = 0
2192	                                      TGID_X_EN = 0
2193	                                      TGID_Y_EN = 0
2194	                                      TGID_Z_EN = 0
2195	                                      TG_SIZE_EN = 0
2196	                                      TIDIG_COMP_CNT = 0
2197	                                      EXCP_EN_MSB = 0
2198	                                      LDS_SIZE = 0
2199	                                      EXCP_EN = 0
2200	 *
2201	 */
2202	ptr[i++] = 0x00000010;
2203
2204
2205/*
2206 * 00000100         COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
2207                                         WAVESIZE = 0
2208 *
2209 */
2210	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2211	ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
2212	ptr[i++] = 0x00000100;
2213
2214	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2215	ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
2216	ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
2217	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
2218
2219	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2220	ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
2221	ptr[i++] = 0;
2222
2223	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
2224	ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
2225	ptr[i++] = 1;
2226	ptr[i++] = 1;
2227	ptr[i++] = 1;
2228
2229
2230	/* Dispatch */
2231	ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
2232	ptr[i++] = 1;
2233	ptr[i++] = 1;
2234	ptr[i++] = 1;
2235	ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */
2236
2237
2238	while (i & 7)
2239		ptr[i++] =  0xffff1000; /* type3 nop packet */
2240
2241	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
2242	ib_info.ib_mc_address = ib_result_mc_address;
2243	ib_info.size = i;
2244
2245	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
2246	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
2247	ibs_request.ring = 0;
2248	ibs_request.number_of_ibs = 1;
2249	ibs_request.ibs = &ib_info;
2250	ibs_request.resources = bo_list;
2251	ibs_request.fence_info.handle = NULL;
2252
2253	r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request, 1);
2254	CU_ASSERT_EQUAL(r, 0);
2255	seq_no = ibs_request.seq_no;
2256
2257
2258
2259	/* Prepare second command with dependency on the first */
2260	j = i;
2261	ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
2262	ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
2263	ptr[i++] =          0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
2264	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
2265	ptr[i++] = 99;
2266
2267	while (i & 7)
2268		ptr[i++] =  0xffff1000; /* type3 nop packet */
2269
2270	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
2271	ib_info.ib_mc_address = ib_result_mc_address + j * 4;
2272	ib_info.size = i - j;
2273
2274	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
2275	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
2276	ibs_request.ring = 0;
2277	ibs_request.number_of_ibs = 1;
2278	ibs_request.ibs = &ib_info;
2279	ibs_request.resources = bo_list;
2280	ibs_request.fence_info.handle = NULL;
2281
2282	ibs_request.number_of_dependencies = 1;
2283
2284	ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
2285	ibs_request.dependencies[0].context = context_handle[1];
2286	ibs_request.dependencies[0].ip_instance = 0;
2287	ibs_request.dependencies[0].ring = 0;
2288	ibs_request.dependencies[0].fence = seq_no;
2289
2290
2291	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request, 1);
2292	CU_ASSERT_EQUAL(r, 0);
2293
2294
2295	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
2296	fence_status.context = context_handle[0];
2297	fence_status.ip_type = AMDGPU_HW_IP_GFX;
2298	fence_status.ip_instance = 0;
2299	fence_status.ring = 0;
2300	fence_status.fence = ibs_request.seq_no;
2301
2302	r = amdgpu_cs_query_fence_status(&fence_status,
2303		       AMDGPU_TIMEOUT_INFINITE,0, &expired);
2304	CU_ASSERT_EQUAL(r, 0);
2305
2306	/* Expect the second command to wait for shader to complete */
2307	CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);
2308
2309	r = amdgpu_bo_list_destroy(bo_list);
2310	CU_ASSERT_EQUAL(r, 0);
2311
2312	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
2313				     ib_result_mc_address, 4096);
2314	CU_ASSERT_EQUAL(r, 0);
2315
2316	r = amdgpu_cs_ctx_free(context_handle[0]);
2317	CU_ASSERT_EQUAL(r, 0);
2318	r = amdgpu_cs_ctx_free(context_handle[1]);
2319	CU_ASSERT_EQUAL(r, 0);
2320
2321	free(ibs_request.dependencies);
2322}
2323
2324static int amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t *ptr, int family)
2325{
2326	struct amdgpu_test_shader *shader;
2327	int i, loop = 0x10000;
2328
2329	switch (family) {
2330		case AMDGPU_FAMILY_AI:
2331			shader = &memcpy_cs_hang_slow_ai;
2332			break;
2333		case AMDGPU_FAMILY_RV:
2334			shader = &memcpy_cs_hang_slow_rv;
2335			break;
2336		default:
2337			return -1;
2338			break;
2339	}
2340
2341	memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
2342
2343	for (i = 0; i < loop; i++)
2344		memcpy(ptr + shader->header_length + shader->body_length * i,
2345			shader->shader + shader->header_length,
2346			shader->body_length * sizeof(uint32_t));
2347
2348	memcpy(ptr + shader->header_length + shader->body_length * loop,
2349		shader->shader + shader->header_length + shader->body_length,
2350		shader->foot_length * sizeof(uint32_t));
2351
2352	return 0;
2353}
2354
2355static int amdgpu_dispatch_load_cs_shader(uint8_t *ptr,
2356					   int cs_type)
2357{
2358	uint32_t shader_size;
2359	const uint32_t *shader;
2360
2361	switch (cs_type) {
2362		case CS_BUFFERCLEAR:
2363			shader = bufferclear_cs_shader_gfx9;
2364			shader_size = sizeof(bufferclear_cs_shader_gfx9);
2365			break;
2366		case CS_BUFFERCOPY:
2367			shader = buffercopy_cs_shader_gfx9;
2368			shader_size = sizeof(buffercopy_cs_shader_gfx9);
2369			break;
2370		case CS_HANG:
2371			shader = memcpy_ps_hang;
2372			shader_size = sizeof(memcpy_ps_hang);
2373			break;
2374		default:
2375			return -1;
2376			break;
2377	}
2378
2379	memcpy(ptr, shader, shader_size);
2380	return 0;
2381}
2382
2383static int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type)
2384{
2385	int i = 0;
2386
2387	/* Write context control and load shadowing register if necessary */
2388	if (ip_type == AMDGPU_HW_IP_GFX) {
2389		ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
2390		ptr[i++] = 0x80000000;
2391		ptr[i++] = 0x80000000;
2392	}
2393
2394	/* Issue commands to set default compute state. */
2395	/* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */
2396	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 3);
2397	ptr[i++] = 0x204;
2398	i += 3;
2399
2400	/* clear mmCOMPUTE_TMPRING_SIZE */
2401	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2402	ptr[i++] = 0x218;
2403	ptr[i++] = 0;
2404
2405	return i;
2406}
2407
2408static int amdgpu_dispatch_write_cumask(uint32_t *ptr)
2409{
2410	int i = 0;
2411
2412	/*  Issue commands to set cu mask used in current dispatch */
2413	/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */
2414	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2415	ptr[i++] = 0x216;
2416	ptr[i++] = 0xffffffff;
2417	ptr[i++] = 0xffffffff;
2418	/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */
2419	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2420	ptr[i++] = 0x219;
2421	ptr[i++] = 0xffffffff;
2422	ptr[i++] = 0xffffffff;
2423
2424	return i;
2425}
2426
2427static int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr)
2428{
2429	int i, j;
2430
2431	i = 0;
2432
2433	/* Writes shader state to HW */
2434	/* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */
2435	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2436	ptr[i++] = 0x20c;
2437	ptr[i++] = (shader_addr >> 8);
2438	ptr[i++] = (shader_addr >> 40);
2439	/* write sh regs*/
2440	for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) {
2441		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2442		/* - Gfx9ShRegBase */
2443		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00;
2444		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1];
2445	}
2446
2447	return i;
2448}
2449
/*
 * Dispatch the buffer-clear compute shader on @ip_type/@ring and verify
 * that every byte of the 16 KiB destination BO was set to 0x22.
 */
static void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
					 uint32_t ip_type,
					 uint32_t ring)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3];
	volatile unsigned char *ptr_dst;
	void *ptr_shader;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_shader, mc_address_cmd;
	amdgpu_va_handle va_dst, va_shader, va_cmd;
	int i, r;
	int bo_dst_size = 16384;
	int bo_shader_size = 4096;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info= {0};
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t expired;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* Command buffer in GTT; zeroed so skipped dwords are NOPs/zeros. */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
					AMDGPU_GEM_DOMAIN_GTT, 0,
					&bo_cmd, (void **)&ptr_cmd,
					&mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	/* Shader code lives in VRAM. */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader, &ptr_shader,
					&mc_address_shader, &va_shader);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader, 0, bo_shader_size);

	r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_dst, (void **)&ptr_dst,
					&mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	i = 0;
	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);

	/*  Issue commands to set cu mask used in current dispatch */
	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);

	/* Writes shader state to HW */
	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);

	/* Write constant data */
	/* Writes the UAV constant data to the SGPRs. */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x240;
	ptr_cmd[i++] = mc_address_dst;
	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
	ptr_cmd[i++] = 0x400;
	ptr_cmd[i++] = 0x74fac;

	/* Sets a range of pixel shader constants */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x244;
	/* 0x22222222 is the fill pattern the shader writes (0x22 bytes) */
	ptr_cmd[i++] = 0x22222222;
	ptr_cmd[i++] = 0x22222222;
	ptr_cmd[i++] = 0x22222222;
	ptr_cmd[i++] = 0x22222222;

	/* clear mmCOMPUTE_RESOURCE_LIMITS */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
	ptr_cmd[i++] = 0x215;
	ptr_cmd[i++] = 0;

	/* dispatch direct command */
	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
	ptr_cmd[i++] = 0x10;
	ptr_cmd[i++] = 1;
	ptr_cmd[i++] = 1;
	ptr_cmd[i++] = 1;

	/* pad IB to an 8-dword boundary */
	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_dst;
	resources[1] = bo_shader;
	resources[2] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = ip_type;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for IB accomplished */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* verify if memset test result meets with expected */
	i = 0;
	while(i < bo_dst_size) {
		CU_ASSERT_EQUAL(ptr_dst[i++], 0x22);
	}

	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
2591
/*
 * Dispatch a compute shader that copies 16 KiB from a source BO to a
 * destination BO on @ip_type/@ring and verify the copy.  When @hang is
 * non-zero, load a faulting shader instead and verify the context
 * reports a GPU reset rather than checking the data.
 */
static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
					uint32_t ip_type,
					uint32_t ring,
					int hang)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
	volatile unsigned char *ptr_dst;
	void *ptr_shader;
	unsigned char *ptr_src;
	uint32_t *ptr_cmd;
	uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
	amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
	int i, r;
	int bo_dst_size = 16384;
	int bo_shader_size = 4096;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info= {0};
	uint32_t expired, hang_state, hangs;
	enum cs_type cs_type;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* Command buffer in GTT; zeroed so padding dwords are predictable. */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader, &ptr_shader,
					&mc_address_shader, &va_shader);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader, 0, bo_shader_size);

	/* Either the real copy shader or the deliberately faulting one. */
	cs_type = hang ? CS_HANG : CS_BUFFERCOPY;
	r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_src, (void **)&ptr_src,
					&mc_address_src, &va_src);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_dst, (void **)&ptr_dst,
					&mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	/* Known source pattern for the copy verification below. */
	memset(ptr_src, 0x55, bo_dst_size);

	i = 0;
	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);

	/*  Issue commands to set cu mask used in current dispatch */
	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);

	/* Writes shader state to HW */
	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);

	/* Write constant data */
	/* Writes the texture resource constants data to the SGPRs */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x240;
	ptr_cmd[i++] = mc_address_src;
	ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
	ptr_cmd[i++] = 0x400;
	ptr_cmd[i++] = 0x74fac;

	/* Writes the UAV constant data to the SGPRs. */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x244;
	ptr_cmd[i++] = mc_address_dst;
	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
	ptr_cmd[i++] = 0x400;
	ptr_cmd[i++] = 0x74fac;

	/* clear mmCOMPUTE_RESOURCE_LIMITS */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
	ptr_cmd[i++] = 0x215;
	ptr_cmd[i++] = 0;

	/* dispatch direct command */
	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
	ptr_cmd[i++] = 0x10;
	ptr_cmd[i++] = 1;
	ptr_cmd[i++] = 1;
	ptr_cmd[i++] = 1;

	/* pad IB to an 8-dword boundary */
	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_shader;
	resources[1] = bo_src;
	resources[2] = bo_dst;
	resources[3] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = ip_type;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for IB accomplished */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);

	if (!hang) {
		CU_ASSERT_EQUAL(r, 0);
		CU_ASSERT_EQUAL(expired, true);

		/* verify if memcpy test result meets with expected */
		i = 0;
		while(i < bo_dst_size) {
			CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
			i++;
		}
	} else {
		/* the hang shader should have triggered a GPU reset */
		r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
		CU_ASSERT_EQUAL(r, 0);
		CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
	}

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
2754
2755static void amdgpu_compute_dispatch_test(void)
2756{
2757	int r;
2758	struct drm_amdgpu_info_hw_ip info;
2759	uint32_t ring_id;
2760
2761	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
2762	CU_ASSERT_EQUAL(r, 0);
2763	if (!info.available_rings)
2764		printf("SKIP ... as there's no compute ring\n");
2765
2766	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2767		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id);
2768		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, 0);
2769	}
2770}
2771
2772static void amdgpu_gfx_dispatch_test(void)
2773{
2774	int r;
2775	struct drm_amdgpu_info_hw_ip info;
2776	uint32_t ring_id;
2777
2778	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
2779	CU_ASSERT_EQUAL(r, 0);
2780	if (!info.available_rings)
2781		printf("SKIP ... as there's no graphics ring\n");
2782
2783	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2784		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id);
2785		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, 0);
2786	}
2787}
2788
2789void amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
2790{
2791	int r;
2792	struct drm_amdgpu_info_hw_ip info;
2793	uint32_t ring_id;
2794
2795	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
2796	CU_ASSERT_EQUAL(r, 0);
2797	if (!info.available_rings)
2798		printf("SKIP ... as there's no ring for ip %d\n", ip_type);
2799
2800	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2801		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
2802		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 1);
2803		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
2804	}
2805}
2806
/*
 * Submit a compute memcpy whose shader is loaded by
 * amdgpu_dispatch_load_cs_shader_hang_slow(), i.e. a deliberately slow/hung
 * job, and assert the context afterwards reports AMDGPU_CTX_UNKNOWN_RESET.
 *
 * @device_handle: opened amdgpu device
 * @ip_type:       HW IP block to submit on (compute/gfx)
 * @ring:          ring index within that IP
 */
static void amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_handle,
						  uint32_t ip_type, uint32_t ring)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
	volatile unsigned char *ptr_dst;
	void *ptr_shader;
	unsigned char *ptr_src;
	uint32_t *ptr_cmd;
	uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
	amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
	int i, r;
	/* 64 MiB src/dst buffers and a 4 MiB shader BO — much larger than the
	 * regular dispatch test, which is what makes the job slow. */
	int bo_dst_size = 0x4000000;
	int bo_shader_size = 0x400000;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info= {0};
	uint32_t hang_state, hangs, expired;
	struct amdgpu_gpu_info gpu_info = {0};
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};

	/* family_id is needed to pick the right hang-slow shader binary. */
	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* Command buffer lives in GTT; zeroed so unwritten dwords are NOPs. */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader, &ptr_shader,
					&mc_address_shader, &va_shader);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader, 0, bo_shader_size);

	r = amdgpu_dispatch_load_cs_shader_hang_slow(ptr_shader, gpu_info.family_id);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_src, (void **)&ptr_src,
					&mc_address_src, &va_src);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_dst, (void **)&ptr_dst,
					&mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	memset(ptr_src, 0x55, bo_dst_size);

	/* Build the PM4 stream: init, CU mask, shader setup, resource
	 * constants, then a DISPATCH_DIRECT. Order matters. */
	i = 0;
	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);

	/*  Issue commands to set cu mask used in current dispatch */
	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);

	/* Writes shader state to HW */
	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);

	/* Write constant data */
	/* Writes the texture resource constants data to the SGPRs */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x240;
	ptr_cmd[i++] = mc_address_src;
	ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
	ptr_cmd[i++] = 0x400000;
	ptr_cmd[i++] = 0x74fac;

	/* Writes the UAV constant data to the SGPRs. */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x244;
	ptr_cmd[i++] = mc_address_dst;
	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
	ptr_cmd[i++] = 0x400000;
	ptr_cmd[i++] = 0x74fac;

	/* clear mmCOMPUTE_RESOURCE_LIMITS */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
	ptr_cmd[i++] = 0x215;
	ptr_cmd[i++] = 0;

	/* dispatch direct command */
	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
	ptr_cmd[i++] = 0x10000;
	ptr_cmd[i++] = 1;
	ptr_cmd[i++] = 1;
	ptr_cmd[i++] = 1;

	/* Pad the IB to an 8-dword boundary. */
	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_shader;
	resources[1] = bo_src;
	resources[2] = bo_dst;
	resources[3] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = ip_type;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for IB accomplished */
	/* NOTE(review): r/expired deliberately not asserted here — the job is
	 * expected to hang; the reset-state check below is the real assert. */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);

	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
2957
2958void amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
2959{
2960	int r;
2961	struct drm_amdgpu_info_hw_ip info;
2962	uint32_t ring_id;
2963
2964	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
2965	CU_ASSERT_EQUAL(r, 0);
2966	if (!info.available_rings)
2967		printf("SKIP ... as there's no ring for ip %d\n", ip_type);
2968
2969	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2970		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
2971		amdgpu_memcpy_dispatch_hang_slow_test(device_handle, ip_type, ring_id);
2972		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
2973	}
2974}
2975
2976static int amdgpu_draw_load_ps_shader_hang_slow(uint32_t *ptr, int family)
2977{
2978	struct amdgpu_test_shader *shader;
2979	int i, loop = 0x40000;
2980
2981	switch (family) {
2982		case AMDGPU_FAMILY_AI:
2983		case AMDGPU_FAMILY_RV:
2984			shader = &memcpy_ps_hang_slow_ai;
2985			break;
2986		default:
2987			return -1;
2988			break;
2989	}
2990
2991	memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
2992
2993	for (i = 0; i < loop; i++)
2994		memcpy(ptr + shader->header_length + shader->body_length * i,
2995			shader->shader + shader->header_length,
2996			shader->body_length * sizeof(uint32_t));
2997
2998	memcpy(ptr + shader->header_length + shader->body_length * loop,
2999		shader->shader + shader->header_length + shader->body_length,
3000		shader->foot_length * sizeof(uint32_t));
3001
3002	return 0;
3003}
3004
3005static int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type)
3006{
3007	int i;
3008	uint32_t shader_offset= 256;
3009	uint32_t mem_offset, patch_code_offset;
3010	uint32_t shader_size, patchinfo_code_size;
3011	const uint32_t *shader;
3012	const uint32_t *patchinfo_code;
3013	const uint32_t *patchcode_offset;
3014
3015	switch (ps_type) {
3016		case PS_CONST:
3017			shader = ps_const_shader_gfx9;
3018			shader_size = sizeof(ps_const_shader_gfx9);
3019			patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9;
3020			patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9;
3021			patchcode_offset = ps_const_shader_patchinfo_offset_gfx9;
3022			break;
3023		case PS_TEX:
3024			shader = ps_tex_shader_gfx9;
3025			shader_size = sizeof(ps_tex_shader_gfx9);
3026			patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx9;
3027			patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9;
3028			patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9;
3029			break;
3030		case PS_HANG:
3031			shader = memcpy_ps_hang;
3032			shader_size = sizeof(memcpy_ps_hang);
3033
3034			memcpy(ptr, shader, shader_size);
3035			return 0;
3036		default:
3037			return -1;
3038			break;
3039	}
3040
3041	/* write main shader program */
3042	for (i = 0 ; i < 10; i++) {
3043		mem_offset = i * shader_offset;
3044		memcpy(ptr + mem_offset, shader, shader_size);
3045	}
3046
3047	/* overwrite patch codes */
3048	for (i = 0 ; i < 10; i++) {
3049		mem_offset = i * shader_offset + patchcode_offset[0] * sizeof(uint32_t);
3050		patch_code_offset = i * patchinfo_code_size;
3051		memcpy(ptr + mem_offset,
3052			patchinfo_code + patch_code_offset,
3053			patchinfo_code_size * sizeof(uint32_t));
3054	}
3055
3056	return 0;
3057}
3058
3059/* load RectPosTexFast_VS */
3060static int amdgpu_draw_load_vs_shader(uint8_t *ptr)
3061{
3062	const uint32_t *shader;
3063	uint32_t shader_size;
3064
3065	shader = vs_RectPosTexFast_shader_gfx9;
3066	shader_size = sizeof(vs_RectPosTexFast_shader_gfx9);
3067
3068	memcpy(ptr, shader, shader_size);
3069
3070	return 0;
3071}
3072
3073static int amdgpu_draw_init(uint32_t *ptr)
3074{
3075	int i = 0;
3076	const uint32_t *preamblecache_ptr;
3077	uint32_t preamblecache_size;
3078
3079	/* Write context control and load shadowing register if necessary */
3080	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
3081	ptr[i++] = 0x80000000;
3082	ptr[i++] = 0x80000000;
3083
3084	preamblecache_ptr = preamblecache_gfx9;
3085	preamblecache_size = sizeof(preamblecache_gfx9);
3086
3087	memcpy(ptr + i, preamblecache_ptr, preamblecache_size);
3088	return i + preamblecache_size/sizeof(uint32_t);
3089}
3090
/*
 * Program the color-buffer and depth-buffer surface registers for the draw
 * tests: CB_COLOR0_* pointing at @dst_addr, one dummy CB_COLOR1 slot, the
 * shader color format, and a cleared DB_Z_INFO pair.
 *
 * @ptr:       command-stream write cursor
 * @dst_addr:  GPU VA of the render target (written >> 8 / >> 40 as LO/HI)
 * @hang_slow: selects the larger surface dimensions used by the hang-slow
 *             variant of the test
 *
 * Returns the number of dwords written.  Skipped dwords (`i += N`) are left
 * as-is — NOTE(review): callers memset the command BO to 0 first, so those
 * registers are effectively written as 0.
 */
static int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr,
							 uint64_t dst_addr,
							 int hang_slow)
{
	int i = 0;

	/* setup color buffer */
	/* offset   reg
	   0xA318   CB_COLOR0_BASE
	   0xA319   CB_COLOR0_BASE_EXT
	   0xA31A   CB_COLOR0_ATTRIB2
	   0xA31B   CB_COLOR0_VIEW
	   0xA31C   CB_COLOR0_INFO
	   0xA31D   CB_COLOR0_ATTRIB
	   0xA31E   CB_COLOR0_DCC_CONTROL
	   0xA31F   CB_COLOR0_CMASK
	   0xA320   CB_COLOR0_CMASK_BASE_EXT
	   0xA321   CB_COLOR0_FMASK
	   0xA322   CB_COLOR0_FMASK_BASE_EXT
	   0xA323   CB_COLOR0_CLEAR_WORD0
	   0xA324   CB_COLOR0_CLEAR_WORD1
	   0xA325   CB_COLOR0_DCC_BASE
	   0xA326   CB_COLOR0_DCC_BASE_EXT */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15);
	ptr[i++] = 0x318;
	ptr[i++] = dst_addr >> 8;
	ptr[i++] = dst_addr >> 40;
	/* CB_COLOR0_ATTRIB2: larger extents for the hang-slow surface */
	ptr[i++] = hang_slow ? 0x1ffc7ff : 0x7c01f;
	ptr[i++] = 0;
	ptr[i++] = 0x50438;
	ptr[i++] = 0x10140000;
	/* remaining CB_COLOR0_* regs (DCC_CONTROL..DCC_BASE_EXT) left zero */
	i += 9;

	/* mmCB_MRT0_EPITCH */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x1e8;
	ptr[i++] = hang_slow ? 0x7ff : 0x1f;

	/* 0xA32B   CB_COLOR1_BASE */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x32b;
	ptr[i++] = 0;

	/* 0xA33A   CB_COLOR1_BASE */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x33a;
	ptr[i++] = 0;

	/* SPI_SHADER_COL_FORMAT */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x1c5;
	ptr[i++] = 9;

	/* Setup depth buffer */
	/* mmDB_Z_INFO */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
	ptr[i++] = 0xe;
	/* both DB_Z_INFO dwords left zero (depth disabled) */
	i += 2;

	return i;
}
3152
/*
 * Program the rasterizer/scan-converter state for the draw tests: the tile
 * steering override (toggled 0 -> 1 around two raw packet words), the AA
 * sample locations and centroid priority (all zero), then the canned gfx9
 * "cached command" register block.
 *
 * @hang_slow: when set, patches dword 12 of the cached block to 0x8000800
 *             (larger value for the hang-slow surface).
 *
 * Returns the number of dwords written.  Dwords skipped with `i += N` rely
 * on the caller having zeroed the command BO.
 */
static int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr, int hang_slow)
{
	int i = 0;
	const uint32_t *cached_cmd_ptr;
	uint32_t cached_cmd_size;

	/* mmPA_SC_TILE_STEERING_OVERRIDE */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0xd7;
	ptr[i++] = 0;

	/* raw packet words between the two override writes; 0xffff1000 is the
	 * same value used elsewhere in this file as a type3 NOP */
	ptr[i++] = 0xffff1000;
	ptr[i++] = 0xc0021000;

	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0xd7;
	ptr[i++] = 1;

	/* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16);
	ptr[i++] = 0x2fe;
	i += 16;

	/* mmPA_SC_CENTROID_PRIORITY_0 */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
	ptr[i++] = 0x2f5;
	i += 2;

	cached_cmd_ptr = cached_cmd_gfx9;
	cached_cmd_size = sizeof(cached_cmd_gfx9);

	memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size);
	if (hang_slow)
		*(ptr + i + 12) = 0x8000800;
	i += cached_cmd_size/sizeof(uint32_t);

	return i;
}
3191
/*
 * Program the vertex-shader hardware state for the RectPosTexFast VS:
 * SPI_SHADER_PGM_{LO,HI}_VS pointing at @shader_addr, the RSRC registers,
 * output config, and the user-SGPR constants the shader consumes.
 *
 * @ps_type:     PS_CONST leaves the 0x50 SGPR pair zero; PS_TEX writes
 *               0x3f800000 (1.0f bit pattern) into both
 * @shader_addr: GPU VA of the VS (written >> 8 / >> 40)
 * @hang_slow:   selects 0x45000000 over 0x42000000 for the 0x4c SGPR pair —
 *               NOTE(review): these look like float bit patterns (2048.0f vs
 *               32.0f), presumably a scale sized to the larger surface
 *
 * Returns the number of dwords written; skipped dwords rely on the caller's
 * zeroed command BO.
 */
static int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr,
						  int ps_type,
						  uint64_t shader_addr,
						  int hang_slow)
{
	int i = 0;

	/* mmPA_CL_VS_OUT_CNTL */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x207;
	ptr[i++] = 0;

	/* mmSPI_SHADER_PGM_RSRC3_VS */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = 0x46;
	ptr[i++] = 0xffff;

	/* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
	ptr[i++] = 0x48;
	ptr[i++] = shader_addr >> 8;
	ptr[i++] = shader_addr >> 40;

	/* mmSPI_SHADER_PGM_RSRC1_VS */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = 0x4a;
	ptr[i++] = 0xc0081;
	/* mmSPI_SHADER_PGM_RSRC2_VS */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = 0x4b;
	ptr[i++] = 0x18;

	/* mmSPI_VS_OUT_CONFIG */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x1b1;
	ptr[i++] = 2;

	/* mmSPI_SHADER_POS_FORMAT */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x1c3;
	ptr[i++] = 4;

	/* user SGPRs at 0x4c: first two dwords left zero */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr[i++] = 0x4c;
	i += 2;
	ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;
	ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;

	/* user SGPRs at 0x50: last pair set only for the textured PS */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr[i++] = 0x50;
	i += 2;
	if (ps_type == PS_CONST) {
		i += 2;
	} else if (ps_type == PS_TEX) {
		ptr[i++] = 0x3f800000;
		ptr[i++] = 0x3f800000;
	}

	/* user SGPRs at 0x54: all four dwords left zero */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr[i++] = 0x54;
	i += 4;

	return i;
}
3256
3257static int amdgpu_draw_ps_write2hw(uint32_t *ptr,
3258				   int ps_type,
3259				   uint64_t shader_addr)
3260{
3261	int i, j;
3262	const uint32_t *sh_registers;
3263	const uint32_t *context_registers;
3264	uint32_t num_sh_reg, num_context_reg;
3265
3266	if (ps_type == PS_CONST) {
3267		sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9;
3268		context_registers = (const uint32_t *)ps_const_context_reg_gfx9;
3269		num_sh_reg = ps_num_sh_registers_gfx9;
3270		num_context_reg = ps_num_context_registers_gfx9;
3271	} else if (ps_type == PS_TEX) {
3272		sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9;
3273		context_registers = (const uint32_t *)ps_tex_context_reg_gfx9;
3274		num_sh_reg = ps_num_sh_registers_gfx9;
3275		num_context_reg = ps_num_context_registers_gfx9;
3276	}
3277
3278	i = 0;
3279
3280	/* 0x2c07   SPI_SHADER_PGM_RSRC3_PS
3281	   0x2c08   SPI_SHADER_PGM_LO_PS
3282	   0x2c09   SPI_SHADER_PGM_HI_PS */
3283	shader_addr += 256 * 9;
3284	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
3285	ptr[i++] = 0x7;
3286	ptr[i++] = 0xffff;
3287	ptr[i++] = shader_addr >> 8;
3288	ptr[i++] = shader_addr >> 40;
3289
3290	for (j = 0; j < num_sh_reg; j++) {
3291		ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3292		ptr[i++] = sh_registers[j * 2] - 0x2c00;
3293		ptr[i++] = sh_registers[j * 2 + 1];
3294	}
3295
3296	for (j = 0; j < num_context_reg; j++) {
3297		if (context_registers[j * 2] != 0xA1C5) {
3298			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3299			ptr[i++] = context_registers[j * 2] - 0xa000;
3300			ptr[i++] = context_registers[j * 2 + 1];
3301		}
3302
3303		if (context_registers[j * 2] == 0xA1B4) {
3304			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3305			ptr[i++] = 0x1b3;
3306			ptr[i++] = 2;
3307		}
3308	}
3309
3310	return i;
3311}
3312
3313static int amdgpu_draw_draw(uint32_t *ptr)
3314{
3315	int i = 0;
3316
3317	/* mmIA_MULTI_VGT_PARAM */
3318	ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3319	ptr[i++] = 0x40000258;
3320	ptr[i++] = 0xd00ff;
3321
3322	/* mmVGT_PRIMITIVE_TYPE */
3323	ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3324	ptr[i++] = 0x10000242;
3325	ptr[i++] = 0x11;
3326
3327	ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1);
3328	ptr[i++] = 3;
3329	ptr[i++] = 2;
3330
3331	return i;
3332}
3333
/*
 * Render a full-screen rect with the PS_CONST pixel shader so every byte of
 * the destination BO becomes 0x33 (the constant pushed via user SGPRs), wait
 * for completion, and verify the result.
 *
 * @bo_shader_ps/@bo_shader_vs:        pre-loaded shader BOs (see
 *                                     amdgpu_memset_draw_test)
 * @mc_address_shader_ps/_vs:          their GPU VAs
 * @ring_id:                           GFX ring to submit on
 */
void amdgpu_memset_draw(amdgpu_device_handle device_handle,
			amdgpu_bo_handle bo_shader_ps,
			amdgpu_bo_handle bo_shader_vs,
			uint64_t mc_address_shader_ps,
			uint64_t mc_address_shader_vs,
			uint32_t ring_id)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_dst, bo_cmd, resources[4];
	volatile unsigned char *ptr_dst;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_cmd;
	amdgpu_va_handle va_dst, va_cmd;
	int i, r;
	int bo_dst_size = 16384;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* command buffer in GTT, zeroed so skipped dwords read as 0 */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
					AMDGPU_GEM_DOMAIN_GTT, 0,
					&bo_cmd, (void **)&ptr_cmd,
					&mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_dst, (void **)&ptr_dst,
					&mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	/* build the draw command stream */
	i = 0;
	i += amdgpu_draw_init(ptr_cmd + i);

	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0);

	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0);

	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs, 0);

	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps);

	/* PS user SGPRs at 0xc: the constant color 0x33333333 — this is the
	 * value the memset check below expects, byte-wise 0x33 */
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0xc;
	ptr_cmd[i++] = 0x33333333;
	ptr_cmd[i++] = 0x33333333;
	ptr_cmd[i++] = 0x33333333;
	ptr_cmd[i++] = 0x33333333;

	i += amdgpu_draw_draw(ptr_cmd + i);

	/* pad the IB to an 8-dword boundary */
	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_dst;
	resources[1] = bo_shader_ps;
	resources[2] = bo_shader_vs;
	resources[3] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = ring_id;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = ring_id;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for IB accomplished */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* verify if memset test result meets with expected */
	i = 0;
	while(i < bo_dst_size) {
		CU_ASSERT_EQUAL(ptr_dst[i++], 0x33);
	}

	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
3446
3447static void amdgpu_memset_draw_test(amdgpu_device_handle device_handle,
3448				    uint32_t ring)
3449{
3450	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
3451	void *ptr_shader_ps;
3452	void *ptr_shader_vs;
3453	uint64_t mc_address_shader_ps, mc_address_shader_vs;
3454	amdgpu_va_handle va_shader_ps, va_shader_vs;
3455	int r;
3456	int bo_shader_size = 4096;
3457
3458	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
3459					AMDGPU_GEM_DOMAIN_VRAM, 0,
3460					&bo_shader_ps, &ptr_shader_ps,
3461					&mc_address_shader_ps, &va_shader_ps);
3462	CU_ASSERT_EQUAL(r, 0);
3463	memset(ptr_shader_ps, 0, bo_shader_size);
3464
3465	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
3466					AMDGPU_GEM_DOMAIN_VRAM, 0,
3467					&bo_shader_vs, &ptr_shader_vs,
3468					&mc_address_shader_vs, &va_shader_vs);
3469	CU_ASSERT_EQUAL(r, 0);
3470	memset(ptr_shader_vs, 0, bo_shader_size);
3471
3472	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_CONST);
3473	CU_ASSERT_EQUAL(r, 0);
3474
3475	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
3476	CU_ASSERT_EQUAL(r, 0);
3477
3478	amdgpu_memset_draw(device_handle, bo_shader_ps, bo_shader_vs,
3479			mc_address_shader_ps, mc_address_shader_vs, ring);
3480
3481	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
3482	CU_ASSERT_EQUAL(r, 0);
3483
3484	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
3485	CU_ASSERT_EQUAL(r, 0);
3486}
3487
/*
 * Render a full-screen textured rect that samples the src BO into the dst BO
 * (a GPU memcpy through the 3D pipe).  With @hang == 0 the result is
 * verified byte-for-byte; with @hang != 0 (the PS_HANG shader was loaded by
 * the caller) the context is instead expected to report
 * AMDGPU_CTX_UNKNOWN_RESET.
 */
static void amdgpu_memcpy_draw(amdgpu_device_handle device_handle,
			       amdgpu_bo_handle bo_shader_ps,
			       amdgpu_bo_handle bo_shader_vs,
			       uint64_t mc_address_shader_ps,
			       uint64_t mc_address_shader_vs,
			       uint32_t ring, int hang)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
	volatile unsigned char *ptr_dst;
	unsigned char *ptr_src;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
	amdgpu_va_handle va_dst, va_src, va_cmd;
	int i, r;
	int bo_size = 16384;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info= {0};
	uint32_t hang_state, hangs;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* command buffer in GTT, zeroed so skipped dwords read as 0 */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_src, (void **)&ptr_src,
					&mc_address_src, &va_src);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_dst, (void **)&ptr_dst,
					&mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	/* known pattern in src; dst should match after the draw */
	memset(ptr_src, 0x55, bo_size);

	i = 0;
	i += amdgpu_draw_init(ptr_cmd + i);

	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0);

	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0);

	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs, 0);

	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);

	/* texture resource descriptor at SGPR 0xc pointing at the src BO */
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
	ptr_cmd[i++] = 0xc;
	ptr_cmd[i++] = mc_address_src >> 8;
	ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
	ptr_cmd[i++] = 0x7c01f;
	ptr_cmd[i++] = 0x90500fac;
	ptr_cmd[i++] = 0x3e000;
	i += 3;

	/* sampler descriptor at SGPR 0x14 */
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x14;
	ptr_cmd[i++] = 0x92;
	i += 3;

	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr_cmd[i++] = 0x191;
	ptr_cmd[i++] = 0;

	i += amdgpu_draw_draw(ptr_cmd + i);

	/* pad the IB to an 8-dword boundary */
	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_dst;
	resources[1] = bo_src;
	resources[2] = bo_shader_ps;
	resources[3] = bo_shader_vs;
	resources[4] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for IB accomplished */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	if (!hang) {
		CU_ASSERT_EQUAL(r, 0);
		CU_ASSERT_EQUAL(expired, true);

		/* verify if memcpy test result meets with expected */
		i = 0;
		while(i < bo_size) {
			CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
			i++;
		}
	} else {
		/* hang expected: wait result unchecked, context must report a reset */
		r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
		CU_ASSERT_EQUAL(r, 0);
		CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
	}

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
3629
3630void amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring,
3631			     int hang)
3632{
3633	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
3634	void *ptr_shader_ps;
3635	void *ptr_shader_vs;
3636	uint64_t mc_address_shader_ps, mc_address_shader_vs;
3637	amdgpu_va_handle va_shader_ps, va_shader_vs;
3638	int bo_shader_size = 4096;
3639	enum ps_type ps_type = hang ? PS_HANG : PS_TEX;
3640	int r;
3641
3642	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
3643					AMDGPU_GEM_DOMAIN_VRAM, 0,
3644					&bo_shader_ps, &ptr_shader_ps,
3645					&mc_address_shader_ps, &va_shader_ps);
3646	CU_ASSERT_EQUAL(r, 0);
3647	memset(ptr_shader_ps, 0, bo_shader_size);
3648
3649	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
3650					AMDGPU_GEM_DOMAIN_VRAM, 0,
3651					&bo_shader_vs, &ptr_shader_vs,
3652					&mc_address_shader_vs, &va_shader_vs);
3653	CU_ASSERT_EQUAL(r, 0);
3654	memset(ptr_shader_vs, 0, bo_shader_size);
3655
3656	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, ps_type);
3657	CU_ASSERT_EQUAL(r, 0);
3658
3659	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
3660	CU_ASSERT_EQUAL(r, 0);
3661
3662	amdgpu_memcpy_draw(device_handle, bo_shader_ps, bo_shader_vs,
3663			mc_address_shader_ps, mc_address_shader_vs, ring, hang);
3664
3665	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
3666	CU_ASSERT_EQUAL(r, 0);
3667
3668	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
3669	CU_ASSERT_EQUAL(r, 0);
3670}
3671
3672static void amdgpu_draw_test(void)
3673{
3674	int r;
3675	struct drm_amdgpu_info_hw_ip info;
3676	uint32_t ring_id;
3677
3678	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
3679	CU_ASSERT_EQUAL(r, 0);
3680	if (!info.available_rings)
3681		printf("SKIP ... as there's no graphics ring\n");
3682
3683	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
3684		amdgpu_memset_draw_test(device_handle, ring_id);
3685		amdgpu_memcpy_draw_test(device_handle, ring_id, 0);
3686	}
3687}
3688
/*
 * Self-contained hang-slow variant of the memcpy draw test: allocates its
 * own shader/src/dst/command BOs (much larger than the normal test), loads
 * the oversized hang-slow pixel shader, submits the draw, and asserts the
 * context reports AMDGPU_CTX_UNKNOWN_RESET afterwards.
 */
void amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint32_t ring)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
	void *ptr_shader_ps;
	void *ptr_shader_vs;
	volatile unsigned char *ptr_dst;
	unsigned char *ptr_src;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
	uint64_t mc_address_shader_ps, mc_address_shader_vs;
	amdgpu_va_handle va_shader_ps, va_shader_vs;
	amdgpu_va_handle va_dst, va_src, va_cmd;
	struct amdgpu_gpu_info gpu_info = {0};
	int i, r;
	/* 64 MiB src/dst and a 4 MiB PS BO — sized for the hang-slow shader */
	int bo_size = 0x4000000;
	int bo_shader_ps_size = 0x400000;
	int bo_shader_vs_size = 4096;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info= {0};
	uint32_t hang_state, hangs, expired;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};

	/* family_id selects the hang-slow shader binary */
	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* command buffer in GTT, zeroed so skipped dwords read as 0 */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_ps_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader_ps, &ptr_shader_ps,
					&mc_address_shader_ps, &va_shader_ps);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_ps, 0, bo_shader_ps_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_vs_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader_vs, &ptr_shader_vs,
					&mc_address_shader_vs, &va_shader_vs);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_vs, 0, bo_shader_vs_size);

	r = amdgpu_draw_load_ps_shader_hang_slow(ptr_shader_ps, gpu_info.family_id);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_src, (void **)&ptr_src,
					&mc_address_src, &va_src);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_dst, (void **)&ptr_dst,
					&mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	memset(ptr_src, 0x55, bo_size);

	/* build the draw stream; hang_slow=1 selects the larger surface */
	i = 0;
	i += amdgpu_draw_init(ptr_cmd + i);

	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 1);

	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 1);

	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX,
							mc_address_shader_vs, 1);

	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);

	/* texture resource descriptor at SGPR 0xc pointing at the src BO
	 * (larger extents than the normal memcpy draw) */
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
	ptr_cmd[i++] = 0xc;
	ptr_cmd[i++] = mc_address_src >> 8;
	ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
	ptr_cmd[i++] = 0x1ffc7ff;
	ptr_cmd[i++] = 0x90500fac;
	ptr_cmd[i++] = 0xffe000;
	i += 3;

	/* sampler descriptor at SGPR 0x14 */
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x14;
	ptr_cmd[i++] = 0x92;
	i += 3;

	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr_cmd[i++] = 0x191;
	ptr_cmd[i++] = 0;

	i += amdgpu_draw_draw(ptr_cmd + i);

	/* pad the IB to an 8-dword boundary */
	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_dst;
	resources[1] = bo_src;
	resources[2] = bo_shader_ps;
	resources[3] = bo_shader_vs;
	resources[4] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for IB accomplished */
	/* NOTE(review): r/expired deliberately not asserted — the job is
	 * expected to hang; the reset-state check below is the real assert. */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);

	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_ps_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_vs_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
3850
3851static void amdgpu_gpu_reset_test(void)
3852{
3853	int r;
3854	char debugfs_path[256], tmp[10];
3855	int fd;
3856	struct stat sbuf;
3857	amdgpu_context_handle context_handle;
3858	uint32_t hang_state, hangs;
3859
3860	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
3861	CU_ASSERT_EQUAL(r, 0);
3862
3863	r = fstat(drm_amdgpu[0], &sbuf);
3864	CU_ASSERT_EQUAL(r, 0);
3865
3866	sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/amdgpu_gpu_recover", minor(sbuf.st_rdev));
3867	fd = open(debugfs_path, O_RDONLY);
3868	CU_ASSERT(fd >= 0);
3869
3870	r = read(fd, tmp, sizeof(tmp)/sizeof(char));
3871	CU_ASSERT(r > 0);
3872
3873	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
3874	CU_ASSERT_EQUAL(r, 0);
3875	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
3876
3877	close(fd);
3878	r = amdgpu_cs_ctx_free(context_handle);
3879	CU_ASSERT_EQUAL(r, 0);
3880
3881	amdgpu_compute_dispatch_test();
3882	amdgpu_gfx_dispatch_test();
3883}
3884