basic_tests.c revision 5324fb0d
1/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22*/
23
24#include <stdio.h>
25#include <stdlib.h>
26#include <unistd.h>
27#ifdef HAVE_ALLOCA_H
28# include <alloca.h>
29#endif
30#include <sys/wait.h>
31
32#include "CUnit/Basic.h"
33
34#include "amdgpu_test.h"
35#include "amdgpu_drm.h"
36#include "util_math.h"
37
/* Shared state for the whole suite, set up once in suite_basic_tests_init(). */
static  amdgpu_device_handle device_handle;	/* device opened on drm_amdgpu[0] */
static  uint32_t  major_version;		/* libdrm_amdgpu interface major version */
static  uint32_t  minor_version;		/* libdrm_amdgpu interface minor version */
static  uint32_t  family_id;			/* ASIC family (e.g. AMDGPU_FAMILY_SI) */
42
/* Forward declarations for the CUnit test entry points registered in
 * basic_tests[] and for the submission helpers they share. */
static void amdgpu_query_info_test(void);
static void amdgpu_command_submission_gfx(void);
static void amdgpu_command_submission_compute(void);
static void amdgpu_command_submission_multi_fence(void);
static void amdgpu_command_submission_sdma(void);
static void amdgpu_userptr_test(void);
static void amdgpu_semaphore_test(void);
static void amdgpu_sync_dependency_test(void);
static void amdgpu_bo_eviction_test(void);
static void amdgpu_dispatch_test(void);
static void amdgpu_draw_test(void);

/* Helpers shared by the GFX / compute / SDMA submission tests; ip_type is
 * one of the AMDGPU_HW_IP_* ring types. */
static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
				       unsigned ip_type,
				       int instance, int pm4_dw, uint32_t *pm4_src,
				       int res_cnt, amdgpu_bo_handle *resources,
				       struct amdgpu_cs_ib_info *ib_info,
				       struct amdgpu_cs_request *ibs_request);
64
/* CUnit registration table for the "basic" suite; NULL-terminated. */
CU_TestInfo basic_tests[] = {
	{ "Query Info Test",  amdgpu_query_info_test },
	{ "Userptr Test",  amdgpu_userptr_test },
	{ "bo eviction Test",  amdgpu_bo_eviction_test },
	{ "Command submission Test (GFX)",  amdgpu_command_submission_gfx },
	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
	{ "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
	{ "SW semaphore Test",  amdgpu_semaphore_test },
	{ "Sync dependency Test",  amdgpu_sync_dependency_test },
	{ "Dispatch Test",  amdgpu_dispatch_test },
	{ "Draw Test",  amdgpu_draw_test },
	CU_TEST_INFO_NULL,
};
#define BUFFER_SIZE (8 * 1024)
#define SDMA_PKT_HEADER_op_offset 0
#define SDMA_PKT_HEADER_op_mask   0x000000FF
#define SDMA_PKT_HEADER_op_shift  0
#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
#define SDMA_OPCODE_CONSTANT_FILL  11
#       define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)           ((x) << 14)
	/* 0 = byte fill
	 * 2 = DW fill
	 */
/* Build an SDMA packet header: extra[31:16] | sub_op[15:8] | op[7:0].
 * Cast to unsigned before shifting: an "extra" of 0xFFFF shifted left by 16
 * would overflow a signed int, which is undefined behavior in C. */
#define SDMA_PACKET(op, sub_op, e)	((((unsigned)(e) & 0xFFFF) << 16) |	\
					(((unsigned)(sub_op) & 0xFF) << 8) |	\
					(((unsigned)(op) & 0xFF) << 0))
#define	SDMA_OPCODE_WRITE				  2
#       define SDMA_WRITE_SUB_OPCODE_LINEAR               0
/* NOTE(review): "WRTIE" typo kept — the name is referenced by other files. */
#       define SDMA_WRTIE_SUB_OPCODE_TILED                1

#define	SDMA_OPCODE_COPY				  1
#       define SDMA_COPY_SUB_OPCODE_LINEAR                0

#define GFX_COMPUTE_NOP  0xffff1000
#define SDMA_NOP  0x0
101
/* PM4: helpers to build and parse the 32-bit CP packet headers. */
#define	PACKET_TYPE0	0
#define	PACKET_TYPE1	1
#define	PACKET_TYPE2	2
#define	PACKET_TYPE3	3

#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
/* Cast the type field to unsigned before shifting into bits 30..31:
 * left-shifting a signed int into the sign bit is undefined behavior. */
#define PACKET0(reg, n)	(((unsigned)PACKET_TYPE0 << 30) |		\
			 ((reg) & 0xFFFF) |			\
			 ((n) & 0x3FFF) << 16)
#define CP_PACKET2			0x80000000
#define		PACKET2_PAD_SHIFT		0
#define		PACKET2_PAD_MASK		(0x3fffffff << 0)

#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))

#define PACKET3(op, n)	(((unsigned)PACKET_TYPE3 << 30) |		\
			 (((op) & 0xFF) << 8) |				\
			 ((n) & 0x3FFF) << 16)
/* Parenthesize the whole expansion so the low-precedence '|' cannot be
 * split apart by operators at the use site. */
#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | (1 << 1))

/* Packet 3 types */
#define	PACKET3_NOP					0x10

#define	PACKET3_WRITE_DATA				0x37
#define		WRITE_DATA_DST_SEL(x)                   ((x) << 8)
		/* 0 - register
		 * 1 - memory (sync - via GRBM)
		 * 2 - gl2
		 * 3 - gds
		 * 4 - reserved
		 * 5 - memory (async - direct)
		 */
#define		WR_ONE_ADDR                             (1 << 16)
#define		WR_CONFIRM                              (1 << 20)
#define		WRITE_DATA_CACHE_POLICY(x)              ((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 */
/* unsigned cast: engine 2 (CE) shifted by 30 would overflow a signed int */
#define		WRITE_DATA_ENGINE_SEL(x)                ((unsigned)(x) << 30)
		/* 0 - me
		 * 1 - pfp
		 * 2 - ce
		 */

#define	PACKET3_DMA_DATA				0x50
/* 1. header
 * 2. CONTROL
 * 3. SRC_ADDR_LO or DATA [31:0]
 * 4. SRC_ADDR_HI [31:0]
 * 5. DST_ADDR_LO [31:0]
 * 6. DST_ADDR_HI [7:0]
 * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
 */
/* CONTROL */
#              define PACKET3_DMA_DATA_ENGINE(x)     ((x) << 0)
		/* 0 - ME
		 * 1 - PFP
		 */
#              define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#              define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15)
#              define PACKET3_DMA_DATA_DST_SEL(x)  ((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#              define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27)
#              define PACKET3_DMA_DATA_SRC_SEL(x)  ((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
/* 1u: (1 << 31) is undefined behavior for a signed int */
#              define PACKET3_DMA_DATA_CP_SYNC     (1u << 31)
/* COMMAND */
#              define PACKET3_DMA_DATA_DIS_WC      (1 << 21)
#              define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#              define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#              define PACKET3_DMA_DATA_CMD_SAS     (1 << 26)
		/* 0 - memory
		 * 1 - register
		 */
#              define PACKET3_DMA_DATA_CMD_DAS     (1 << 27)
		/* 0 - memory
		 * 1 - register
		 */
#              define PACKET3_DMA_DATA_CMD_SAIC    (1 << 28)
#              define PACKET3_DMA_DATA_CMD_DAIC    (1 << 29)
#              define PACKET3_DMA_DATA_CMD_RAW_WAIT  (1 << 30)
213
/* SI (gfx6) SDMA packet header: op[31:28] | b[26] | t[23] | s[22] | cnt[19:0].
 * Cast op to unsigned before shifting: an opcode with bit 3 set (e.g. the
 * NOP opcode 0xF) shifted left by 28 would overflow a signed int (UB). */
#define SDMA_PACKET_SI(op, b, t, s, cnt)	((((unsigned)(op) & 0xF) << 28) |	\
						(((b) & 0x1) << 26) |		\
						(((t) & 0x1) << 23) |		\
						(((s) & 0x1) << 22) |		\
						(((cnt) & 0xFFFFF) << 0))
#define	SDMA_OPCODE_COPY_SI	3
#define SDMA_OPCODE_CONSTANT_FILL_SI	13
#define SDMA_NOP_SI  0xf
#define GFX_COMPUTE_NOP_SI 0x80000000
#define	PACKET3_DMA_DATA_SI	0x41
#              define PACKET3_DMA_DATA_SI_ENGINE(x)     ((x) << 27)
		/* 0 - ME
		 * 1 - PFP
		 */
#              define PACKET3_DMA_DATA_SI_DST_SEL(x)  ((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_SI_SRC_SEL(x)  ((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
/* 1u: (1 << 31) is undefined behavior for a signed int */
#              define PACKET3_DMA_DATA_SI_CP_SYNC     (1u << 31)
240
241
#define PKT3_CONTEXT_CONTROL                   0x28
/* LOAD_ENABLE and SHADOW_ENABLE both use bit 31: they are the enable bits of
 * the packet's two separate control DWORDs, not a copy/paste mistake. */
#define     CONTEXT_CONTROL_LOAD_ENABLE(x)     (((unsigned)(x) & 0x1) << 31)
#define     CONTEXT_CONTROL_LOAD_CE_RAM(x)     (((unsigned)(x) & 0x1) << 28)
#define     CONTEXT_CONTROL_SHADOW_ENABLE(x)   (((unsigned)(x) & 0x1) << 31)

#define PKT3_CLEAR_STATE                       0x12

#define PKT3_SET_SH_REG                        0x76
#define		PACKET3_SET_SH_REG_START			0x00002c00

#define	PACKET3_DISPATCH_DIRECT				0x15
#define PACKET3_EVENT_WRITE				0x46
#define PACKET3_ACQUIRE_MEM				0x58
#define PACKET3_SET_CONTEXT_REG				0x69
#define PACKET3_SET_UCONFIG_REG				0x79
#define PACKET3_DRAW_INDEX_AUTO				0x2D
/* gfx 8 */
#define mmCOMPUTE_PGM_LO                                                        0x2e0c
#define mmCOMPUTE_PGM_RSRC1                                                     0x2e12
#define mmCOMPUTE_TMPRING_SIZE                                                  0x2e18
#define mmCOMPUTE_USER_DATA_0                                                   0x2e40
#define mmCOMPUTE_USER_DATA_1                                                   0x2e41
#define mmCOMPUTE_RESOURCE_LIMITS                                               0x2e15
#define mmCOMPUTE_NUM_THREAD_X                                                  0x2e07



/* Byte-swap a 32-bit value.  The argument is now fully parenthesized (the
 * old macro mis-parsed compound arguments such as SWAP_32(a | b)) and cast
 * to uint32_t so shifting a byte into bit 31 cannot overflow a signed int. */
#define SWAP_32(num) ((((uint32_t)(num) & 0xff000000) >> 24) | \
		      (((uint32_t)(num) & 0x0000ff00) << 8) | \
		      (((uint32_t)(num) & 0x00ff0000) >> 8) | \
		      (((uint32_t)(num) & 0x000000ff) << 24))
273
274
/* Machine code for (roughly) the following compute kernel:
 *
 *   void main()
 *   {
 *       float x = some_input;
 *       for (unsigned i = 0; i < 1000000; i++)
 *           x = sin(x);
 *       u[0] = 42u;
 *   }
 *
 * Each DWORD below is passed through SWAP_32 before being stored.
 */

static  uint32_t shader_bin[] = {
	SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
	SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
	SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
	SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
};

/* Byte offsets within a test bo where shader code and its data are placed. */
#define CODE_OFFSET 512
#define DATA_OFFSET 1024

/* Selects which compute shader variant a dispatch test runs. */
enum cs_type {
	CS_BUFFERCLEAR,
	CS_BUFFERCOPY
};
301
/* GFX9 compute shader binary used by the CS_BUFFERCLEAR dispatch test. */
static const uint32_t bufferclear_cs_shader_gfx9[] = {
    0xD1FD0000, 0x04010C08, 0x7E020204, 0x7E040205,
    0x7E060206, 0x7E080207, 0xE01C2000, 0x80000100,
    0xBF810000
};

/* {register offset, value} pairs programmed before dispatching the clear CS. */
static const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = {
	{0x2e12, 0x000C0041},	//{ mmCOMPUTE_PGM_RSRC1,	  0x000C0041 },
	{0x2e13, 0x00000090},	//{ mmCOMPUTE_PGM_RSRC2,	  0x00000090 },
	{0x2e07, 0x00000040},	//{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 },
	{0x2e08, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 },
	{0x2e09, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
};

/* Entry count of bufferclear_cs_shader_registers_gfx9 above. */
static const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5;

/* GFX9 compute shader binary used by the CS_BUFFERCOPY dispatch test. */
static const uint32_t buffercopy_cs_shader_gfx9[] = {
    0xD1FD0000, 0x04010C08, 0xE00C2000, 0x80000100,
    0xBF8C0F70, 0xE01C2000, 0x80010100, 0xBF810000
};

/* Pre-built PM4 context-register preamble stream for GFX9. */
static const uint32_t preamblecache_gfx9[] = {
	0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
	0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
	0xc0026900, 0xb4,  0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
	0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
	0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
	0xc0016900, 0x2d5, 0x10000, 0xc0016900,  0x2dc, 0x0,
	0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
	0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
	0xc0026900, 0x311,  0x3, 0x0, 0xc0026900, 0x316, 0x1e, 0x20,
	0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
	0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0,
	0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
	0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
	0xc0016900, 0x314, 0x0, 0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1,
	0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0,
	0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0,
	0xc0017900, 0x24b, 0x0
};

/* Selects which pixel shader variant a draw test uses. */
enum ps_type {
	PS_CONST,
	PS_TEX
};
349
/* GFX9 pixel shader binary used by the PS_CONST draw test. */
static const uint32_t ps_const_shader_gfx9[] = {
    0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
    0xD2960000, 0x00020300, 0xD2960001, 0x00020702,
    0xC4001C0F, 0x00000100, 0xBF810000
};

/* Number of DWORDs patched at the patch point per variant below. */
static const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6;

/* Alternative tail sequences patched into ps_const_shader_gfx9, one row per
 * export-format variant. */
static const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = {
    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 },
     { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 }
    }
};

/* DWORD offset within ps_const_shader_gfx9 where patching starts. */
static const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = {
    0x00000004
};

/* Entry count of ps_const_sh_registers_gfx9 below. */
static const uint32_t ps_num_sh_registers_gfx9 = 2;

/* {SH register offset, value} pairs for the PS_CONST pixel shader. */
static const uint32_t ps_const_sh_registers_gfx9[][2] = {
    {0x2C0A, 0x000C0040},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 },
    {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 }
};

/* Entry count of ps_const_context_reg_gfx9 below. */
static const uint32_t ps_num_context_registers_gfx9 = 7;

/* {context register offset, value} pairs for the PS_CONST pixel shader. */
static const uint32_t ps_const_context_reg_gfx9[][2] = {
    {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
    {0xA1B6, 0x00000000}, //{ mmSPI_PS_IN_CONTROL,       0x00000000 },
    {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
    {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
    {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
    {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
    {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004 }
};
394
/* GFX9 pixel shader binary used by the PS_TEX draw test. */
static const uint32_t ps_tex_shader_gfx9[] = {
    0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000,
    0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00,
    0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000,
    0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F,
    0x00000100, 0xBF810000
};

/* DWORD offset within ps_tex_shader_gfx9 where patching starts. */
static const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = {
    0x0000000B
};

/* Number of DWORDs patched at the patch point per variant below. */
static const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6;

/* Alternative tail sequences patched into ps_tex_shader_gfx9, one row per
 * export-format variant. */
static const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = {
    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 },
     { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 }
    }
};

/* {SH register offset, value} pairs for the PS_TEX pixel shader. */
static const uint32_t ps_tex_sh_registers_gfx9[][2] = {
    {0x2C0A, 0x000C0081},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 },
    {0x2C0B, 0x00000018}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 }
};

/* {context register offset, value} pairs for the PS_TEX pixel shader. */
static const uint32_t ps_tex_context_reg_gfx9[][2] = {
    {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
    {0xA1B6, 0x00000001}, //{ mmSPI_PS_IN_CONTROL,       0x00000001 },
    {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
    {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
    {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
    {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
    {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004  }
};
437
/* GFX9 vertex shader binary ("RectPosTexFast") used by the draw tests. */
static const uint32_t vs_RectPosTexFast_shader_gfx9[] = {
    0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100,
    0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206,
    0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080,
    0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003,
    0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101,
    0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903,
    0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100,
    0xC400020F, 0x05060403, 0xBF810000
};

/* Pre-built PM4 context command stream for GFX9 draws. */
static const uint32_t cached_cmd_gfx9[] = {
	0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0,
	0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
	0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf,
	0xc0056900, 0x105, 0x0, 0x0,  0x0, 0x0, 0x12,
	0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0,
	0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
	0xc0026900, 0x292, 0x20, 0x60201b8,
	0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
};
459
460int amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size,
461			unsigned alignment, unsigned heap, uint64_t alloc_flags,
462			uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu,
463			uint64_t *mc_address,
464			amdgpu_va_handle *va_handle)
465{
466	struct amdgpu_bo_alloc_request request = {};
467	amdgpu_bo_handle buf_handle;
468	amdgpu_va_handle handle;
469	uint64_t vmc_addr;
470	int r;
471
472	request.alloc_size = size;
473	request.phys_alignment = alignment;
474	request.preferred_heap = heap;
475	request.flags = alloc_flags;
476
477	r = amdgpu_bo_alloc(dev, &request, &buf_handle);
478	if (r)
479		return r;
480
481	r = amdgpu_va_range_alloc(dev,
482				  amdgpu_gpu_va_range_general,
483				  size, alignment, 0, &vmc_addr,
484				  &handle, 0);
485	if (r)
486		goto error_va_alloc;
487
488	r = amdgpu_bo_va_op_raw(dev, buf_handle, 0,  ALIGN(size, getpagesize()), vmc_addr,
489				   AMDGPU_VM_PAGE_READABLE |
490				   AMDGPU_VM_PAGE_WRITEABLE |
491				   AMDGPU_VM_PAGE_EXECUTABLE |
492				   mapping_flags,
493				   AMDGPU_VA_OP_MAP);
494	if (r)
495		goto error_va_map;
496
497	r = amdgpu_bo_cpu_map(buf_handle, cpu);
498	if (r)
499		goto error_cpu_map;
500
501	*bo = buf_handle;
502	*mc_address = vmc_addr;
503	*va_handle = handle;
504
505	return 0;
506
507 error_cpu_map:
508	amdgpu_bo_cpu_unmap(buf_handle);
509
510 error_va_map:
511	amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);
512
513 error_va_alloc:
514	amdgpu_bo_free(buf_handle);
515	return r;
516}
517
518
519
520int suite_basic_tests_init(void)
521{
522	struct amdgpu_gpu_info gpu_info = {0};
523	int r;
524
525	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
526				   &minor_version, &device_handle);
527
528	if (r) {
529		if ((r == -EACCES) && (errno == EACCES))
530			printf("\n\nError:%s. "
531				"Hint:Try to run this test program as root.",
532				strerror(errno));
533		return CUE_SINIT_FAILED;
534	}
535
536	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
537	if (r)
538		return CUE_SINIT_FAILED;
539
540	family_id = gpu_info.family_id;
541
542	return CUE_SUCCESS;
543}
544
545int suite_basic_tests_clean(void)
546{
547	int r = amdgpu_device_deinitialize(device_handle);
548
549	if (r == 0)
550		return CUE_SUCCESS;
551	else
552		return CUE_SCLEAN_FAILED;
553}
554
555static void amdgpu_query_info_test(void)
556{
557	struct amdgpu_gpu_info gpu_info = {0};
558	uint32_t version, feature;
559	int r;
560
561	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
562	CU_ASSERT_EQUAL(r, 0);
563
564	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
565					  0, &version, &feature);
566	CU_ASSERT_EQUAL(r, 0);
567}
568
/* Submit one CE IB and one DE IB that live in two separate BOs as a single
 * GFX submission, wait for the fence, then free all resources. */
static void amdgpu_command_submission_gfx_separate_ibs(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* One GTT bo for the DE IB, a second one for the CE IB. */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		/* SI has no SET_CE_DE_COUNTERS packet; skip it there. */
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;	/* NOTE(review): presumably INCREMENT_CE_COUNTER - confirm */
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;	/* first IB runs on the constant engine */

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);

	CU_ASSERT_EQUAL(r, 0);

	/* Block until the submission retires. */
	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);

}
659
/* Same CE+DE submission as the separate-IBs test, but both IBs share one
 * 4 KiB bo: the DE IB starts at byte offset 16 (4 DWORDs) into it. */
static void amdgpu_command_submission_gfx_shared_ib(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
			       &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		/* SI has no SET_CE_DE_COUNTERS packet; skip it there. */
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;	/* first IB runs on the constant engine */

	/* The DE IB (IT_WAIT_ON_CE_COUNTER) lives in the same bo, 16 bytes in. */
	ptr = (uint32_t *)ib_result_cpu + 4;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);

	CU_ASSERT_EQUAL(r, 0);

	/* Block until the submission retires. */
	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
738
/* Thin wrappers that run the shared write/fill/copy helpers on the GFX ring. */
static void amdgpu_command_submission_gfx_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
}
753
/* SDMA-copy a pattern from bo1 to bo2 while two max-size VRAM and two
 * max-size GTT allocations are held to force buffer eviction, then verify
 * the copy landed intact. */
static void amdgpu_bo_eviction_test(void)
{
	const int sdma_write_length = 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo1_mc, bo2_mc;
	volatile unsigned char *bo1_cpu, *bo2_cpu;
	int i, j, r, loop1, loop2;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
	struct amdgpu_heap_info vram_info, gtt_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(4, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	/* Two max-allocation VRAM BOs to create memory pressure. */
	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
				   0, &vram_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	/* ... and two max-allocation GTT BOs. */
	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
				   0, &gtt_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
	CU_ASSERT_EQUAL(r, 0);



	loop1 = loop2 = 0;
	/* run 2x2 = 4 loops to cover the cached/USWC GTT flag combinations
	 * for both source and destination */
	while(loop1 < 2) {
		while(loop2 < 2) {
			/* allocate UC bo1 for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop1], &bo1,
						    (void**)&bo1_cpu, &bo1_mc,
						    &bo1_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* set bo1 */
			memset((void*)bo1_cpu, 0xaa, sdma_write_length);

			/* allocate UC bo2 for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop2], &bo2,
						    (void**)&bo2_cpu, &bo2_mc,
						    &bo2_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo2 */
			memset((void*)bo2_cpu, 0, sdma_write_length);

			resources[0] = bo1;
			resources[1] = bo2;
			resources[2] = vram_max[loop2];
			resources[3] = gtt_max[loop2];

			/* fulfill PM4: test DMA copy linear */
			i = j = 0;
			if (family_id == AMDGPU_FAMILY_SI) {
				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
							  sdma_write_length);
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
			} else {
				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
				/* gfx9+ SDMA encodes byte_count as count - 1 */
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else
					pm4[i++] = sdma_write_length;
				pm4[i++] = 0;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   AMDGPU_HW_IP_DMA, 0,
						   i, pm4,
						   4, resources,
						   ib_info, ibs_request);

			/* verify if SDMA test result meets with expected */
			i = 0;
			while(i < sdma_write_length) {
				CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
			}
			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			loop2++;
		}
		loop2 = 0;
		loop1++;
	}
	amdgpu_bo_free(vram_max[0]);
	amdgpu_bo_free(vram_max[1]);
	amdgpu_bo_free(gtt_max[0]);
	amdgpu_bo_free(gtt_max[1]);
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
902
903
/* Top-level GFX command-submission test: runs each GFX sub-test in turn. */
static void amdgpu_command_submission_gfx(void)
{
	/* write data using the CP */
	amdgpu_command_submission_gfx_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_gfx_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_gfx_cp_copy_data();
	/* separate IB buffers for multi-IB submission */
	amdgpu_command_submission_gfx_separate_ibs();
	/* shared IB buffer for multi-IB submission */
	amdgpu_command_submission_gfx_shared_ib();
}
917
/* Semaphore test covering two scenarios:
 *   1. signal on SDMA and wait on GFX within the same context;
 *   2. signal and wait on GFX across two different contexts.
 * Each scenario submits a single-NOP IB on the signalling queue, signals
 * the semaphore, queues a wait on the other side, submits a second NOP IB
 * there, and finally waits on the second submission's fence.
 */
static void amdgpu_semaphore_test(void)
{
	amdgpu_context_handle context_handle[2];
	amdgpu_semaphore_handle sem;
	amdgpu_bo_handle ib_result_handle[2];
	void *ib_result_cpu[2];
	uint64_t ib_result_mc_address[2];
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2] = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	uint32_t sdma_nop, gfx_nop;
	amdgpu_bo_list_handle bo_list[2];
	amdgpu_va_handle va_handle[2];
	int r, i;

	/* SI uses different NOP encodings for both the SDMA and GFX rings. */
	if (family_id == AMDGPU_FAMILY_SI) {
		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
		gfx_nop = GFX_COMPUTE_NOP_SI;
	} else {
		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
		gfx_nop = GFX_COMPUTE_NOP;
	}

	r = amdgpu_cs_create_semaphore(&sem);
	CU_ASSERT_EQUAL(r, 0);
	/* Two contexts, each with its own mapped 4 KiB IB buffer and BO list. */
	for (i = 0; i < 2; i++) {
		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle[i], &ib_result_cpu[i],
					    &ib_result_mc_address[i], &va_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
				       NULL, &bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	/* 1. same context different engine */
	/* Submit a one-dword NOP IB on the SDMA queue, then signal. */
	ptr = ib_result_cpu[0];
	ptr[0] = sdma_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	/* Queue the wait on GFX before submitting the GFX NOP IB, so the
	 * GFX submission is ordered after the SDMA signal. */
	r = amdgpu_cs_wait_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	/* Wait (500 ms timeout) for the GFX submission and check it signalled. */
	fence_status.context = context_handle[0];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* 2. same engine different context */
	/* Signal from context 0 on GFX ... */
	ptr = ib_result_cpu[0];
	ptr[0] = gfx_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	/* ... and wait from context 1 on the same (GFX) engine. */
	r = amdgpu_cs_wait_semaphore(context_handle[1], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request[1], 1);

	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[1];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* Tear down: BOs, BO lists and contexts, then the semaphore itself. */
	for (i = 0; i < 2; i++) {
		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
					     ib_result_mc_address[i], 4096);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_cs_ctx_free(context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_destroy_semaphore(sem);
	CU_ASSERT_EQUAL(r, 0);
}
1057
1058static void amdgpu_command_submission_compute_nop(void)
1059{
1060	amdgpu_context_handle context_handle;
1061	amdgpu_bo_handle ib_result_handle;
1062	void *ib_result_cpu;
1063	uint64_t ib_result_mc_address;
1064	struct amdgpu_cs_request ibs_request;
1065	struct amdgpu_cs_ib_info ib_info;
1066	struct amdgpu_cs_fence fence_status;
1067	uint32_t *ptr;
1068	uint32_t expired;
1069	int r, instance;
1070	amdgpu_bo_list_handle bo_list;
1071	amdgpu_va_handle va_handle;
1072	struct drm_amdgpu_info_hw_ip info;
1073
1074	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
1075	CU_ASSERT_EQUAL(r, 0);
1076
1077	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1078	CU_ASSERT_EQUAL(r, 0);
1079
1080	for (instance = 0; (1 << instance) & info.available_rings; instance++) {
1081		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1082					    AMDGPU_GEM_DOMAIN_GTT, 0,
1083					    &ib_result_handle, &ib_result_cpu,
1084					    &ib_result_mc_address, &va_handle);
1085		CU_ASSERT_EQUAL(r, 0);
1086
1087		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
1088				       &bo_list);
1089		CU_ASSERT_EQUAL(r, 0);
1090
1091		ptr = ib_result_cpu;
1092		memset(ptr, 0, 16);
1093		ptr[0]=PACKET3(PACKET3_NOP, 14);
1094
1095		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
1096		ib_info.ib_mc_address = ib_result_mc_address;
1097		ib_info.size = 16;
1098
1099		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
1100		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
1101		ibs_request.ring = instance;
1102		ibs_request.number_of_ibs = 1;
1103		ibs_request.ibs = &ib_info;
1104		ibs_request.resources = bo_list;
1105		ibs_request.fence_info.handle = NULL;
1106
1107		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
1108		r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
1109		CU_ASSERT_EQUAL(r, 0);
1110
1111		fence_status.context = context_handle;
1112		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
1113		fence_status.ip_instance = 0;
1114		fence_status.ring = instance;
1115		fence_status.fence = ibs_request.seq_no;
1116
1117		r = amdgpu_cs_query_fence_status(&fence_status,
1118						 AMDGPU_TIMEOUT_INFINITE,
1119						 0, &expired);
1120		CU_ASSERT_EQUAL(r, 0);
1121
1122		r = amdgpu_bo_list_destroy(bo_list);
1123		CU_ASSERT_EQUAL(r, 0);
1124
1125		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1126					     ib_result_mc_address, 4096);
1127		CU_ASSERT_EQUAL(r, 0);
1128	}
1129
1130	r = amdgpu_cs_ctx_free(context_handle);
1131	CU_ASSERT_EQUAL(r, 0);
1132}
1133
/* Write-linear test driven through the compute ring. */
static void amdgpu_command_submission_compute_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
}
1138
/* Constant-fill test driven through the compute ring. */
static void amdgpu_command_submission_compute_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
}
1143
/* Copy-linear test driven through the compute ring. */
static void amdgpu_command_submission_compute_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
}
1148
/* Top-level compute command submission test: runs the CP write/fill/copy
 * sub-tests plus a per-ring NOP submission on the compute IP.
 */
static void amdgpu_command_submission_compute(void)
{
	/* write data using the CP */
	amdgpu_command_submission_compute_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_compute_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_compute_cp_copy_data();
	/* nop test */
	amdgpu_command_submission_compute_nop();
}
1160
1161/*
1162 * caller need create/release:
1163 * pm4_src, resources, ib_info, and ibs_request
1164 * submit command stream described in ibs_request and wait for this IB accomplished
1165 */
1166static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
1167				       unsigned ip_type,
1168				       int instance, int pm4_dw, uint32_t *pm4_src,
1169				       int res_cnt, amdgpu_bo_handle *resources,
1170				       struct amdgpu_cs_ib_info *ib_info,
1171				       struct amdgpu_cs_request *ibs_request)
1172{
1173	int r;
1174	uint32_t expired;
1175	uint32_t *ring_ptr;
1176	amdgpu_bo_handle ib_result_handle;
1177	void *ib_result_cpu;
1178	uint64_t ib_result_mc_address;
1179	struct amdgpu_cs_fence fence_status = {0};
1180	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
1181	amdgpu_va_handle va_handle;
1182
1183	/* prepare CS */
1184	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
1185	CU_ASSERT_NOT_EQUAL(resources, NULL);
1186	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1187	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1188	CU_ASSERT_TRUE(pm4_dw <= 1024);
1189
1190	/* allocate IB */
1191	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1192				    AMDGPU_GEM_DOMAIN_GTT, 0,
1193				    &ib_result_handle, &ib_result_cpu,
1194				    &ib_result_mc_address, &va_handle);
1195	CU_ASSERT_EQUAL(r, 0);
1196
1197	/* copy PM4 packet to ring from caller */
1198	ring_ptr = ib_result_cpu;
1199	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));
1200
1201	ib_info->ib_mc_address = ib_result_mc_address;
1202	ib_info->size = pm4_dw;
1203
1204	ibs_request->ip_type = ip_type;
1205	ibs_request->ring = instance;
1206	ibs_request->number_of_ibs = 1;
1207	ibs_request->ibs = ib_info;
1208	ibs_request->fence_info.handle = NULL;
1209
1210	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
1211	all_res[res_cnt] = ib_result_handle;
1212
1213	r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res,
1214				  NULL, &ibs_request->resources);
1215	CU_ASSERT_EQUAL(r, 0);
1216
1217	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1218
1219	/* submit CS */
1220	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
1221	CU_ASSERT_EQUAL(r, 0);
1222
1223	r = amdgpu_bo_list_destroy(ibs_request->resources);
1224	CU_ASSERT_EQUAL(r, 0);
1225
1226	fence_status.ip_type = ip_type;
1227	fence_status.ip_instance = 0;
1228	fence_status.ring = ibs_request->ring;
1229	fence_status.context = context_handle;
1230	fence_status.fence = ibs_request->seq_no;
1231
1232	/* wait for IB accomplished */
1233	r = amdgpu_cs_query_fence_status(&fence_status,
1234					 AMDGPU_TIMEOUT_INFINITE,
1235					 0, &expired);
1236	CU_ASSERT_EQUAL(r, 0);
1237	CU_ASSERT_EQUAL(expired, true);
1238
1239	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1240				     ib_result_mc_address, 4096);
1241	CU_ASSERT_EQUAL(r, 0);
1242}
1243
/* Write-linear test: build a WRITE packet (SDMA WRITE or CP WRITE_DATA,
 * depending on ip_type) that fills a 128-dword GTT buffer with 0xdeadbeaf,
 * submit it on every available ring, and verify the buffer contents.
 * Each ring is exercised twice: once with plain GTT and once with USWC
 * (write-combined) mapping flags.
 */
static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
{
	const int sdma_write_length = 128;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	int i, j, r, loop, ring_id;
	/* loop index 0 = cached GTT, 1 = USWC */
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop = 0;
		while(loop < 2) {
			/* allocate UC bo for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length * sizeof(uint32_t),
						    4096, AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop], &bo, (void**)&bo_cpu,
						    &bo_mc, &va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo */
			memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));

			resources[0] = bo;

			/* fulfill PM4: test DMA write-linear */
			i = j = 0;
			if (ip_type == AMDGPU_HW_IP_DMA) {
				if (family_id == AMDGPU_FAMILY_SI)
					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
								  sdma_write_length);
				else
					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
							       SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
				pm4[i++] = 0xffffffff & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				/* AI+ encodes the count as (dwords - 1); SI carries
				 * it in the packet header instead. */
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else if (family_id != AMDGPU_FAMILY_SI)
					pm4[i++] = sdma_write_length;
				while(j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				    (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				/* CP path: WRITE_DATA to memory with write confirm. */
				pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
				pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				while(j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   ip_type, ring_id,
						   i, pm4,
						   1, resources,
						   ib_info, ibs_request);

			/* verify if SDMA test result meets with expected */
			i = 0;
			while(i < sdma_write_length) {
				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
			}

			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
						     sdma_write_length * sizeof(uint32_t));
			CU_ASSERT_EQUAL(r, 0);
			loop++;
		}
	}
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
1351
/* Write-linear test driven through the SDMA ring. */
static void amdgpu_command_submission_sdma_write_linear(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
}
1356
/* Constant-fill test: build a CONSTANT_FILL (SDMA) or DMA_DATA (CP)
 * packet that fills a 1 MiB GTT buffer with 0xdeadbeaf, submit it on
 * every available ring, and verify the buffer contents dword by dword.
 * Each ring is exercised with both cached-GTT and USWC mappings.
 */
static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
{
	const int sdma_write_length = 1024 * 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	int i, j, r, loop, ring_id;
	/* loop index 0 = cached GTT, 1 = USWC */
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop = 0;
		while(loop < 2) {
			/* allocate UC bo for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop], &bo, (void**)&bo_cpu,
						    &bo_mc, &va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo */
			memset((void*)bo_cpu, 0, sdma_write_length);

			resources[0] = bo;

			/* fulfill PM4: test DMA const fill */
			i = j = 0;
			if (ip_type == AMDGPU_HW_IP_DMA) {
				if (family_id == AMDGPU_FAMILY_SI) {
					/* SI fill count is in dwords (bytes / 4);
					 * NOTE(review): the >> 16 below appears to pack
					 * the high address bits into the upper half of
					 * the dword per the SI DMA packet layout —
					 * confirm against the SI sDMA spec. */
					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
								  0, 0, 0,
								  sdma_write_length / 4);
					pm4[i++] = 0xfffffffc & bo_mc;
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
				} else {
					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
							       SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
					pm4[i++] = 0xffffffff & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = 0xdeadbeaf;
					/* AI+ encodes byte count as (length - 1). */
					if (family_id >= AMDGPU_FAMILY_AI)
						pm4[i++] = sdma_write_length - 1;
					else
						pm4[i++] = sdma_write_length;
				}
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				/* CP path: DMA_DATA with SRC_SEL(2) = fill with the
				 * src-address field as constant data. */
				if (family_id == AMDGPU_FAMILY_SI) {
					pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
						   PACKET3_DMA_DATA_SI_DST_SEL(0) |
						   PACKET3_DMA_DATA_SI_SRC_SEL(2) |
						   PACKET3_DMA_DATA_SI_CP_SYNC;
					pm4[i++] = 0xffffffff & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = sdma_write_length;
				} else {
					pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
					pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
						   PACKET3_DMA_DATA_DST_SEL(0) |
						   PACKET3_DMA_DATA_SRC_SEL(2) |
						   PACKET3_DMA_DATA_CP_SYNC;
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = 0;
					pm4[i++] = 0xfffffffc & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = sdma_write_length;
				}
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   ip_type, ring_id,
						   i, pm4,
						   1, resources,
						   ib_info, ibs_request);

			/* verify if SDMA test result meets with expected */
			i = 0;
			while(i < (sdma_write_length / 4)) {
				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
			}

			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			loop++;
		}
	}
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
1484
/* Constant-fill test driven through the SDMA ring. */
static void amdgpu_command_submission_sdma_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
}
1489
1490static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
1491{
1492	const int sdma_write_length = 1024;
1493	const int pm4_dw = 256;
1494	amdgpu_context_handle context_handle;
1495	amdgpu_bo_handle bo1, bo2;
1496	amdgpu_bo_handle *resources;
1497	uint32_t *pm4;
1498	struct amdgpu_cs_ib_info *ib_info;
1499	struct amdgpu_cs_request *ibs_request;
1500	uint64_t bo1_mc, bo2_mc;
1501	volatile unsigned char *bo1_cpu, *bo2_cpu;
1502	int i, j, r, loop1, loop2, ring_id;
1503	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1504	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
1505	struct drm_amdgpu_info_hw_ip hw_ip_info;
1506
1507	pm4 = calloc(pm4_dw, sizeof(*pm4));
1508	CU_ASSERT_NOT_EQUAL(pm4, NULL);
1509
1510	ib_info = calloc(1, sizeof(*ib_info));
1511	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1512
1513	ibs_request = calloc(1, sizeof(*ibs_request));
1514	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1515
1516	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1517	CU_ASSERT_EQUAL(r, 0);
1518
1519	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1520	CU_ASSERT_EQUAL(r, 0);
1521
1522	/* prepare resource */
1523	resources = calloc(2, sizeof(amdgpu_bo_handle));
1524	CU_ASSERT_NOT_EQUAL(resources, NULL);
1525
1526	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1527		loop1 = loop2 = 0;
1528		/* run 9 circle to test all mapping combination */
1529		while(loop1 < 2) {
1530			while(loop2 < 2) {
1531				/* allocate UC bo1for sDMA use */
1532				r = amdgpu_bo_alloc_and_map(device_handle,
1533							    sdma_write_length, 4096,
1534							    AMDGPU_GEM_DOMAIN_GTT,
1535							    gtt_flags[loop1], &bo1,
1536							    (void**)&bo1_cpu, &bo1_mc,
1537							    &bo1_va_handle);
1538				CU_ASSERT_EQUAL(r, 0);
1539
1540				/* set bo1 */
1541				memset((void*)bo1_cpu, 0xaa, sdma_write_length);
1542
1543				/* allocate UC bo2 for sDMA use */
1544				r = amdgpu_bo_alloc_and_map(device_handle,
1545							    sdma_write_length, 4096,
1546							    AMDGPU_GEM_DOMAIN_GTT,
1547							    gtt_flags[loop2], &bo2,
1548							    (void**)&bo2_cpu, &bo2_mc,
1549							    &bo2_va_handle);
1550				CU_ASSERT_EQUAL(r, 0);
1551
1552				/* clear bo2 */
1553				memset((void*)bo2_cpu, 0, sdma_write_length);
1554
1555				resources[0] = bo1;
1556				resources[1] = bo2;
1557
1558				/* fulfill PM4: test DMA copy linear */
1559				i = j = 0;
1560				if (ip_type == AMDGPU_HW_IP_DMA) {
1561					if (family_id == AMDGPU_FAMILY_SI) {
1562						pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
1563									  0, 0, 0,
1564									  sdma_write_length);
1565						pm4[i++] = 0xffffffff & bo2_mc;
1566						pm4[i++] = 0xffffffff & bo1_mc;
1567						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1568						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1569					} else {
1570						pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
1571								       SDMA_COPY_SUB_OPCODE_LINEAR,
1572								       0);
1573						if (family_id >= AMDGPU_FAMILY_AI)
1574							pm4[i++] = sdma_write_length - 1;
1575						else
1576							pm4[i++] = sdma_write_length;
1577						pm4[i++] = 0;
1578						pm4[i++] = 0xffffffff & bo1_mc;
1579						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1580						pm4[i++] = 0xffffffff & bo2_mc;
1581						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1582					}
1583				} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1584					   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1585					if (family_id == AMDGPU_FAMILY_SI) {
1586						pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
1587						pm4[i++] = 0xfffffffc & bo1_mc;
1588						pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
1589							   PACKET3_DMA_DATA_SI_DST_SEL(0) |
1590							   PACKET3_DMA_DATA_SI_SRC_SEL(0) |
1591							   PACKET3_DMA_DATA_SI_CP_SYNC |
1592							   (0xffff00000000 & bo1_mc) >> 32;
1593						pm4[i++] = 0xfffffffc & bo2_mc;
1594						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1595						pm4[i++] = sdma_write_length;
1596					} else {
1597						pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
1598						pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
1599							   PACKET3_DMA_DATA_DST_SEL(0) |
1600							   PACKET3_DMA_DATA_SRC_SEL(0) |
1601							   PACKET3_DMA_DATA_CP_SYNC;
1602						pm4[i++] = 0xfffffffc & bo1_mc;
1603						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1604						pm4[i++] = 0xfffffffc & bo2_mc;
1605						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1606						pm4[i++] = sdma_write_length;
1607					}
1608				}
1609
1610				amdgpu_test_exec_cs_helper(context_handle,
1611							   ip_type, ring_id,
1612							   i, pm4,
1613							   2, resources,
1614							   ib_info, ibs_request);
1615
1616				/* verify if SDMA test result meets with expected */
1617				i = 0;
1618				while(i < sdma_write_length) {
1619					CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
1620				}
1621				r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
1622							     sdma_write_length);
1623				CU_ASSERT_EQUAL(r, 0);
1624				r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
1625							     sdma_write_length);
1626				CU_ASSERT_EQUAL(r, 0);
1627				loop2++;
1628			}
1629			loop1++;
1630		}
1631	}
1632	/* clean resources */
1633	free(resources);
1634	free(ibs_request);
1635	free(ib_info);
1636	free(pm4);
1637
1638	/* end of test */
1639	r = amdgpu_cs_ctx_free(context_handle);
1640	CU_ASSERT_EQUAL(r, 0);
1641}
1642
/* Copy-linear test driven through the SDMA ring. */
static void amdgpu_command_submission_sdma_copy_linear(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
}
1647
/* Top-level SDMA command submission test: write-linear, constant-fill
 * and copy-linear on the DMA ring.
 */
static void amdgpu_command_submission_sdma(void)
{
	amdgpu_command_submission_sdma_write_linear();
	amdgpu_command_submission_sdma_const_fill();
	amdgpu_command_submission_sdma_copy_linear();
}
1654
/* Submit two identical two-IB (CE + DE) GFX requests in one CS call and
 * wait for both fences via amdgpu_cs_wait_fences().
 *
 * wait_all: passed straight through to amdgpu_cs_wait_fences(); true waits
 * for every fence, false returns when any fence signals.
 */
static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status[2] = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r;
	int i = 0, ib_cs_num = 2;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* One IB buffer for the DE stream, one for the CE stream. */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	/* NOTE(review): the 0xc0008900 prefix packet is emitted on non-SI
	 * families only — presumably an extra CE counter init those parts
	 * require; confirm against the PM4 packet spec. */
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	/* Both requests reference the same CE+DE IB pair. */
	for (i = 0; i < ib_cs_num; i++) {
		ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
		ibs_request[i].number_of_ibs = 2;
		ibs_request[i].ibs = ib_info;
		ibs_request[i].resources = bo_list;
		ibs_request[i].fence_info.handle = NULL;
	}

	r = amdgpu_cs_submit(context_handle, 0,ibs_request, ib_cs_num);

	CU_ASSERT_EQUAL(r, 0);

	for (i = 0; i < ib_cs_num; i++) {
		fence_status[i].context = context_handle;
		fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
		fence_status[i].fence = ibs_request[i].seq_no;
	}

	r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
				AMDGPU_TIMEOUT_INFINITE,
				&expired, NULL);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
1748
/* Multi-fence test: exercise amdgpu_cs_wait_fences() in both wait-all
 * and wait-any modes.
 */
static void amdgpu_command_submission_multi_fence(void)
{
	amdgpu_command_submission_multi_fence_wait_all(true);
	amdgpu_command_submission_multi_fence_wait_all(false);
}
1754
1755static void amdgpu_userptr_test(void)
1756{
1757	int i, r, j;
1758	uint32_t *pm4 = NULL;
1759	uint64_t bo_mc;
1760	void *ptr = NULL;
1761	int pm4_dw = 256;
1762	int sdma_write_length = 4;
1763	amdgpu_bo_handle handle;
1764	amdgpu_context_handle context_handle;
1765	struct amdgpu_cs_ib_info *ib_info;
1766	struct amdgpu_cs_request *ibs_request;
1767	amdgpu_bo_handle buf_handle;
1768	amdgpu_va_handle va_handle;
1769
1770	pm4 = calloc(pm4_dw, sizeof(*pm4));
1771	CU_ASSERT_NOT_EQUAL(pm4, NULL);
1772
1773	ib_info = calloc(1, sizeof(*ib_info));
1774	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1775
1776	ibs_request = calloc(1, sizeof(*ibs_request));
1777	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1778
1779	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1780	CU_ASSERT_EQUAL(r, 0);
1781
1782	posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
1783	CU_ASSERT_NOT_EQUAL(ptr, NULL);
1784	memset(ptr, 0, BUFFER_SIZE);
1785
1786	r = amdgpu_create_bo_from_user_mem(device_handle,
1787					   ptr, BUFFER_SIZE, &buf_handle);
1788	CU_ASSERT_EQUAL(r, 0);
1789
1790	r = amdgpu_va_range_alloc(device_handle,
1791				  amdgpu_gpu_va_range_general,
1792				  BUFFER_SIZE, 1, 0, &bo_mc,
1793				  &va_handle, 0);
1794	CU_ASSERT_EQUAL(r, 0);
1795
1796	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
1797	CU_ASSERT_EQUAL(r, 0);
1798
1799	handle = buf_handle;
1800
1801	j = i = 0;
1802
1803	if (family_id == AMDGPU_FAMILY_SI)
1804		pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
1805				sdma_write_length);
1806	else
1807		pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
1808				SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
1809	pm4[i++] = 0xffffffff & bo_mc;
1810	pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1811	if (family_id >= AMDGPU_FAMILY_AI)
1812		pm4[i++] = sdma_write_length - 1;
1813	else if (family_id != AMDGPU_FAMILY_SI)
1814		pm4[i++] = sdma_write_length;
1815
1816	while (j++ < sdma_write_length)
1817		pm4[i++] = 0xdeadbeaf;
1818
1819	if (!fork()) {
1820		pm4[0] = 0x0;
1821		exit(0);
1822	}
1823
1824	amdgpu_test_exec_cs_helper(context_handle,
1825				   AMDGPU_HW_IP_DMA, 0,
1826				   i, pm4,
1827				   1, &handle,
1828				   ib_info, ibs_request);
1829	i = 0;
1830	while (i < sdma_write_length) {
1831		CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
1832	}
1833	free(ibs_request);
1834	free(ib_info);
1835	free(pm4);
1836
1837	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
1838	CU_ASSERT_EQUAL(r, 0);
1839	r = amdgpu_va_range_free(va_handle);
1840	CU_ASSERT_EQUAL(r, 0);
1841	r = amdgpu_bo_free(buf_handle);
1842	CU_ASSERT_EQUAL(r, 0);
1843	free(ptr);
1844
1845	r = amdgpu_cs_ctx_free(context_handle);
1846	CU_ASSERT_EQUAL(r, 0);
1847
1848	wait(NULL);
1849}
1850
1851static void amdgpu_sync_dependency_test(void)
1852{
1853	amdgpu_context_handle context_handle[2];
1854	amdgpu_bo_handle ib_result_handle;
1855	void *ib_result_cpu;
1856	uint64_t ib_result_mc_address;
1857	struct amdgpu_cs_request ibs_request;
1858	struct amdgpu_cs_ib_info ib_info;
1859	struct amdgpu_cs_fence fence_status;
1860	uint32_t expired;
1861	int i, j, r;
1862	amdgpu_bo_list_handle bo_list;
1863	amdgpu_va_handle va_handle;
1864	static uint32_t *ptr;
1865	uint64_t seq_no;
1866
1867	r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
1868	CU_ASSERT_EQUAL(r, 0);
1869	r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
1870	CU_ASSERT_EQUAL(r, 0);
1871
1872	r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
1873			AMDGPU_GEM_DOMAIN_GTT, 0,
1874						    &ib_result_handle, &ib_result_cpu,
1875						    &ib_result_mc_address, &va_handle);
1876	CU_ASSERT_EQUAL(r, 0);
1877
1878	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
1879			       &bo_list);
1880	CU_ASSERT_EQUAL(r, 0);
1881
1882	ptr = ib_result_cpu;
1883	i = 0;
1884
1885	memcpy(ptr + CODE_OFFSET , shader_bin, sizeof(shader_bin));
1886
1887	/* Dispatch minimal init config and verify it's executed */
1888	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
1889	ptr[i++] = 0x80000000;
1890	ptr[i++] = 0x80000000;
1891
1892	ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
1893	ptr[i++] = 0x80000000;
1894
1895
1896	/* Program compute regs */
1897	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
1898	ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1899	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
1900	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;
1901
1902
1903	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
1904	ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
1905	/*
1906	 * 002c0040         COMPUTE_PGM_RSRC1 <- VGPRS = 0
1907	                                      SGPRS = 1
1908	                                      PRIORITY = 0
1909	                                      FLOAT_MODE = 192 (0xc0)
1910	                                      PRIV = 0
1911	                                      DX10_CLAMP = 1
1912	                                      DEBUG_MODE = 0
1913	                                      IEEE_MODE = 0
1914	                                      BULKY = 0
1915	                                      CDBG_USER = 0
1916	 *
1917	 */
1918	ptr[i++] = 0x002c0040;
1919
1920
1921	/*
1922	 * 00000010         COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
1923	                                      USER_SGPR = 8
1924	                                      TRAP_PRESENT = 0
1925	                                      TGID_X_EN = 0
1926	                                      TGID_Y_EN = 0
1927	                                      TGID_Z_EN = 0
1928	                                      TG_SIZE_EN = 0
1929	                                      TIDIG_COMP_CNT = 0
1930	                                      EXCP_EN_MSB = 0
1931	                                      LDS_SIZE = 0
1932	                                      EXCP_EN = 0
1933	 *
1934	 */
1935	ptr[i++] = 0x00000010;
1936
1937
1938/*
1939 * 00000100         COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
1940                                         WAVESIZE = 0
1941 *
1942 */
1943	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
1944	ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
1945	ptr[i++] = 0x00000100;
1946
1947	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
1948	ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
1949	ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
1950	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
1951
1952	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
1953	ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
1954	ptr[i++] = 0;
1955
1956	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
1957	ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
1958	ptr[i++] = 1;
1959	ptr[i++] = 1;
1960	ptr[i++] = 1;
1961
1962
1963	/* Dispatch */
1964	ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1965	ptr[i++] = 1;
1966	ptr[i++] = 1;
1967	ptr[i++] = 1;
1968	ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */
1969
1970
1971	while (i & 7)
1972		ptr[i++] =  0xffff1000; /* type3 nop packet */
1973
1974	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
1975	ib_info.ib_mc_address = ib_result_mc_address;
1976	ib_info.size = i;
1977
1978	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
1979	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
1980	ibs_request.ring = 0;
1981	ibs_request.number_of_ibs = 1;
1982	ibs_request.ibs = &ib_info;
1983	ibs_request.resources = bo_list;
1984	ibs_request.fence_info.handle = NULL;
1985
1986	r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request, 1);
1987	CU_ASSERT_EQUAL(r, 0);
1988	seq_no = ibs_request.seq_no;
1989
1990
1991
1992	/* Prepare second command with dependency on the first */
1993	j = i;
1994	ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
1995	ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1996	ptr[i++] =          0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
1997	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
1998	ptr[i++] = 99;
1999
2000	while (i & 7)
2001		ptr[i++] =  0xffff1000; /* type3 nop packet */
2002
2003	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
2004	ib_info.ib_mc_address = ib_result_mc_address + j * 4;
2005	ib_info.size = i - j;
2006
2007	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
2008	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
2009	ibs_request.ring = 0;
2010	ibs_request.number_of_ibs = 1;
2011	ibs_request.ibs = &ib_info;
2012	ibs_request.resources = bo_list;
2013	ibs_request.fence_info.handle = NULL;
2014
2015	ibs_request.number_of_dependencies = 1;
2016
2017	ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
2018	ibs_request.dependencies[0].context = context_handle[1];
2019	ibs_request.dependencies[0].ip_instance = 0;
2020	ibs_request.dependencies[0].ring = 0;
2021	ibs_request.dependencies[0].fence = seq_no;
2022
2023
2024	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request, 1);
2025	CU_ASSERT_EQUAL(r, 0);
2026
2027
2028	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
2029	fence_status.context = context_handle[0];
2030	fence_status.ip_type = AMDGPU_HW_IP_GFX;
2031	fence_status.ip_instance = 0;
2032	fence_status.ring = 0;
2033	fence_status.fence = ibs_request.seq_no;
2034
2035	r = amdgpu_cs_query_fence_status(&fence_status,
2036		       AMDGPU_TIMEOUT_INFINITE,0, &expired);
2037	CU_ASSERT_EQUAL(r, 0);
2038
2039	/* Expect the second command to wait for shader to complete */
2040	CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);
2041
2042	r = amdgpu_bo_list_destroy(bo_list);
2043	CU_ASSERT_EQUAL(r, 0);
2044
2045	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
2046				     ib_result_mc_address, 4096);
2047	CU_ASSERT_EQUAL(r, 0);
2048
2049	r = amdgpu_cs_ctx_free(context_handle[0]);
2050	CU_ASSERT_EQUAL(r, 0);
2051	r = amdgpu_cs_ctx_free(context_handle[1]);
2052	CU_ASSERT_EQUAL(r, 0);
2053
2054	free(ibs_request.dependencies);
2055}
2056
2057static int amdgpu_dispatch_load_cs_shader(uint8_t *ptr,
2058					   int cs_type)
2059{
2060	uint32_t shader_size;
2061	const uint32_t *shader;
2062
2063	switch (cs_type) {
2064		case CS_BUFFERCLEAR:
2065			shader = bufferclear_cs_shader_gfx9;
2066			shader_size = sizeof(bufferclear_cs_shader_gfx9);
2067			break;
2068		case CS_BUFFERCOPY:
2069			shader = buffercopy_cs_shader_gfx9;
2070			shader_size = sizeof(buffercopy_cs_shader_gfx9);
2071			break;
2072		default:
2073			return -1;
2074			break;
2075	}
2076
2077	memcpy(ptr, shader, shader_size);
2078	return 0;
2079}
2080
2081static int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type)
2082{
2083	int i = 0;
2084
2085	/* Write context control and load shadowing register if necessary */
2086	if (ip_type == AMDGPU_HW_IP_GFX) {
2087		ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
2088		ptr[i++] = 0x80000000;
2089		ptr[i++] = 0x80000000;
2090	}
2091
2092	/* Issue commands to set default compute state. */
2093	/* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */
2094	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 3);
2095	ptr[i++] = 0x204;
2096	i += 3;
2097	/* clear mmCOMPUTE_RESOURCE_LIMITS */
2098	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2099	ptr[i++] = 0x215;
2100	ptr[i++] = 0;
2101	/* clear mmCOMPUTE_TMPRING_SIZE */
2102	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2103	ptr[i++] = 0x218;
2104	ptr[i++] = 0;
2105
2106	return i;
2107}
2108
2109static int amdgpu_dispatch_write_cumask(uint32_t *ptr)
2110{
2111	int i = 0;
2112
2113	/*  Issue commands to set cu mask used in current dispatch */
2114	/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */
2115	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2116	ptr[i++] = 0x216;
2117	ptr[i++] = 0xffffffff;
2118	ptr[i++] = 0xffffffff;
2119	/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */
2120	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2121	ptr[i++] = 0x219;
2122	ptr[i++] = 0xffffffff;
2123	ptr[i++] = 0xffffffff;
2124
2125	return i;
2126}
2127
2128static int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr)
2129{
2130	int i, j;
2131
2132	i = 0;
2133
2134	/* Writes shader state to HW */
2135	/* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */
2136	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2137	ptr[i++] = 0x20c;
2138	ptr[i++] = (shader_addr >> 8);
2139	ptr[i++] = (shader_addr >> 40);
2140	/* write sh regs*/
2141	for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) {
2142		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2143		/* - Gfx9ShRegBase */
2144		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00;
2145		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1];
2146	}
2147
2148	return i;
2149}
2150
/**
 * Run the bufferclear compute shader on the given @ip_type/@ring and
 * verify every byte of the 16 KiB destination buffer becomes 0x22.
 *
 * Allocates three BOs (command stream, shader code, destination),
 * builds and submits a dispatch IB, waits for its fence, checks the
 * result, then frees everything.
 */
static void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
					 uint32_t ip_type,
					 uint32_t ring)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3];
	volatile unsigned char *ptr_dst;
	void *ptr_shader;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_shader, mc_address_cmd;
	amdgpu_va_handle va_dst, va_shader, va_cmd;
	int i, r;
	int bo_dst_size = 16384;
	int bo_shader_size = 4096;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info= {0};
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t expired;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* Command buffer in GTT; zeroed so skipped dwords read back 0. */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
					AMDGPU_GEM_DOMAIN_GTT, 0,
					&bo_cmd, (void **)&ptr_cmd,
					&mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	/* Shader code lives in VRAM. */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader, &ptr_shader,
					&mc_address_shader, &va_shader);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR);
	CU_ASSERT_EQUAL(r, 0);

	/* Destination buffer the shader clears. */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_dst, (void **)&ptr_dst,
					&mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	i = 0;
	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);

	/*  Issue commands to set cu mask used in current dispatch */
	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);

	/* Writes shader state to HW */
	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);

	/* Write constant data */
	/* Writes the UAV constant data to the SGPRs. */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x240;
	ptr_cmd[i++] = mc_address_dst;
	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
	ptr_cmd[i++] = 0x400;
	ptr_cmd[i++] = 0x74fac;

	/* Sets a range of pixel shader constants */
	/* 0x22222222 is the fill value the shader writes out. */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x244;
	ptr_cmd[i++] = 0x22222222;
	ptr_cmd[i++] = 0x22222222;
	ptr_cmd[i++] = 0x22222222;
	ptr_cmd[i++] = 0x22222222;

	/* dispatch direct command */
	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
	ptr_cmd[i++] = 0x10;
	ptr_cmd[i++] = 1;
	ptr_cmd[i++] = 1;
	ptr_cmd[i++] = 1;

	/* Pad the IB to an 8-dword boundary. */
	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_dst;
	resources[1] = bo_shader;
	resources[2] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = ip_type;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for IB accomplished */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* verify if memset test result meets with expected */
	i = 0;
	while(i < bo_dst_size) {
		CU_ASSERT_EQUAL(ptr_dst[i++], 0x22);
	}

	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
2286
/**
 * Run the buffercopy compute shader on the given @ip_type/@ring and
 * verify the 16 KiB destination buffer matches the 0x55-filled source.
 *
 * Allocates four BOs (command stream, shader code, source, destination),
 * builds and submits a dispatch IB, waits for its fence, compares the
 * buffers byte by byte, then frees everything.
 */
static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
					uint32_t ip_type,
					uint32_t ring)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
	volatile unsigned char *ptr_dst;
	void *ptr_shader;
	unsigned char *ptr_src;
	uint32_t *ptr_cmd;
	uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
	amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
	int i, r;
	int bo_dst_size = 16384;
	int bo_shader_size = 4096;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info= {0};
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* Command buffer in GTT; zeroed so skipped dwords read back 0. */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	/* Shader code lives in VRAM. */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader, &ptr_shader,
					&mc_address_shader, &va_shader);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCOPY );
	CU_ASSERT_EQUAL(r, 0);

	/* Source and destination buffers share the same size. */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_src, (void **)&ptr_src,
					&mc_address_src, &va_src);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_dst, (void **)&ptr_dst,
					&mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	/* Known pattern to copy and compare against. */
	memset(ptr_src, 0x55, bo_dst_size);

	i = 0;
	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);

	/*  Issue commands to set cu mask used in current dispatch */
	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);

	/* Writes shader state to HW */
	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);

	/* Write constant data */
	/* Writes the texture resource constants data to the SGPRs */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x240;
	ptr_cmd[i++] = mc_address_src;
	ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
	ptr_cmd[i++] = 0x400;
	ptr_cmd[i++] = 0x74fac;

	/* Writes the UAV constant data to the SGPRs. */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x244;
	ptr_cmd[i++] = mc_address_dst;
	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
	ptr_cmd[i++] = 0x400;
	ptr_cmd[i++] = 0x74fac;

	/* dispatch direct command */
	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
	ptr_cmd[i++] = 0x10;
	ptr_cmd[i++] = 1;
	ptr_cmd[i++] = 1;
	ptr_cmd[i++] = 1;

	/* Pad the IB to an 8-dword boundary. */
	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_shader;
	resources[1] = bo_src;
	resources[2] = bo_dst;
	resources[3] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = ip_type;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for IB accomplished */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* verify if memcpy test result meets with expected */
	i = 0;
	while(i < bo_dst_size) {
		CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
		i++;
	}

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
2433static void amdgpu_dispatch_test(void)
2434{
2435	int r;
2436	struct drm_amdgpu_info_hw_ip info;
2437	uint32_t ring_id;
2438
2439	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
2440	CU_ASSERT_EQUAL(r, 0);
2441
2442	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2443		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id);
2444		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id);
2445	}
2446
2447	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
2448	CU_ASSERT_EQUAL(r, 0);
2449
2450	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2451		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id);
2452		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id);
2453	}
2454}
2455
2456static int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type)
2457{
2458	int i;
2459	uint32_t shader_offset= 256;
2460	uint32_t mem_offset, patch_code_offset;
2461	uint32_t shader_size, patchinfo_code_size;
2462	const uint32_t *shader;
2463	const uint32_t *patchinfo_code;
2464	const uint32_t *patchcode_offset;
2465
2466	switch (ps_type) {
2467		case PS_CONST:
2468			shader = ps_const_shader_gfx9;
2469			shader_size = sizeof(ps_const_shader_gfx9);
2470			patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9;
2471			patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9;
2472			patchcode_offset = ps_const_shader_patchinfo_offset_gfx9;
2473			break;
2474		case PS_TEX:
2475			shader = ps_tex_shader_gfx9;
2476			shader_size = sizeof(ps_tex_shader_gfx9);
2477			patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx9;
2478			patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9;
2479			patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9;
2480			break;
2481		default:
2482			return -1;
2483			break;
2484	}
2485
2486	/* write main shader program */
2487	for (i = 0 ; i < 10; i++) {
2488		mem_offset = i * shader_offset;
2489		memcpy(ptr + mem_offset, shader, shader_size);
2490	}
2491
2492	/* overwrite patch codes */
2493	for (i = 0 ; i < 10; i++) {
2494		mem_offset = i * shader_offset + patchcode_offset[0] * sizeof(uint32_t);
2495		patch_code_offset = i * patchinfo_code_size;
2496		memcpy(ptr + mem_offset,
2497			patchinfo_code + patch_code_offset,
2498			patchinfo_code_size * sizeof(uint32_t));
2499	}
2500
2501	return 0;
2502}
2503
2504/* load RectPosTexFast_VS */
2505static int amdgpu_draw_load_vs_shader(uint8_t *ptr)
2506{
2507	const uint32_t *shader;
2508	uint32_t shader_size;
2509
2510	shader = vs_RectPosTexFast_shader_gfx9;
2511	shader_size = sizeof(vs_RectPosTexFast_shader_gfx9);
2512
2513	memcpy(ptr, shader, shader_size);
2514
2515	return 0;
2516}
2517
2518static int amdgpu_draw_init(uint32_t *ptr)
2519{
2520	int i = 0;
2521	const uint32_t *preamblecache_ptr;
2522	uint32_t preamblecache_size;
2523
2524	/* Write context control and load shadowing register if necessary */
2525	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
2526	ptr[i++] = 0x80000000;
2527	ptr[i++] = 0x80000000;
2528
2529	preamblecache_ptr = preamblecache_gfx9;
2530	preamblecache_size = sizeof(preamblecache_gfx9);
2531
2532	memcpy(ptr + i, preamblecache_ptr, preamblecache_size);
2533	return i + preamblecache_size/sizeof(uint32_t);
2534}
2535
/**
 * Program the render target (CB_COLOR0_*) and depth buffer state so the
 * draw renders into the buffer at @dst_addr.
 * Returns the number of dwords written to @ptr.
 */
static int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr,
							 uint64_t dst_addr)
{
	int i = 0;

	/* setup color buffer */
	/* offset   reg
	   0xA318   CB_COLOR0_BASE
	   0xA319   CB_COLOR0_BASE_EXT
	   0xA31A   CB_COLOR0_ATTRIB2
	   0xA31B   CB_COLOR0_VIEW
	   0xA31C   CB_COLOR0_INFO
	   0xA31D   CB_COLOR0_ATTRIB
	   0xA31E   CB_COLOR0_DCC_CONTROL
	   0xA31F   CB_COLOR0_CMASK
	   0xA320   CB_COLOR0_CMASK_BASE_EXT
	   0xA321   CB_COLOR0_FMASK
	   0xA322   CB_COLOR0_FMASK_BASE_EXT
	   0xA323   CB_COLOR0_CLEAR_WORD0
	   0xA324   CB_COLOR0_CLEAR_WORD1
	   0xA325   CB_COLOR0_DCC_BASE
	   0xA326   CB_COLOR0_DCC_BASE_EXT */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15);
	ptr[i++] = 0x318;
	ptr[i++] = dst_addr >> 8;	/* base address, 256-byte aligned */
	ptr[i++] = dst_addr >> 40;	/* base address extension bits */
	ptr[i++] = 0x7c01f;
	ptr[i++] = 0;
	ptr[i++] = 0x50438;
	ptr[i++] = 0x10140000;
	/* remaining 9 registers (CMASK/FMASK/CLEAR/DCC) are skipped and keep
	 * whatever the pre-zeroed command buffer holds */
	i += 9;

	/* mmCB_MRT0_EPITCH */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x1e8;
	ptr[i++] = 0x1f;

	/* 0xA32B   CB_COLOR1_BASE */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x32b;
	ptr[i++] = 0;

	/* 0xA33A   CB_COLOR1_BASE */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x33a;
	ptr[i++] = 0;

	/* SPI_SHADER_COL_FORMAT */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x1c5;
	ptr[i++] = 9;

	/* Setup depth buffer */
	/* mmDB_Z_INFO */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
	ptr[i++] = 0xe;
	/* both payload dwords skipped (depth disabled / zeroed state) */
	i += 2;

	return i;
}
2596
/**
 * Program the rasterizer / scan-converter state for the draw, then append
 * the pre-captured gfx9 context command stream (cached_cmd_gfx9).
 * Returns the number of dwords written to @ptr.
 */
static int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr)
{
	int i = 0;
	const uint32_t *cached_cmd_ptr;
	uint32_t cached_cmd_size;

	/* mmPA_SC_TILE_STEERING_OVERRIDE */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0xd7;
	ptr[i++] = 0;

	/* type3 nop followed by an event packet header */
	ptr[i++] = 0xffff1000;
	ptr[i++] = 0xc0021000;

	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0xd7;
	ptr[i++] = 1;

	/* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
	/* all 16 sample-location dwords skipped (left pre-zeroed) */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16);
	ptr[i++] = 0x2fe;
	i += 16;

	/* mmPA_SC_CENTROID_PRIORITY_0 */
	/* both payload dwords skipped (left pre-zeroed) */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
	ptr[i++] = 0x2f5;
	i += 2;

	cached_cmd_ptr = cached_cmd_gfx9;
	cached_cmd_size = sizeof(cached_cmd_gfx9);

	memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size);
	i += cached_cmd_size/sizeof(uint32_t);

	return i;
}
2633
/**
 * Program the RectPosTexFast vertex shader state: shader address,
 * resource registers, output configuration and the shader's user-SGPR
 * constants (which differ between PS_CONST and PS_TEX draws).
 * Returns the number of dwords written to @ptr.
 */
static int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr,
						  int ps_type,
						  uint64_t shader_addr)
{
	int i = 0;

	/* mmPA_CL_VS_OUT_CNTL */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x207;
	ptr[i++] = 0;

	/* mmSPI_SHADER_PGM_RSRC3_VS */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = 0x46;
	ptr[i++] = 0xffff;	/* enable all CUs for the VS */

	/* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
	ptr[i++] = 0x48;
	ptr[i++] = shader_addr >> 8;	/* 256-byte aligned address */
	ptr[i++] = shader_addr >> 40;

	/* mmSPI_SHADER_PGM_RSRC1_VS */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = 0x4a;
	ptr[i++] = 0xc0081;
	/* mmSPI_SHADER_PGM_RSRC2_VS */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = 0x4b;
	ptr[i++] = 0x18;

	/* mmSPI_VS_OUT_CONFIG */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x1b1;
	ptr[i++] = 2;

	/* mmSPI_SHADER_POS_FORMAT */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x1c3;
	ptr[i++] = 4;

	/* user SGPRs 0x4c..0x4f: first two dwords skipped (pre-zeroed),
	 * then two floats 32.0f (0x42000000) */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr[i++] = 0x4c;
	i += 2;
	ptr[i++] = 0x42000000;
	ptr[i++] = 0x42000000;

	/* user SGPRs 0x50..0x53: payload depends on pixel shader type */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr[i++] = 0x50;
	i += 2;
	if (ps_type == PS_CONST) {
		/* all four dwords left pre-zeroed */
		i += 2;
	} else if (ps_type == PS_TEX) {
		/* two floats 1.0f (0x3f800000) */
		ptr[i++] = 0x3f800000;
		ptr[i++] = 0x3f800000;
	}

	/* user SGPRs 0x54..0x57: all four dwords left pre-zeroed */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr[i++] = 0x54;
	i += 4;

	return i;
}
2697
2698static int amdgpu_draw_ps_write2hw(uint32_t *ptr,
2699				   int ps_type,
2700				   uint64_t shader_addr)
2701{
2702	int i, j;
2703	const uint32_t *sh_registers;
2704	const uint32_t *context_registers;
2705	uint32_t num_sh_reg, num_context_reg;
2706
2707	if (ps_type == PS_CONST) {
2708		sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9;
2709		context_registers = (const uint32_t *)ps_const_context_reg_gfx9;
2710		num_sh_reg = ps_num_sh_registers_gfx9;
2711		num_context_reg = ps_num_context_registers_gfx9;
2712	} else if (ps_type == PS_TEX) {
2713		sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9;
2714		context_registers = (const uint32_t *)ps_tex_context_reg_gfx9;
2715		num_sh_reg = ps_num_sh_registers_gfx9;
2716		num_context_reg = ps_num_context_registers_gfx9;
2717	}
2718
2719	i = 0;
2720
2721	/* 0x2c07   SPI_SHADER_PGM_RSRC3_PS
2722	   0x2c08   SPI_SHADER_PGM_LO_PS
2723	   0x2c09   SPI_SHADER_PGM_HI_PS */
2724	shader_addr += 256 * 9;
2725	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
2726	ptr[i++] = 0x7;
2727	ptr[i++] = 0xffff;
2728	ptr[i++] = shader_addr >> 8;
2729	ptr[i++] = shader_addr >> 40;
2730
2731	for (j = 0; j < num_sh_reg; j++) {
2732		ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2733		ptr[i++] = sh_registers[j * 2] - 0x2c00;
2734		ptr[i++] = sh_registers[j * 2 + 1];
2735	}
2736
2737	for (j = 0; j < num_context_reg; j++) {
2738		if (context_registers[j * 2] != 0xA1C5) {
2739			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
2740			ptr[i++] = context_registers[j * 2] - 0xa000;
2741			ptr[i++] = context_registers[j * 2 + 1];
2742		}
2743
2744		if (context_registers[j * 2] == 0xA1B4) {
2745			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
2746			ptr[i++] = 0x1b3;
2747			ptr[i++] = 2;
2748		}
2749	}
2750
2751	return i;
2752}
2753
2754static int amdgpu_draw_draw(uint32_t *ptr)
2755{
2756	int i = 0;
2757
2758	/* mmIA_MULTI_VGT_PARAM */
2759	ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2760	ptr[i++] = 0x40000258;
2761	ptr[i++] = 0xd00ff;
2762
2763	/* mmVGT_PRIMITIVE_TYPE */
2764	ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2765	ptr[i++] = 0x10000242;
2766	ptr[i++] = 0x11;
2767
2768	ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1);
2769	ptr[i++] = 3;
2770	ptr[i++] = 2;
2771
2772	return i;
2773}
2774
2775void amdgpu_memset_draw(amdgpu_device_handle device_handle,
2776			amdgpu_bo_handle bo_shader_ps,
2777			amdgpu_bo_handle bo_shader_vs,
2778			uint64_t mc_address_shader_ps,
2779			uint64_t mc_address_shader_vs,
2780			uint32_t ring_id)
2781{
2782	amdgpu_context_handle context_handle;
2783	amdgpu_bo_handle bo_dst, bo_cmd, resources[4];
2784	volatile unsigned char *ptr_dst;
2785	uint32_t *ptr_cmd;
2786	uint64_t mc_address_dst, mc_address_cmd;
2787	amdgpu_va_handle va_dst, va_cmd;
2788	int i, r;
2789	int bo_dst_size = 16384;
2790	int bo_cmd_size = 4096;
2791	struct amdgpu_cs_request ibs_request = {0};
2792	struct amdgpu_cs_ib_info ib_info = {0};
2793	struct amdgpu_cs_fence fence_status = {0};
2794	uint32_t expired;
2795	amdgpu_bo_list_handle bo_list;
2796
2797	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2798	CU_ASSERT_EQUAL(r, 0);
2799
2800	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
2801					AMDGPU_GEM_DOMAIN_GTT, 0,
2802					&bo_cmd, (void **)&ptr_cmd,
2803					&mc_address_cmd, &va_cmd);
2804	CU_ASSERT_EQUAL(r, 0);
2805	memset(ptr_cmd, 0, bo_cmd_size);
2806
2807	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2808					AMDGPU_GEM_DOMAIN_VRAM, 0,
2809					&bo_dst, (void **)&ptr_dst,
2810					&mc_address_dst, &va_dst);
2811	CU_ASSERT_EQUAL(r, 0);
2812
2813	i = 0;
2814	i += amdgpu_draw_init(ptr_cmd + i);
2815
2816	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst);
2817
2818	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i);
2819
2820	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs);
2821
2822	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps);
2823
2824	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
2825	ptr_cmd[i++] = 0xc;
2826	ptr_cmd[i++] = 0x33333333;
2827	ptr_cmd[i++] = 0x33333333;
2828	ptr_cmd[i++] = 0x33333333;
2829	ptr_cmd[i++] = 0x33333333;
2830
2831	i += amdgpu_draw_draw(ptr_cmd + i);
2832
2833	while (i & 7)
2834		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
2835
2836	resources[0] = bo_dst;
2837	resources[1] = bo_shader_ps;
2838	resources[2] = bo_shader_vs;
2839	resources[3] = bo_cmd;
2840	r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list);
2841	CU_ASSERT_EQUAL(r, 0);
2842
2843	ib_info.ib_mc_address = mc_address_cmd;
2844	ib_info.size = i;
2845	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
2846	ibs_request.ring = ring_id;
2847	ibs_request.resources = bo_list;
2848	ibs_request.number_of_ibs = 1;
2849	ibs_request.ibs = &ib_info;
2850	ibs_request.fence_info.handle = NULL;
2851
2852	/* submit CS */
2853	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
2854	CU_ASSERT_EQUAL(r, 0);
2855
2856	r = amdgpu_bo_list_destroy(bo_list);
2857	CU_ASSERT_EQUAL(r, 0);
2858
2859	fence_status.ip_type = AMDGPU_HW_IP_GFX;
2860	fence_status.ip_instance = 0;
2861	fence_status.ring = ring_id;
2862	fence_status.context = context_handle;
2863	fence_status.fence = ibs_request.seq_no;
2864
2865	/* wait for IB accomplished */
2866	r = amdgpu_cs_query_fence_status(&fence_status,
2867					 AMDGPU_TIMEOUT_INFINITE,
2868					 0, &expired);
2869	CU_ASSERT_EQUAL(r, 0);
2870	CU_ASSERT_EQUAL(expired, true);
2871
2872	/* verify if memset test result meets with expected */
2873	i = 0;
2874	while(i < bo_dst_size) {
2875		CU_ASSERT_EQUAL(ptr_dst[i++], 0x33);
2876	}
2877
2878	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
2879	CU_ASSERT_EQUAL(r, 0);
2880
2881	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
2882	CU_ASSERT_EQUAL(r, 0);
2883
2884	r = amdgpu_cs_ctx_free(context_handle);
2885	CU_ASSERT_EQUAL(r, 0);
2886}
2887
2888static void amdgpu_memset_draw_test(amdgpu_device_handle device_handle,
2889				    uint32_t ring)
2890{
2891	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
2892	void *ptr_shader_ps;
2893	void *ptr_shader_vs;
2894	uint64_t mc_address_shader_ps, mc_address_shader_vs;
2895	amdgpu_va_handle va_shader_ps, va_shader_vs;
2896	int r;
2897	int bo_shader_size = 4096;
2898
2899	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
2900					AMDGPU_GEM_DOMAIN_VRAM, 0,
2901					&bo_shader_ps, &ptr_shader_ps,
2902					&mc_address_shader_ps, &va_shader_ps);
2903	CU_ASSERT_EQUAL(r, 0);
2904
2905	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
2906					AMDGPU_GEM_DOMAIN_VRAM, 0,
2907					&bo_shader_vs, &ptr_shader_vs,
2908					&mc_address_shader_vs, &va_shader_vs);
2909	CU_ASSERT_EQUAL(r, 0);
2910
2911	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_CONST);
2912	CU_ASSERT_EQUAL(r, 0);
2913
2914	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
2915	CU_ASSERT_EQUAL(r, 0);
2916
2917	amdgpu_memset_draw(device_handle, bo_shader_ps, bo_shader_vs,
2918			mc_address_shader_ps, mc_address_shader_vs, ring);
2919
2920	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
2921	CU_ASSERT_EQUAL(r, 0);
2922
2923	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
2924	CU_ASSERT_EQUAL(r, 0);
2925}
2926
2927static void amdgpu_memcpy_draw(amdgpu_device_handle device_handle,
2928			       amdgpu_bo_handle bo_shader_ps,
2929			       amdgpu_bo_handle bo_shader_vs,
2930			       uint64_t mc_address_shader_ps,
2931			       uint64_t mc_address_shader_vs,
2932			       uint32_t ring)
2933{
2934	amdgpu_context_handle context_handle;
2935	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
2936	volatile unsigned char *ptr_dst;
2937	unsigned char *ptr_src;
2938	uint32_t *ptr_cmd;
2939	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
2940	amdgpu_va_handle va_dst, va_src, va_cmd;
2941	int i, r;
2942	int bo_size = 16384;
2943	int bo_cmd_size = 4096;
2944	struct amdgpu_cs_request ibs_request = {0};
2945	struct amdgpu_cs_ib_info ib_info= {0};
2946	uint32_t hang_state, hangs, expired;
2947	amdgpu_bo_list_handle bo_list;
2948	struct amdgpu_cs_fence fence_status = {0};
2949
2950	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2951	CU_ASSERT_EQUAL(r, 0);
2952
2953	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
2954				    AMDGPU_GEM_DOMAIN_GTT, 0,
2955				    &bo_cmd, (void **)&ptr_cmd,
2956				    &mc_address_cmd, &va_cmd);
2957	CU_ASSERT_EQUAL(r, 0);
2958	memset(ptr_cmd, 0, bo_cmd_size);
2959
2960	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
2961					AMDGPU_GEM_DOMAIN_VRAM, 0,
2962					&bo_src, (void **)&ptr_src,
2963					&mc_address_src, &va_src);
2964	CU_ASSERT_EQUAL(r, 0);
2965
2966	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
2967					AMDGPU_GEM_DOMAIN_VRAM, 0,
2968					&bo_dst, (void **)&ptr_dst,
2969					&mc_address_dst, &va_dst);
2970	CU_ASSERT_EQUAL(r, 0);
2971
2972	memset(ptr_src, 0x55, bo_size);
2973
2974	i = 0;
2975	i += amdgpu_draw_init(ptr_cmd + i);
2976
2977	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst);
2978
2979	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i);
2980
2981	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs);
2982
2983	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);
2984
2985	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
2986	ptr_cmd[i++] = 0xc;
2987	ptr_cmd[i++] = mc_address_src >> 8;
2988	ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
2989	ptr_cmd[i++] = 0x7c01f;
2990	ptr_cmd[i++] = 0x90500fac;
2991	ptr_cmd[i++] = 0x3e000;
2992	i += 3;
2993
2994	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
2995	ptr_cmd[i++] = 0x14;
2996	ptr_cmd[i++] = 0x92;
2997	i += 3;
2998
2999	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3000	ptr_cmd[i++] = 0x191;
3001	ptr_cmd[i++] = 0;
3002
3003	i += amdgpu_draw_draw(ptr_cmd + i);
3004
3005	while (i & 7)
3006		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
3007
3008	resources[0] = bo_dst;
3009	resources[1] = bo_src;
3010	resources[2] = bo_shader_ps;
3011	resources[3] = bo_shader_vs;
3012	resources[4] = bo_cmd;
3013	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
3014	CU_ASSERT_EQUAL(r, 0);
3015
3016	ib_info.ib_mc_address = mc_address_cmd;
3017	ib_info.size = i;
3018	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
3019	ibs_request.ring = ring;
3020	ibs_request.resources = bo_list;
3021	ibs_request.number_of_ibs = 1;
3022	ibs_request.ibs = &ib_info;
3023	ibs_request.fence_info.handle = NULL;
3024	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
3025	CU_ASSERT_EQUAL(r, 0);
3026
3027	fence_status.ip_type = AMDGPU_HW_IP_GFX;
3028	fence_status.ip_instance = 0;
3029	fence_status.ring = ring;
3030	fence_status.context = context_handle;
3031	fence_status.fence = ibs_request.seq_no;
3032
3033	/* wait for IB accomplished */
3034	r = amdgpu_cs_query_fence_status(&fence_status,
3035					 AMDGPU_TIMEOUT_INFINITE,
3036					 0, &expired);
3037	CU_ASSERT_EQUAL(r, 0);
3038	CU_ASSERT_EQUAL(expired, true);
3039
3040	/* verify if memcpy test result meets with expected */
3041	i = 0;
3042	while(i < bo_size) {
3043		CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
3044		i++;
3045	}
3046
3047	r = amdgpu_bo_list_destroy(bo_list);
3048	CU_ASSERT_EQUAL(r, 0);
3049
3050	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
3051	CU_ASSERT_EQUAL(r, 0);
3052	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
3053	CU_ASSERT_EQUAL(r, 0);
3054
3055	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
3056	CU_ASSERT_EQUAL(r, 0);
3057
3058	r = amdgpu_cs_ctx_free(context_handle);
3059	CU_ASSERT_EQUAL(r, 0);
3060}
3061
3062static void amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring)
3063{
3064	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
3065	void *ptr_shader_ps;
3066	void *ptr_shader_vs;
3067	uint64_t mc_address_shader_ps, mc_address_shader_vs;
3068	amdgpu_va_handle va_shader_ps, va_shader_vs;
3069	int bo_shader_size = 4096;
3070	int r;
3071
3072	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
3073					AMDGPU_GEM_DOMAIN_VRAM, 0,
3074					&bo_shader_ps, &ptr_shader_ps,
3075					&mc_address_shader_ps, &va_shader_ps);
3076	CU_ASSERT_EQUAL(r, 0);
3077
3078	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
3079					AMDGPU_GEM_DOMAIN_VRAM, 0,
3080					&bo_shader_vs, &ptr_shader_vs,
3081					&mc_address_shader_vs, &va_shader_vs);
3082	CU_ASSERT_EQUAL(r, 0);
3083
3084	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_TEX);
3085	CU_ASSERT_EQUAL(r, 0);
3086
3087	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
3088	CU_ASSERT_EQUAL(r, 0);
3089
3090	amdgpu_memcpy_draw(device_handle, bo_shader_ps, bo_shader_vs,
3091			mc_address_shader_ps, mc_address_shader_vs, ring);
3092
3093	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
3094	CU_ASSERT_EQUAL(r, 0);
3095
3096	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
3097	CU_ASSERT_EQUAL(r, 0);
3098}
3099
3100static void amdgpu_draw_test(void)
3101{
3102	int r;
3103	struct drm_amdgpu_info_hw_ip info;
3104	uint32_t ring_id;
3105
3106	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
3107	CU_ASSERT_EQUAL(r, 0);
3108
3109	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
3110		amdgpu_memset_draw_test(device_handle, ring_id);
3111		amdgpu_memcpy_draw_test(device_handle, ring_id);
3112	}
3113}
3114