basic_tests.c revision 88f8a8d2
13f012e29Smrg/*
23f012e29Smrg * Copyright 2014 Advanced Micro Devices, Inc.
33f012e29Smrg *
43f012e29Smrg * Permission is hereby granted, free of charge, to any person obtaining a
53f012e29Smrg * copy of this software and associated documentation files (the "Software"),
63f012e29Smrg * to deal in the Software without restriction, including without limitation
73f012e29Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
83f012e29Smrg * and/or sell copies of the Software, and to permit persons to whom the
93f012e29Smrg * Software is furnished to do so, subject to the following conditions:
103f012e29Smrg *
113f012e29Smrg * The above copyright notice and this permission notice shall be included in
123f012e29Smrg * all copies or substantial portions of the Software.
133f012e29Smrg *
143f012e29Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
153f012e29Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
163f012e29Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
173f012e29Smrg * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
183f012e29Smrg * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
193f012e29Smrg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
203f012e29Smrg * OTHER DEALINGS IN THE SOFTWARE.
213f012e29Smrg *
223f012e29Smrg*/
233f012e29Smrg
243f012e29Smrg#include <stdio.h>
253f012e29Smrg#include <stdlib.h>
263f012e29Smrg#include <unistd.h>
2788f8a8d2Smrg#include <sys/types.h>
2888f8a8d2Smrg#ifdef MAJOR_IN_SYSMACROS
2988f8a8d2Smrg#include <sys/sysmacros.h>
3088f8a8d2Smrg#endif
3188f8a8d2Smrg#include <sys/stat.h>
3288f8a8d2Smrg#include <fcntl.h>
333f012e29Smrg#ifdef HAVE_ALLOCA_H
343f012e29Smrg# include <alloca.h>
353f012e29Smrg#endif
3600a23bdaSmrg#include <sys/wait.h>
373f012e29Smrg
383f012e29Smrg#include "CUnit/Basic.h"
393f012e29Smrg
403f012e29Smrg#include "amdgpu_test.h"
413f012e29Smrg#include "amdgpu_drm.h"
427cdc0497Smrg#include "util_math.h"
433f012e29Smrg
/*
 * Suite-wide state shared by the tests in this file.
 * suite_basic_tests_init() obtains device_handle, major_version and
 * minor_version from amdgpu_device_initialize(), and sets family_id from
 * the family_id field of the queried GPU info.
 */
443f012e29Smrgstatic  amdgpu_device_handle device_handle;
453f012e29Smrgstatic  uint32_t  major_version;
463f012e29Smrgstatic  uint32_t  minor_version;
47d8807b2fSmrgstatic  uint32_t  family_id;
483f012e29Smrg
493f012e29Smrgstatic void amdgpu_query_info_test(void);
503f012e29Smrgstatic void amdgpu_command_submission_gfx(void);
513f012e29Smrgstatic void amdgpu_command_submission_compute(void);
52d8807b2fSmrgstatic void amdgpu_command_submission_multi_fence(void);
533f012e29Smrgstatic void amdgpu_command_submission_sdma(void);
543f012e29Smrgstatic void amdgpu_userptr_test(void);
553f012e29Smrgstatic void amdgpu_semaphore_test(void);
5600a23bdaSmrgstatic void amdgpu_sync_dependency_test(void);
5700a23bdaSmrgstatic void amdgpu_bo_eviction_test(void);
5888f8a8d2Smrgstatic void amdgpu_compute_dispatch_test(void);
5988f8a8d2Smrgstatic void amdgpu_gfx_dispatch_test(void);
605324fb0dSmrgstatic void amdgpu_draw_test(void);
6188f8a8d2Smrgstatic void amdgpu_gpu_reset_test(void);
623f012e29Smrg
633f012e29Smrgstatic void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
643f012e29Smrgstatic void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
653f012e29Smrgstatic void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
6600a23bdaSmrgstatic void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
6700a23bdaSmrg				       unsigned ip_type,
6800a23bdaSmrg				       int instance, int pm4_dw, uint32_t *pm4_src,
6900a23bdaSmrg				       int res_cnt, amdgpu_bo_handle *resources,
7000a23bdaSmrg				       struct amdgpu_cs_ib_info *ib_info,
7100a23bdaSmrg				       struct amdgpu_cs_request *ibs_request);
7200a23bdaSmrg
/*
 * CUnit test table for this suite: each entry pairs a display name with the
 * test function to run; the table is terminated by CU_TEST_INFO_NULL.
 */
733f012e29SmrgCU_TestInfo basic_tests[] = {
743f012e29Smrg	{ "Query Info Test",  amdgpu_query_info_test },
753f012e29Smrg	{ "Userptr Test",  amdgpu_userptr_test },
7600a23bdaSmrg	{ "bo eviction Test",  amdgpu_bo_eviction_test },
773f012e29Smrg	{ "Command submission Test (GFX)",  amdgpu_command_submission_gfx },
783f012e29Smrg	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
79d8807b2fSmrg	{ "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
803f012e29Smrg	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
813f012e29Smrg	{ "SW semaphore Test",  amdgpu_semaphore_test },
8200a23bdaSmrg	{ "Sync dependency Test",  amdgpu_sync_dependency_test },
8388f8a8d2Smrg	{ "Dispatch Test (Compute)",  amdgpu_compute_dispatch_test },
8488f8a8d2Smrg	{ "Dispatch Test (GFX)",  amdgpu_gfx_dispatch_test },
855324fb0dSmrg	{ "Draw Test",  amdgpu_draw_test },
8688f8a8d2Smrg	{ "GPU reset Test", amdgpu_gpu_reset_test },
873f012e29Smrg	CU_TEST_INFO_NULL,
883f012e29Smrg};
893f012e29Smrg#define BUFFER_SIZE (8 * 1024)
903f012e29Smrg#define SDMA_PKT_HEADER_op_offset 0
913f012e29Smrg#define SDMA_PKT_HEADER_op_mask   0x000000FF
923f012e29Smrg#define SDMA_PKT_HEADER_op_shift  0
933f012e29Smrg#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
943f012e29Smrg#define SDMA_OPCODE_CONSTANT_FILL  11
953f012e29Smrg#       define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)           ((x) << 14)
963f012e29Smrg	/* 0 = byte fill
973f012e29Smrg	 * 2 = DW fill
983f012e29Smrg	 */
993f012e29Smrg#define SDMA_PACKET(op, sub_op, e)	((((e) & 0xFFFF) << 16) |	\
1003f012e29Smrg					(((sub_op) & 0xFF) << 8) |	\
1013f012e29Smrg					(((op) & 0xFF) << 0))
1023f012e29Smrg#define	SDMA_OPCODE_WRITE				  2
1033f012e29Smrg#       define SDMA_WRITE_SUB_OPCODE_LINEAR               0
1043f012e29Smrg#       define SDMA_WRTIE_SUB_OPCODE_TILED                1
1053f012e29Smrg
1063f012e29Smrg#define	SDMA_OPCODE_COPY				  1
1073f012e29Smrg#       define SDMA_COPY_SUB_OPCODE_LINEAR                0
1083f012e29Smrg
1093f012e29Smrg#define GFX_COMPUTE_NOP  0xffff1000
1103f012e29Smrg#define SDMA_NOP  0x0
1113f012e29Smrg
/*
 * PM4 command packet helpers.
 *
 * Header layout as encoded by the macros below:
 *   bits 31:30  packet type
 *   bits 29:16  count field (14 bits)
 *   bits 15:8   opcode (type-3 packets)
 *   bits 15:0   register (type-0 packets)
 *
 * All header builders compute in uint32_t: shifting PACKET_TYPE3 (3) left by
 * 30 as a signed int would overflow into the sign bit, which is undefined
 * behaviour.  Every expansion is fully parenthesized so it can be combined
 * with any surrounding operator.
 */
#define	PACKET_TYPE0	0
#define	PACKET_TYPE1	1
#define	PACKET_TYPE2	2
#define	PACKET_TYPE3	3

/* Field extractors for an already-built packet header. */
#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)

/* Type-0 header: register in bits 15:0, count in bits 29:16. */
#define PACKET0(reg, n)	(((uint32_t)PACKET_TYPE0 << 30) |		\
			 ((uint32_t)(reg) & 0xFFFF) |			\
			 (((uint32_t)(n) & 0x3FFF) << 16))
#define CP_PACKET2			0x80000000
#define		PACKET2_PAD_SHIFT		0
#define		PACKET2_PAD_MASK		(0x3fffffff << 0)

/* NOTE(review): REG_SET is not defined in the visible part of this file. */
#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))

/* Type-3 header: opcode in bits 15:8, count in bits 29:16. */
#define PACKET3(op, n)	(((uint32_t)PACKET_TYPE3 << 30) |		\
			 (((uint32_t)(op) & 0xFF) << 8) |		\
			 (((uint32_t)(n) & 0x3FFF) << 16))
/* Type-3 header with bit 1 additionally set. */
#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | (1u << 1))
1353f012e29Smrg
1363f012e29Smrg/* Packet 3 types */
1373f012e29Smrg#define	PACKET3_NOP					0x10
1383f012e29Smrg
1393f012e29Smrg#define	PACKET3_WRITE_DATA				0x37
1403f012e29Smrg#define		WRITE_DATA_DST_SEL(x)                   ((x) << 8)
1413f012e29Smrg		/* 0 - register
1423f012e29Smrg		 * 1 - memory (sync - via GRBM)
1433f012e29Smrg		 * 2 - gl2
1443f012e29Smrg		 * 3 - gds
1453f012e29Smrg		 * 4 - reserved
1463f012e29Smrg		 * 5 - memory (async - direct)
1473f012e29Smrg		 */
1483f012e29Smrg#define		WR_ONE_ADDR                             (1 << 16)
1493f012e29Smrg#define		WR_CONFIRM                              (1 << 20)
1503f012e29Smrg#define		WRITE_DATA_CACHE_POLICY(x)              ((x) << 25)
1513f012e29Smrg		/* 0 - LRU
1523f012e29Smrg		 * 1 - Stream
1533f012e29Smrg		 */
1543f012e29Smrg#define		WRITE_DATA_ENGINE_SEL(x)                ((x) << 30)
1553f012e29Smrg		/* 0 - me
1563f012e29Smrg		 * 1 - pfp
1573f012e29Smrg		 * 2 - ce
1583f012e29Smrg		 */
1593f012e29Smrg
1603f012e29Smrg#define	PACKET3_DMA_DATA				0x50
1613f012e29Smrg/* 1. header
1623f012e29Smrg * 2. CONTROL
1633f012e29Smrg * 3. SRC_ADDR_LO or DATA [31:0]
1643f012e29Smrg * 4. SRC_ADDR_HI [31:0]
1653f012e29Smrg * 5. DST_ADDR_LO [31:0]
1663f012e29Smrg * 6. DST_ADDR_HI [7:0]
1673f012e29Smrg * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
1683f012e29Smrg */
1693f012e29Smrg/* CONTROL */
1703f012e29Smrg#              define PACKET3_DMA_DATA_ENGINE(x)     ((x) << 0)
1713f012e29Smrg		/* 0 - ME
1723f012e29Smrg		 * 1 - PFP
1733f012e29Smrg		 */
1743f012e29Smrg#              define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
1753f012e29Smrg		/* 0 - LRU
1763f012e29Smrg		 * 1 - Stream
1773f012e29Smrg		 * 2 - Bypass
1783f012e29Smrg		 */
1793f012e29Smrg#              define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15)
1803f012e29Smrg#              define PACKET3_DMA_DATA_DST_SEL(x)  ((x) << 20)
1813f012e29Smrg		/* 0 - DST_ADDR using DAS
1823f012e29Smrg		 * 1 - GDS
1833f012e29Smrg		 * 3 - DST_ADDR using L2
1843f012e29Smrg		 */
1853f012e29Smrg#              define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
1863f012e29Smrg		/* 0 - LRU
1873f012e29Smrg		 * 1 - Stream
1883f012e29Smrg		 * 2 - Bypass
1893f012e29Smrg		 */
1903f012e29Smrg#              define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27)
1913f012e29Smrg#              define PACKET3_DMA_DATA_SRC_SEL(x)  ((x) << 29)
1923f012e29Smrg		/* 0 - SRC_ADDR using SAS
1933f012e29Smrg		 * 1 - GDS
1943f012e29Smrg		 * 2 - DATA
1953f012e29Smrg		 * 3 - SRC_ADDR using L2
1963f012e29Smrg		 */
1973f012e29Smrg#              define PACKET3_DMA_DATA_CP_SYNC     (1 << 31)
1983f012e29Smrg/* COMMAND */
1993f012e29Smrg#              define PACKET3_DMA_DATA_DIS_WC      (1 << 21)
2003f012e29Smrg#              define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22)
2013f012e29Smrg		/* 0 - none
2023f012e29Smrg		 * 1 - 8 in 16
2033f012e29Smrg		 * 2 - 8 in 32
2043f012e29Smrg		 * 3 - 8 in 64
2053f012e29Smrg		 */
2063f012e29Smrg#              define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24)
2073f012e29Smrg		/* 0 - none
2083f012e29Smrg		 * 1 - 8 in 16
2093f012e29Smrg		 * 2 - 8 in 32
2103f012e29Smrg		 * 3 - 8 in 64
2113f012e29Smrg		 */
2123f012e29Smrg#              define PACKET3_DMA_DATA_CMD_SAS     (1 << 26)
2133f012e29Smrg		/* 0 - memory
2143f012e29Smrg		 * 1 - register
2153f012e29Smrg		 */
2163f012e29Smrg#              define PACKET3_DMA_DATA_CMD_DAS     (1 << 27)
2173f012e29Smrg		/* 0 - memory
2183f012e29Smrg		 * 1 - register
2193f012e29Smrg		 */
2203f012e29Smrg#              define PACKET3_DMA_DATA_CMD_SAIC    (1 << 28)
2213f012e29Smrg#              define PACKET3_DMA_DATA_CMD_DAIC    (1 << 29)
2223f012e29Smrg#              define PACKET3_DMA_DATA_CMD_RAW_WAIT  (1 << 30)
2233f012e29Smrg
224d8807b2fSmrg#define SDMA_PACKET_SI(op, b, t, s, cnt)	((((op) & 0xF) << 28) |	\
225d8807b2fSmrg						(((b) & 0x1) << 26) |		\
226d8807b2fSmrg						(((t) & 0x1) << 23) |		\
227d8807b2fSmrg						(((s) & 0x1) << 22) |		\
228d8807b2fSmrg						(((cnt) & 0xFFFFF) << 0))
229d8807b2fSmrg#define	SDMA_OPCODE_COPY_SI	3
230d8807b2fSmrg#define SDMA_OPCODE_CONSTANT_FILL_SI	13
231d8807b2fSmrg#define SDMA_NOP_SI  0xf
232d8807b2fSmrg#define GFX_COMPUTE_NOP_SI 0x80000000
233d8807b2fSmrg#define	PACKET3_DMA_DATA_SI	0x41
234d8807b2fSmrg#              define PACKET3_DMA_DATA_SI_ENGINE(x)     ((x) << 27)
235d8807b2fSmrg		/* 0 - ME
236d8807b2fSmrg		 * 1 - PFP
237d8807b2fSmrg		 */
238d8807b2fSmrg#              define PACKET3_DMA_DATA_SI_DST_SEL(x)  ((x) << 20)
239d8807b2fSmrg		/* 0 - DST_ADDR using DAS
240d8807b2fSmrg		 * 1 - GDS
241d8807b2fSmrg		 * 3 - DST_ADDR using L2
242d8807b2fSmrg		 */
243d8807b2fSmrg#              define PACKET3_DMA_DATA_SI_SRC_SEL(x)  ((x) << 29)
244d8807b2fSmrg		/* 0 - SRC_ADDR using SAS
245d8807b2fSmrg		 * 1 - GDS
246d8807b2fSmrg		 * 2 - DATA
247d8807b2fSmrg		 * 3 - SRC_ADDR using L2
248d8807b2fSmrg		 */
249d8807b2fSmrg#              define PACKET3_DMA_DATA_SI_CP_SYNC     (1 << 31)
250d8807b2fSmrg
25100a23bdaSmrg
25200a23bdaSmrg#define PKT3_CONTEXT_CONTROL                   0x28
25300a23bdaSmrg#define     CONTEXT_CONTROL_LOAD_ENABLE(x)     (((unsigned)(x) & 0x1) << 31)
25400a23bdaSmrg#define     CONTEXT_CONTROL_LOAD_CE_RAM(x)     (((unsigned)(x) & 0x1) << 28)
25500a23bdaSmrg#define     CONTEXT_CONTROL_SHADOW_ENABLE(x)   (((unsigned)(x) & 0x1) << 31)
25600a23bdaSmrg
25700a23bdaSmrg#define PKT3_CLEAR_STATE                       0x12
25800a23bdaSmrg
25900a23bdaSmrg#define PKT3_SET_SH_REG                        0x76
26000a23bdaSmrg#define		PACKET3_SET_SH_REG_START			0x00002c00
26100a23bdaSmrg
26200a23bdaSmrg#define	PACKET3_DISPATCH_DIRECT				0x15
2635324fb0dSmrg#define PACKET3_EVENT_WRITE				0x46
2645324fb0dSmrg#define PACKET3_ACQUIRE_MEM				0x58
2655324fb0dSmrg#define PACKET3_SET_CONTEXT_REG				0x69
2665324fb0dSmrg#define PACKET3_SET_UCONFIG_REG				0x79
2675324fb0dSmrg#define PACKET3_DRAW_INDEX_AUTO				0x2D
26800a23bdaSmrg/* gfx 8 */
26900a23bdaSmrg#define mmCOMPUTE_PGM_LO                                                        0x2e0c
27000a23bdaSmrg#define mmCOMPUTE_PGM_RSRC1                                                     0x2e12
27100a23bdaSmrg#define mmCOMPUTE_TMPRING_SIZE                                                  0x2e18
27200a23bdaSmrg#define mmCOMPUTE_USER_DATA_0                                                   0x2e40
27300a23bdaSmrg#define mmCOMPUTE_USER_DATA_1                                                   0x2e41
27400a23bdaSmrg#define mmCOMPUTE_RESOURCE_LIMITS                                               0x2e15
27500a23bdaSmrg#define mmCOMPUTE_NUM_THREAD_X                                                  0x2e07
27600a23bdaSmrg
27700a23bdaSmrg
27800a23bdaSmrg
/*
 * Reverse the byte order of a 32-bit value.
 * The argument is converted to uint32_t and parenthesized so that compound
 * expressions (e.g. SWAP_32(a | b)) expand correctly and the "<< 24" step
 * never shifts into the sign bit of a signed int.
 */
#define SWAP_32(num) ((((uint32_t)(num) & 0xff000000u) >> 24) | \
		      (((uint32_t)(num) & 0x0000ff00u) << 8) | \
		      (((uint32_t)(num) & 0x00ff0000u) >> 8) | \
		      (((uint32_t)(num) & 0x000000ffu) << 24))
28300a23bdaSmrg
28400a23bdaSmrg
28500a23bdaSmrg/* Shader code
28600a23bdaSmrg * void main()
28700a23bdaSmrg{
28800a23bdaSmrg
28900a23bdaSmrg	float x = some_input;
29000a23bdaSmrg		for (unsigned i = 0; i < 1000000; i++)
29100a23bdaSmrg  	x = sin(x);
29200a23bdaSmrg
29300a23bdaSmrg	u[0] = 42u;
29400a23bdaSmrg}
29500a23bdaSmrg*/
29600a23bdaSmrg
/*
 * Shader program as 15 32-bit words, each literal passed through SWAP_32.
 * NOTE(review): presumably the compiled form of the shader sketched in the
 * preceding comment - confirm against the code that uploads it.
 */
29700a23bdaSmrgstatic  uint32_t shader_bin[] = {
29800a23bdaSmrg	SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
29900a23bdaSmrg	SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
30000a23bdaSmrg	SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
30100a23bdaSmrg	SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
30200a23bdaSmrg};
30300a23bdaSmrg
30400a23bdaSmrg#define CODE_OFFSET 512
30500a23bdaSmrg#define DATA_OFFSET 1024
30600a23bdaSmrg
3075324fb0dSmrgenum cs_type {
3085324fb0dSmrg	CS_BUFFERCLEAR,
3095324fb0dSmrg	CS_BUFFERCOPY
3105324fb0dSmrg};
3115324fb0dSmrg
3125324fb0dSmrgstatic const uint32_t bufferclear_cs_shader_gfx9[] = {
3135324fb0dSmrg    0xD1FD0000, 0x04010C08, 0x7E020204, 0x7E040205,
3145324fb0dSmrg    0x7E060206, 0x7E080207, 0xE01C2000, 0x80000100,
3155324fb0dSmrg    0xBF810000
3165324fb0dSmrg};
3175324fb0dSmrg
3185324fb0dSmrgstatic const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = {
3195324fb0dSmrg	{0x2e12, 0x000C0041},	//{ mmCOMPUTE_PGM_RSRC1,	  0x000C0041 },
3205324fb0dSmrg	{0x2e13, 0x00000090},	//{ mmCOMPUTE_PGM_RSRC2,	  0x00000090 },
3215324fb0dSmrg	{0x2e07, 0x00000040},	//{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 },
3225324fb0dSmrg	{0x2e08, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 },
3235324fb0dSmrg	{0x2e09, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
3245324fb0dSmrg};
3255324fb0dSmrg
3265324fb0dSmrgstatic const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5;
3275324fb0dSmrg
3285324fb0dSmrgstatic const uint32_t buffercopy_cs_shader_gfx9[] = {
3295324fb0dSmrg    0xD1FD0000, 0x04010C08, 0xE00C2000, 0x80000100,
3305324fb0dSmrg    0xBF8C0F70, 0xE01C2000, 0x80010100, 0xBF810000
3315324fb0dSmrg};
3325324fb0dSmrg
3335324fb0dSmrgstatic const uint32_t preamblecache_gfx9[] = {
3345324fb0dSmrg	0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
3355324fb0dSmrg	0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
3365324fb0dSmrg	0xc0026900, 0xb4,  0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
3375324fb0dSmrg	0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
3385324fb0dSmrg	0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
3395324fb0dSmrg	0xc0016900, 0x2d5, 0x10000, 0xc0016900,  0x2dc, 0x0,
3405324fb0dSmrg	0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
3415324fb0dSmrg	0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
34288f8a8d2Smrg	0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20,
3435324fb0dSmrg	0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
3445324fb0dSmrg	0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0,
3455324fb0dSmrg	0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
3465324fb0dSmrg	0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
3475324fb0dSmrg	0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
3485324fb0dSmrg	0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
34988f8a8d2Smrg	0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0,
35088f8a8d2Smrg	0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1,
3515324fb0dSmrg	0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0,
3525324fb0dSmrg	0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0,
3535324fb0dSmrg	0xc0017900, 0x24b, 0x0
3545324fb0dSmrg};
3555324fb0dSmrg
3565324fb0dSmrgenum ps_type {
3575324fb0dSmrg	PS_CONST,
3585324fb0dSmrg	PS_TEX
3595324fb0dSmrg};
3605324fb0dSmrg
3615324fb0dSmrgstatic const uint32_t ps_const_shader_gfx9[] = {
3625324fb0dSmrg    0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
3635324fb0dSmrg    0xD2960000, 0x00020300, 0xD2960001, 0x00020702,
3645324fb0dSmrg    0xC4001C0F, 0x00000100, 0xBF810000
3655324fb0dSmrg};
3665324fb0dSmrg
3675324fb0dSmrgstatic const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6;
3685324fb0dSmrg
3695324fb0dSmrgstatic const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = {
3705324fb0dSmrg    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
3715324fb0dSmrg     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 },
3725324fb0dSmrg     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 },
3735324fb0dSmrg     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 },
3745324fb0dSmrg     { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 },
3755324fb0dSmrg     { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 },
3765324fb0dSmrg     { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 },
3775324fb0dSmrg     { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 },
3785324fb0dSmrg     { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 },
3795324fb0dSmrg     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 }
3805324fb0dSmrg    }
3815324fb0dSmrg};
3825324fb0dSmrg
3835324fb0dSmrgstatic const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = {
3845324fb0dSmrg    0x00000004
3855324fb0dSmrg};
3865324fb0dSmrg
3875324fb0dSmrgstatic const uint32_t ps_num_sh_registers_gfx9 = 2;
3885324fb0dSmrg
3895324fb0dSmrgstatic const uint32_t ps_const_sh_registers_gfx9[][2] = {
3905324fb0dSmrg    {0x2C0A, 0x000C0040},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 },
3915324fb0dSmrg    {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 }
3925324fb0dSmrg};
3935324fb0dSmrg
3945324fb0dSmrgstatic const uint32_t ps_num_context_registers_gfx9 = 7;
3955324fb0dSmrg
3965324fb0dSmrgstatic const uint32_t ps_const_context_reg_gfx9[][2] = {
3975324fb0dSmrg    {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
3985324fb0dSmrg    {0xA1B6, 0x00000000}, //{ mmSPI_PS_IN_CONTROL,       0x00000000 },
3995324fb0dSmrg    {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
4005324fb0dSmrg    {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
4015324fb0dSmrg    {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
4025324fb0dSmrg    {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
4035324fb0dSmrg    {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004 }
4045324fb0dSmrg};
4055324fb0dSmrg
4065324fb0dSmrgstatic const uint32_t ps_tex_shader_gfx9[] = {
4075324fb0dSmrg    0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000,
4085324fb0dSmrg    0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00,
4095324fb0dSmrg    0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000,
4105324fb0dSmrg    0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F,
4115324fb0dSmrg    0x00000100, 0xBF810000
4125324fb0dSmrg};
4135324fb0dSmrg
4145324fb0dSmrgstatic const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = {
4155324fb0dSmrg    0x0000000B
4165324fb0dSmrg};
4175324fb0dSmrg
4185324fb0dSmrgstatic const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6;
4195324fb0dSmrg
4205324fb0dSmrgstatic const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = {
4215324fb0dSmrg    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
4225324fb0dSmrg     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 },
4235324fb0dSmrg     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 },
4245324fb0dSmrg     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 },
4255324fb0dSmrg     { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 },
4265324fb0dSmrg     { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 },
4275324fb0dSmrg     { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 },
4285324fb0dSmrg     { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 },
4295324fb0dSmrg     { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 },
4305324fb0dSmrg     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 }
4315324fb0dSmrg    }
4325324fb0dSmrg};
4335324fb0dSmrg
4345324fb0dSmrgstatic const uint32_t ps_tex_sh_registers_gfx9[][2] = {
4355324fb0dSmrg    {0x2C0A, 0x000C0081},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 },
4365324fb0dSmrg    {0x2C0B, 0x00000018}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 }
4375324fb0dSmrg};
4385324fb0dSmrg
4395324fb0dSmrgstatic const uint32_t ps_tex_context_reg_gfx9[][2] = {
4405324fb0dSmrg    {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
4415324fb0dSmrg    {0xA1B6, 0x00000001}, //{ mmSPI_PS_IN_CONTROL,       0x00000001 },
4425324fb0dSmrg    {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
4435324fb0dSmrg    {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
4445324fb0dSmrg    {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
4455324fb0dSmrg    {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
4465324fb0dSmrg    {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004  }
4475324fb0dSmrg};
4485324fb0dSmrg
4495324fb0dSmrgstatic const uint32_t vs_RectPosTexFast_shader_gfx9[] = {
4505324fb0dSmrg    0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100,
4515324fb0dSmrg    0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206,
4525324fb0dSmrg    0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080,
4535324fb0dSmrg    0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003,
4545324fb0dSmrg    0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101,
4555324fb0dSmrg    0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903,
4565324fb0dSmrg    0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100,
4575324fb0dSmrg    0xC400020F, 0x05060403, 0xBF810000
4585324fb0dSmrg};
4595324fb0dSmrg
4605324fb0dSmrgstatic const uint32_t cached_cmd_gfx9[] = {
4615324fb0dSmrg	0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0,
4625324fb0dSmrg	0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
4635324fb0dSmrg	0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf,
46488f8a8d2Smrg	0xc0056900, 0x105, 0x0, 0x0,  0x0, 0x0, 0x1a,
4655324fb0dSmrg	0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0,
4665324fb0dSmrg	0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
4675324fb0dSmrg	0xc0026900, 0x292, 0x20, 0x60201b8,
4685324fb0dSmrg	0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
4695324fb0dSmrg};
47000a23bdaSmrg
4717cdc0497Smrgint amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size,
4727cdc0497Smrg			unsigned alignment, unsigned heap, uint64_t alloc_flags,
4737cdc0497Smrg			uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu,
4747cdc0497Smrg			uint64_t *mc_address,
4757cdc0497Smrg			amdgpu_va_handle *va_handle)
4767cdc0497Smrg{
4777cdc0497Smrg	struct amdgpu_bo_alloc_request request = {};
4787cdc0497Smrg	amdgpu_bo_handle buf_handle;
4797cdc0497Smrg	amdgpu_va_handle handle;
4807cdc0497Smrg	uint64_t vmc_addr;
4817cdc0497Smrg	int r;
4827cdc0497Smrg
4837cdc0497Smrg	request.alloc_size = size;
4847cdc0497Smrg	request.phys_alignment = alignment;
4857cdc0497Smrg	request.preferred_heap = heap;
4867cdc0497Smrg	request.flags = alloc_flags;
4877cdc0497Smrg
4887cdc0497Smrg	r = amdgpu_bo_alloc(dev, &request, &buf_handle);
4897cdc0497Smrg	if (r)
4907cdc0497Smrg		return r;
4917cdc0497Smrg
4927cdc0497Smrg	r = amdgpu_va_range_alloc(dev,
4937cdc0497Smrg				  amdgpu_gpu_va_range_general,
4947cdc0497Smrg				  size, alignment, 0, &vmc_addr,
4957cdc0497Smrg				  &handle, 0);
4967cdc0497Smrg	if (r)
4977cdc0497Smrg		goto error_va_alloc;
4987cdc0497Smrg
4997cdc0497Smrg	r = amdgpu_bo_va_op_raw(dev, buf_handle, 0,  ALIGN(size, getpagesize()), vmc_addr,
5007cdc0497Smrg				   AMDGPU_VM_PAGE_READABLE |
5017cdc0497Smrg				   AMDGPU_VM_PAGE_WRITEABLE |
5027cdc0497Smrg				   AMDGPU_VM_PAGE_EXECUTABLE |
5037cdc0497Smrg				   mapping_flags,
5047cdc0497Smrg				   AMDGPU_VA_OP_MAP);
5057cdc0497Smrg	if (r)
5067cdc0497Smrg		goto error_va_map;
5077cdc0497Smrg
5087cdc0497Smrg	r = amdgpu_bo_cpu_map(buf_handle, cpu);
5097cdc0497Smrg	if (r)
5107cdc0497Smrg		goto error_cpu_map;
5117cdc0497Smrg
5127cdc0497Smrg	*bo = buf_handle;
5137cdc0497Smrg	*mc_address = vmc_addr;
5147cdc0497Smrg	*va_handle = handle;
5157cdc0497Smrg
5167cdc0497Smrg	return 0;
5177cdc0497Smrg
5187cdc0497Smrg error_cpu_map:
5197cdc0497Smrg	amdgpu_bo_cpu_unmap(buf_handle);
5207cdc0497Smrg
5217cdc0497Smrg error_va_map:
5227cdc0497Smrg	amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);
5237cdc0497Smrg
5247cdc0497Smrg error_va_alloc:
5257cdc0497Smrg	amdgpu_bo_free(buf_handle);
5267cdc0497Smrg	return r;
5277cdc0497Smrg}
5287cdc0497Smrg
5297cdc0497Smrg
5307cdc0497Smrg
5313f012e29Smrgint suite_basic_tests_init(void)
5323f012e29Smrg{
533d8807b2fSmrg	struct amdgpu_gpu_info gpu_info = {0};
5343f012e29Smrg	int r;
5353f012e29Smrg
5363f012e29Smrg	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
5373f012e29Smrg				   &minor_version, &device_handle);
5383f012e29Smrg
539d8807b2fSmrg	if (r) {
540037b3c26Smrg		if ((r == -EACCES) && (errno == EACCES))
541037b3c26Smrg			printf("\n\nError:%s. "
542037b3c26Smrg				"Hint:Try to run this test program as root.",
543037b3c26Smrg				strerror(errno));
5443f012e29Smrg		return CUE_SINIT_FAILED;
545037b3c26Smrg	}
546d8807b2fSmrg
547d8807b2fSmrg	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
548d8807b2fSmrg	if (r)
549d8807b2fSmrg		return CUE_SINIT_FAILED;
550d8807b2fSmrg
551d8807b2fSmrg	family_id = gpu_info.family_id;
552d8807b2fSmrg
553d8807b2fSmrg	return CUE_SUCCESS;
5543f012e29Smrg}
5553f012e29Smrg
5563f012e29Smrgint suite_basic_tests_clean(void)
5573f012e29Smrg{
5583f012e29Smrg	int r = amdgpu_device_deinitialize(device_handle);
5593f012e29Smrg
5603f012e29Smrg	if (r == 0)
5613f012e29Smrg		return CUE_SUCCESS;
5623f012e29Smrg	else
5633f012e29Smrg		return CUE_SCLEAN_FAILED;
5643f012e29Smrg}
5653f012e29Smrg
5663f012e29Smrgstatic void amdgpu_query_info_test(void)
5673f012e29Smrg{
5683f012e29Smrg	struct amdgpu_gpu_info gpu_info = {0};
5693f012e29Smrg	uint32_t version, feature;
5703f012e29Smrg	int r;
5713f012e29Smrg
5723f012e29Smrg	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
5733f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
5743f012e29Smrg
5753f012e29Smrg	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
5763f012e29Smrg					  0, &version, &feature);
5773f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
5783f012e29Smrg}
5793f012e29Smrg
5803f012e29Smrgstatic void amdgpu_command_submission_gfx_separate_ibs(void)
5813f012e29Smrg{
5823f012e29Smrg	amdgpu_context_handle context_handle;
5833f012e29Smrg	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
5843f012e29Smrg	void *ib_result_cpu, *ib_result_ce_cpu;
5853f012e29Smrg	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
5863f012e29Smrg	struct amdgpu_cs_request ibs_request = {0};
5873f012e29Smrg	struct amdgpu_cs_ib_info ib_info[2];
5883f012e29Smrg	struct amdgpu_cs_fence fence_status = {0};
5893f012e29Smrg	uint32_t *ptr;
5903f012e29Smrg	uint32_t expired;
5913f012e29Smrg	amdgpu_bo_list_handle bo_list;
5923f012e29Smrg	amdgpu_va_handle va_handle, va_handle_ce;
593d8807b2fSmrg	int r, i = 0;
5943f012e29Smrg
5953f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
5963f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
5973f012e29Smrg
5983f012e29Smrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
5993f012e29Smrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
6003f012e29Smrg				    &ib_result_handle, &ib_result_cpu,
6013f012e29Smrg				    &ib_result_mc_address, &va_handle);
6023f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
6033f012e29Smrg
6043f012e29Smrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
6053f012e29Smrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
6063f012e29Smrg				    &ib_result_ce_handle, &ib_result_ce_cpu,
6073f012e29Smrg				    &ib_result_ce_mc_address, &va_handle_ce);
6083f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
6093f012e29Smrg
6103f012e29Smrg	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
6113f012e29Smrg			       ib_result_ce_handle, &bo_list);
6123f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
6133f012e29Smrg
6143f012e29Smrg	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
6153f012e29Smrg
6163f012e29Smrg	/* IT_SET_CE_DE_COUNTERS */
6173f012e29Smrg	ptr = ib_result_ce_cpu;
618d8807b2fSmrg	if (family_id != AMDGPU_FAMILY_SI) {
619d8807b2fSmrg		ptr[i++] = 0xc0008900;
620d8807b2fSmrg		ptr[i++] = 0;
621d8807b2fSmrg	}
622d8807b2fSmrg	ptr[i++] = 0xc0008400;
623d8807b2fSmrg	ptr[i++] = 1;
6243f012e29Smrg	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
625d8807b2fSmrg	ib_info[0].size = i;
6263f012e29Smrg	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
6273f012e29Smrg
6283f012e29Smrg	/* IT_WAIT_ON_CE_COUNTER */
6293f012e29Smrg	ptr = ib_result_cpu;
6303f012e29Smrg	ptr[0] = 0xc0008600;
6313f012e29Smrg	ptr[1] = 0x00000001;
6323f012e29Smrg	ib_info[1].ib_mc_address = ib_result_mc_address;
6333f012e29Smrg	ib_info[1].size = 2;
6343f012e29Smrg
6353f012e29Smrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
6363f012e29Smrg	ibs_request.number_of_ibs = 2;
6373f012e29Smrg	ibs_request.ibs = ib_info;
6383f012e29Smrg	ibs_request.resources = bo_list;
6393f012e29Smrg	ibs_request.fence_info.handle = NULL;
6403f012e29Smrg
6413f012e29Smrg	r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
6423f012e29Smrg
6433f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
6443f012e29Smrg
6453f012e29Smrg	fence_status.context = context_handle;
6463f012e29Smrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
6473f012e29Smrg	fence_status.ip_instance = 0;
6483f012e29Smrg	fence_status.fence = ibs_request.seq_no;
6493f012e29Smrg
6503f012e29Smrg	r = amdgpu_cs_query_fence_status(&fence_status,
6513f012e29Smrg					 AMDGPU_TIMEOUT_INFINITE,
6523f012e29Smrg					 0, &expired);
6533f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
6543f012e29Smrg
6553f012e29Smrg	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
6563f012e29Smrg				     ib_result_mc_address, 4096);
6573f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
6583f012e29Smrg
6593f012e29Smrg	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
6603f012e29Smrg				     ib_result_ce_mc_address, 4096);
6613f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
6623f012e29Smrg
6633f012e29Smrg	r = amdgpu_bo_list_destroy(bo_list);
6643f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
6653f012e29Smrg
6663f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
6673f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
6683f012e29Smrg
6693f012e29Smrg}
6703f012e29Smrg
6713f012e29Smrgstatic void amdgpu_command_submission_gfx_shared_ib(void)
6723f012e29Smrg{
6733f012e29Smrg	amdgpu_context_handle context_handle;
6743f012e29Smrg	amdgpu_bo_handle ib_result_handle;
6753f012e29Smrg	void *ib_result_cpu;
6763f012e29Smrg	uint64_t ib_result_mc_address;
6773f012e29Smrg	struct amdgpu_cs_request ibs_request = {0};
6783f012e29Smrg	struct amdgpu_cs_ib_info ib_info[2];
6793f012e29Smrg	struct amdgpu_cs_fence fence_status = {0};
6803f012e29Smrg	uint32_t *ptr;
6813f012e29Smrg	uint32_t expired;
6823f012e29Smrg	amdgpu_bo_list_handle bo_list;
6833f012e29Smrg	amdgpu_va_handle va_handle;
684d8807b2fSmrg	int r, i = 0;
6853f012e29Smrg
6863f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
6873f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
6883f012e29Smrg
6893f012e29Smrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
6903f012e29Smrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
6913f012e29Smrg				    &ib_result_handle, &ib_result_cpu,
6923f012e29Smrg				    &ib_result_mc_address, &va_handle);
6933f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
6943f012e29Smrg
6953f012e29Smrg	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
6963f012e29Smrg			       &bo_list);
6973f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
6983f012e29Smrg
6993f012e29Smrg	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
7003f012e29Smrg
7013f012e29Smrg	/* IT_SET_CE_DE_COUNTERS */
7023f012e29Smrg	ptr = ib_result_cpu;
703d8807b2fSmrg	if (family_id != AMDGPU_FAMILY_SI) {
704d8807b2fSmrg		ptr[i++] = 0xc0008900;
705d8807b2fSmrg		ptr[i++] = 0;
706d8807b2fSmrg	}
707d8807b2fSmrg	ptr[i++] = 0xc0008400;
708d8807b2fSmrg	ptr[i++] = 1;
7093f012e29Smrg	ib_info[0].ib_mc_address = ib_result_mc_address;
710d8807b2fSmrg	ib_info[0].size = i;
7113f012e29Smrg	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
7123f012e29Smrg
7133f012e29Smrg	ptr = (uint32_t *)ib_result_cpu + 4;
7143f012e29Smrg	ptr[0] = 0xc0008600;
7153f012e29Smrg	ptr[1] = 0x00000001;
7163f012e29Smrg	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
7173f012e29Smrg	ib_info[1].size = 2;
7183f012e29Smrg
7193f012e29Smrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
7203f012e29Smrg	ibs_request.number_of_ibs = 2;
7213f012e29Smrg	ibs_request.ibs = ib_info;
7223f012e29Smrg	ibs_request.resources = bo_list;
7233f012e29Smrg	ibs_request.fence_info.handle = NULL;
7243f012e29Smrg
7253f012e29Smrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
7263f012e29Smrg
7273f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7283f012e29Smrg
7293f012e29Smrg	fence_status.context = context_handle;
7303f012e29Smrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
7313f012e29Smrg	fence_status.ip_instance = 0;
7323f012e29Smrg	fence_status.fence = ibs_request.seq_no;
7333f012e29Smrg
7343f012e29Smrg	r = amdgpu_cs_query_fence_status(&fence_status,
7353f012e29Smrg					 AMDGPU_TIMEOUT_INFINITE,
7363f012e29Smrg					 0, &expired);
7373f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7383f012e29Smrg
7393f012e29Smrg	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
7403f012e29Smrg				     ib_result_mc_address, 4096);
7413f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7423f012e29Smrg
7433f012e29Smrg	r = amdgpu_bo_list_destroy(bo_list);
7443f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7453f012e29Smrg
7463f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
7473f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7483f012e29Smrg}
7493f012e29Smrg
7503f012e29Smrgstatic void amdgpu_command_submission_gfx_cp_write_data(void)
7513f012e29Smrg{
7523f012e29Smrg	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
7533f012e29Smrg}
7543f012e29Smrg
7553f012e29Smrgstatic void amdgpu_command_submission_gfx_cp_const_fill(void)
7563f012e29Smrg{
7573f012e29Smrg	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
7583f012e29Smrg}
7593f012e29Smrg
7603f012e29Smrgstatic void amdgpu_command_submission_gfx_cp_copy_data(void)
7613f012e29Smrg{
7623f012e29Smrg	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
7633f012e29Smrg}
7643f012e29Smrg
76500a23bdaSmrgstatic void amdgpu_bo_eviction_test(void)
76600a23bdaSmrg{
76700a23bdaSmrg	const int sdma_write_length = 1024;
76800a23bdaSmrg	const int pm4_dw = 256;
76900a23bdaSmrg	amdgpu_context_handle context_handle;
77000a23bdaSmrg	amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
77100a23bdaSmrg	amdgpu_bo_handle *resources;
77200a23bdaSmrg	uint32_t *pm4;
77300a23bdaSmrg	struct amdgpu_cs_ib_info *ib_info;
77400a23bdaSmrg	struct amdgpu_cs_request *ibs_request;
77500a23bdaSmrg	uint64_t bo1_mc, bo2_mc;
77600a23bdaSmrg	volatile unsigned char *bo1_cpu, *bo2_cpu;
77700a23bdaSmrg	int i, j, r, loop1, loop2;
77800a23bdaSmrg	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
77900a23bdaSmrg	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
78000a23bdaSmrg	struct amdgpu_heap_info vram_info, gtt_info;
78100a23bdaSmrg
78200a23bdaSmrg	pm4 = calloc(pm4_dw, sizeof(*pm4));
78300a23bdaSmrg	CU_ASSERT_NOT_EQUAL(pm4, NULL);
78400a23bdaSmrg
78500a23bdaSmrg	ib_info = calloc(1, sizeof(*ib_info));
78600a23bdaSmrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
78700a23bdaSmrg
78800a23bdaSmrg	ibs_request = calloc(1, sizeof(*ibs_request));
78900a23bdaSmrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
79000a23bdaSmrg
79100a23bdaSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
79200a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
79300a23bdaSmrg
79400a23bdaSmrg	/* prepare resource */
79500a23bdaSmrg	resources = calloc(4, sizeof(amdgpu_bo_handle));
79600a23bdaSmrg	CU_ASSERT_NOT_EQUAL(resources, NULL);
79700a23bdaSmrg
79800a23bdaSmrg	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
79900a23bdaSmrg				   0, &vram_info);
80000a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
80100a23bdaSmrg
80200a23bdaSmrg	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
80300a23bdaSmrg				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
80400a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
80500a23bdaSmrg	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
80600a23bdaSmrg				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
80700a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
80800a23bdaSmrg
80900a23bdaSmrg	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
81000a23bdaSmrg				   0, &gtt_info);
81100a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
81200a23bdaSmrg
81300a23bdaSmrg	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
81400a23bdaSmrg				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
81500a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
81600a23bdaSmrg	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
81700a23bdaSmrg				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
81800a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
81900a23bdaSmrg
82000a23bdaSmrg
82100a23bdaSmrg
82200a23bdaSmrg	loop1 = loop2 = 0;
82300a23bdaSmrg	/* run 9 circle to test all mapping combination */
82400a23bdaSmrg	while(loop1 < 2) {
82500a23bdaSmrg		while(loop2 < 2) {
82600a23bdaSmrg			/* allocate UC bo1for sDMA use */
82700a23bdaSmrg			r = amdgpu_bo_alloc_and_map(device_handle,
82800a23bdaSmrg						    sdma_write_length, 4096,
82900a23bdaSmrg						    AMDGPU_GEM_DOMAIN_GTT,
83000a23bdaSmrg						    gtt_flags[loop1], &bo1,
83100a23bdaSmrg						    (void**)&bo1_cpu, &bo1_mc,
83200a23bdaSmrg						    &bo1_va_handle);
83300a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
83400a23bdaSmrg
83500a23bdaSmrg			/* set bo1 */
83600a23bdaSmrg			memset((void*)bo1_cpu, 0xaa, sdma_write_length);
83700a23bdaSmrg
83800a23bdaSmrg			/* allocate UC bo2 for sDMA use */
83900a23bdaSmrg			r = amdgpu_bo_alloc_and_map(device_handle,
84000a23bdaSmrg						    sdma_write_length, 4096,
84100a23bdaSmrg						    AMDGPU_GEM_DOMAIN_GTT,
84200a23bdaSmrg						    gtt_flags[loop2], &bo2,
84300a23bdaSmrg						    (void**)&bo2_cpu, &bo2_mc,
84400a23bdaSmrg						    &bo2_va_handle);
84500a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
84600a23bdaSmrg
84700a23bdaSmrg			/* clear bo2 */
84800a23bdaSmrg			memset((void*)bo2_cpu, 0, sdma_write_length);
84900a23bdaSmrg
85000a23bdaSmrg			resources[0] = bo1;
85100a23bdaSmrg			resources[1] = bo2;
85200a23bdaSmrg			resources[2] = vram_max[loop2];
85300a23bdaSmrg			resources[3] = gtt_max[loop2];
85400a23bdaSmrg
85500a23bdaSmrg			/* fulfill PM4: test DMA copy linear */
85600a23bdaSmrg			i = j = 0;
85700a23bdaSmrg			if (family_id == AMDGPU_FAMILY_SI) {
85800a23bdaSmrg				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
85900a23bdaSmrg							  sdma_write_length);
86000a23bdaSmrg				pm4[i++] = 0xffffffff & bo2_mc;
86100a23bdaSmrg				pm4[i++] = 0xffffffff & bo1_mc;
86200a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
86300a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
86400a23bdaSmrg			} else {
86500a23bdaSmrg				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
86600a23bdaSmrg				if (family_id >= AMDGPU_FAMILY_AI)
86700a23bdaSmrg					pm4[i++] = sdma_write_length - 1;
86800a23bdaSmrg				else
86900a23bdaSmrg					pm4[i++] = sdma_write_length;
87000a23bdaSmrg				pm4[i++] = 0;
87100a23bdaSmrg				pm4[i++] = 0xffffffff & bo1_mc;
87200a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
87300a23bdaSmrg				pm4[i++] = 0xffffffff & bo2_mc;
87400a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
87500a23bdaSmrg			}
87600a23bdaSmrg
87700a23bdaSmrg			amdgpu_test_exec_cs_helper(context_handle,
87800a23bdaSmrg						   AMDGPU_HW_IP_DMA, 0,
87900a23bdaSmrg						   i, pm4,
88000a23bdaSmrg						   4, resources,
88100a23bdaSmrg						   ib_info, ibs_request);
88200a23bdaSmrg
88300a23bdaSmrg			/* verify if SDMA test result meets with expected */
88400a23bdaSmrg			i = 0;
88500a23bdaSmrg			while(i < sdma_write_length) {
88600a23bdaSmrg				CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
88700a23bdaSmrg			}
88800a23bdaSmrg			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
88900a23bdaSmrg						     sdma_write_length);
89000a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
89100a23bdaSmrg			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
89200a23bdaSmrg						     sdma_write_length);
89300a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
89400a23bdaSmrg			loop2++;
89500a23bdaSmrg		}
89600a23bdaSmrg		loop2 = 0;
89700a23bdaSmrg		loop1++;
89800a23bdaSmrg	}
89900a23bdaSmrg	amdgpu_bo_free(vram_max[0]);
90000a23bdaSmrg	amdgpu_bo_free(vram_max[1]);
90100a23bdaSmrg	amdgpu_bo_free(gtt_max[0]);
90200a23bdaSmrg	amdgpu_bo_free(gtt_max[1]);
90300a23bdaSmrg	/* clean resources */
90400a23bdaSmrg	free(resources);
90500a23bdaSmrg	free(ibs_request);
90600a23bdaSmrg	free(ib_info);
90700a23bdaSmrg	free(pm4);
90800a23bdaSmrg
90900a23bdaSmrg	/* end of test */
91000a23bdaSmrg	r = amdgpu_cs_ctx_free(context_handle);
91100a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
91200a23bdaSmrg}
91300a23bdaSmrg
91400a23bdaSmrg
9153f012e29Smrgstatic void amdgpu_command_submission_gfx(void)
9163f012e29Smrg{
9173f012e29Smrg	/* write data using the CP */
9183f012e29Smrg	amdgpu_command_submission_gfx_cp_write_data();
9193f012e29Smrg	/* const fill using the CP */
9203f012e29Smrg	amdgpu_command_submission_gfx_cp_const_fill();
9213f012e29Smrg	/* copy data using the CP */
9223f012e29Smrg	amdgpu_command_submission_gfx_cp_copy_data();
9233f012e29Smrg	/* separate IB buffers for multi-IB submission */
9243f012e29Smrg	amdgpu_command_submission_gfx_separate_ibs();
9253f012e29Smrg	/* shared IB buffer for multi-IB submission */
9263f012e29Smrg	amdgpu_command_submission_gfx_shared_ib();
9273f012e29Smrg}
9283f012e29Smrg
9293f012e29Smrgstatic void amdgpu_semaphore_test(void)
9303f012e29Smrg{
9313f012e29Smrg	amdgpu_context_handle context_handle[2];
9323f012e29Smrg	amdgpu_semaphore_handle sem;
9333f012e29Smrg	amdgpu_bo_handle ib_result_handle[2];
9343f012e29Smrg	void *ib_result_cpu[2];
9353f012e29Smrg	uint64_t ib_result_mc_address[2];
9363f012e29Smrg	struct amdgpu_cs_request ibs_request[2] = {0};
9373f012e29Smrg	struct amdgpu_cs_ib_info ib_info[2] = {0};
9383f012e29Smrg	struct amdgpu_cs_fence fence_status = {0};
9393f012e29Smrg	uint32_t *ptr;
9403f012e29Smrg	uint32_t expired;
941d8807b2fSmrg	uint32_t sdma_nop, gfx_nop;
9423f012e29Smrg	amdgpu_bo_list_handle bo_list[2];
9433f012e29Smrg	amdgpu_va_handle va_handle[2];
9443f012e29Smrg	int r, i;
9453f012e29Smrg
946d8807b2fSmrg	if (family_id == AMDGPU_FAMILY_SI) {
947d8807b2fSmrg		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
948d8807b2fSmrg		gfx_nop = GFX_COMPUTE_NOP_SI;
949d8807b2fSmrg	} else {
950d8807b2fSmrg		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
951d8807b2fSmrg		gfx_nop = GFX_COMPUTE_NOP;
952d8807b2fSmrg	}
953d8807b2fSmrg
9543f012e29Smrg	r = amdgpu_cs_create_semaphore(&sem);
9553f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
9563f012e29Smrg	for (i = 0; i < 2; i++) {
9573f012e29Smrg		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
9583f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
9593f012e29Smrg
9603f012e29Smrg		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
9613f012e29Smrg					    AMDGPU_GEM_DOMAIN_GTT, 0,
9623f012e29Smrg					    &ib_result_handle[i], &ib_result_cpu[i],
9633f012e29Smrg					    &ib_result_mc_address[i], &va_handle[i]);
9643f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
9653f012e29Smrg
9663f012e29Smrg		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
9673f012e29Smrg				       NULL, &bo_list[i]);
9683f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
9693f012e29Smrg	}
9703f012e29Smrg
9713f012e29Smrg	/* 1. same context different engine */
9723f012e29Smrg	ptr = ib_result_cpu[0];
973d8807b2fSmrg	ptr[0] = sdma_nop;
9743f012e29Smrg	ib_info[0].ib_mc_address = ib_result_mc_address[0];
9753f012e29Smrg	ib_info[0].size = 1;
9763f012e29Smrg
9773f012e29Smrg	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
9783f012e29Smrg	ibs_request[0].number_of_ibs = 1;
9793f012e29Smrg	ibs_request[0].ibs = &ib_info[0];
9803f012e29Smrg	ibs_request[0].resources = bo_list[0];
9813f012e29Smrg	ibs_request[0].fence_info.handle = NULL;
9823f012e29Smrg	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
9833f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
9843f012e29Smrg	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
9853f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
9863f012e29Smrg
9873f012e29Smrg	r = amdgpu_cs_wait_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
9883f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
9893f012e29Smrg	ptr = ib_result_cpu[1];
990d8807b2fSmrg	ptr[0] = gfx_nop;
9913f012e29Smrg	ib_info[1].ib_mc_address = ib_result_mc_address[1];
9923f012e29Smrg	ib_info[1].size = 1;
9933f012e29Smrg
9943f012e29Smrg	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
9953f012e29Smrg	ibs_request[1].number_of_ibs = 1;
9963f012e29Smrg	ibs_request[1].ibs = &ib_info[1];
9973f012e29Smrg	ibs_request[1].resources = bo_list[1];
9983f012e29Smrg	ibs_request[1].fence_info.handle = NULL;
9993f012e29Smrg
10003f012e29Smrg	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[1], 1);
10013f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
10023f012e29Smrg
10033f012e29Smrg	fence_status.context = context_handle[0];
10043f012e29Smrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
10053f012e29Smrg	fence_status.ip_instance = 0;
10063f012e29Smrg	fence_status.fence = ibs_request[1].seq_no;
10073f012e29Smrg	r = amdgpu_cs_query_fence_status(&fence_status,
10083f012e29Smrg					 500000000, 0, &expired);
10093f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
10103f012e29Smrg	CU_ASSERT_EQUAL(expired, true);
10113f012e29Smrg
10123f012e29Smrg	/* 2. same engine different context */
10133f012e29Smrg	ptr = ib_result_cpu[0];
1014d8807b2fSmrg	ptr[0] = gfx_nop;
10153f012e29Smrg	ib_info[0].ib_mc_address = ib_result_mc_address[0];
10163f012e29Smrg	ib_info[0].size = 1;
10173f012e29Smrg
10183f012e29Smrg	ibs_request[0].ip_type = AMDGPU_HW_IP_GFX;
10193f012e29Smrg	ibs_request[0].number_of_ibs = 1;
10203f012e29Smrg	ibs_request[0].ibs = &ib_info[0];
10213f012e29Smrg	ibs_request[0].resources = bo_list[0];
10223f012e29Smrg	ibs_request[0].fence_info.handle = NULL;
10233f012e29Smrg	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
10243f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
10253f012e29Smrg	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
10263f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
10273f012e29Smrg
10283f012e29Smrg	r = amdgpu_cs_wait_semaphore(context_handle[1], AMDGPU_HW_IP_GFX, 0, 0, sem);
10293f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
10303f012e29Smrg	ptr = ib_result_cpu[1];
1031d8807b2fSmrg	ptr[0] = gfx_nop;
10323f012e29Smrg	ib_info[1].ib_mc_address = ib_result_mc_address[1];
10333f012e29Smrg	ib_info[1].size = 1;
10343f012e29Smrg
10353f012e29Smrg	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
10363f012e29Smrg	ibs_request[1].number_of_ibs = 1;
10373f012e29Smrg	ibs_request[1].ibs = &ib_info[1];
10383f012e29Smrg	ibs_request[1].resources = bo_list[1];
10393f012e29Smrg	ibs_request[1].fence_info.handle = NULL;
10403f012e29Smrg	r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request[1], 1);
10413f012e29Smrg
10423f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
10433f012e29Smrg
10443f012e29Smrg	fence_status.context = context_handle[1];
10453f012e29Smrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
10463f012e29Smrg	fence_status.ip_instance = 0;
10473f012e29Smrg	fence_status.fence = ibs_request[1].seq_no;
10483f012e29Smrg	r = amdgpu_cs_query_fence_status(&fence_status,
10493f012e29Smrg					 500000000, 0, &expired);
10503f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
10513f012e29Smrg	CU_ASSERT_EQUAL(expired, true);
1052d8807b2fSmrg
10533f012e29Smrg	for (i = 0; i < 2; i++) {
10543f012e29Smrg		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
10553f012e29Smrg					     ib_result_mc_address[i], 4096);
10563f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
10573f012e29Smrg
10583f012e29Smrg		r = amdgpu_bo_list_destroy(bo_list[i]);
10593f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
10603f012e29Smrg
10613f012e29Smrg		r = amdgpu_cs_ctx_free(context_handle[i]);
10623f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
10633f012e29Smrg	}
10643f012e29Smrg
10653f012e29Smrg	r = amdgpu_cs_destroy_semaphore(sem);
10663f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
10673f012e29Smrg}
10683f012e29Smrg
10693f012e29Smrgstatic void amdgpu_command_submission_compute_nop(void)
10703f012e29Smrg{
10713f012e29Smrg	amdgpu_context_handle context_handle;
10723f012e29Smrg	amdgpu_bo_handle ib_result_handle;
10733f012e29Smrg	void *ib_result_cpu;
10743f012e29Smrg	uint64_t ib_result_mc_address;
10753f012e29Smrg	struct amdgpu_cs_request ibs_request;
10763f012e29Smrg	struct amdgpu_cs_ib_info ib_info;
10773f012e29Smrg	struct amdgpu_cs_fence fence_status;
10783f012e29Smrg	uint32_t *ptr;
10793f012e29Smrg	uint32_t expired;
108000a23bdaSmrg	int r, instance;
10813f012e29Smrg	amdgpu_bo_list_handle bo_list;
10823f012e29Smrg	amdgpu_va_handle va_handle;
1083d8807b2fSmrg	struct drm_amdgpu_info_hw_ip info;
1084d8807b2fSmrg
1085d8807b2fSmrg	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
1086d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
10873f012e29Smrg
10883f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
10893f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
10903f012e29Smrg
1091d8807b2fSmrg	for (instance = 0; (1 << instance) & info.available_rings; instance++) {
10923f012e29Smrg		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
10933f012e29Smrg					    AMDGPU_GEM_DOMAIN_GTT, 0,
10943f012e29Smrg					    &ib_result_handle, &ib_result_cpu,
10953f012e29Smrg					    &ib_result_mc_address, &va_handle);
10963f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
10973f012e29Smrg
10983f012e29Smrg		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
10993f012e29Smrg				       &bo_list);
11003f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
11013f012e29Smrg
11023f012e29Smrg		ptr = ib_result_cpu;
1103d8807b2fSmrg		memset(ptr, 0, 16);
1104d8807b2fSmrg		ptr[0]=PACKET3(PACKET3_NOP, 14);
11053f012e29Smrg
11063f012e29Smrg		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
11073f012e29Smrg		ib_info.ib_mc_address = ib_result_mc_address;
11083f012e29Smrg		ib_info.size = 16;
11093f012e29Smrg
11103f012e29Smrg		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
11113f012e29Smrg		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
11123f012e29Smrg		ibs_request.ring = instance;
11133f012e29Smrg		ibs_request.number_of_ibs = 1;
11143f012e29Smrg		ibs_request.ibs = &ib_info;
11153f012e29Smrg		ibs_request.resources = bo_list;
11163f012e29Smrg		ibs_request.fence_info.handle = NULL;
11173f012e29Smrg
11183f012e29Smrg		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
11193f012e29Smrg		r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
11203f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
11213f012e29Smrg
11223f012e29Smrg		fence_status.context = context_handle;
11233f012e29Smrg		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
11243f012e29Smrg		fence_status.ip_instance = 0;
11253f012e29Smrg		fence_status.ring = instance;
11263f012e29Smrg		fence_status.fence = ibs_request.seq_no;
11273f012e29Smrg
11283f012e29Smrg		r = amdgpu_cs_query_fence_status(&fence_status,
11293f012e29Smrg						 AMDGPU_TIMEOUT_INFINITE,
11303f012e29Smrg						 0, &expired);
11313f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
11323f012e29Smrg
11333f012e29Smrg		r = amdgpu_bo_list_destroy(bo_list);
11343f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
11353f012e29Smrg
11363f012e29Smrg		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
11373f012e29Smrg					     ib_result_mc_address, 4096);
11383f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
11393f012e29Smrg	}
11403f012e29Smrg
11413f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
11423f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
11433f012e29Smrg}
11443f012e29Smrg
11453f012e29Smrgstatic void amdgpu_command_submission_compute_cp_write_data(void)
11463f012e29Smrg{
11473f012e29Smrg	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
11483f012e29Smrg}
11493f012e29Smrg
11503f012e29Smrgstatic void amdgpu_command_submission_compute_cp_const_fill(void)
11513f012e29Smrg{
11523f012e29Smrg	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
11533f012e29Smrg}
11543f012e29Smrg
11553f012e29Smrgstatic void amdgpu_command_submission_compute_cp_copy_data(void)
11563f012e29Smrg{
11573f012e29Smrg	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
11583f012e29Smrg}
11593f012e29Smrg
11603f012e29Smrgstatic void amdgpu_command_submission_compute(void)
11613f012e29Smrg{
11623f012e29Smrg	/* write data using the CP */
11633f012e29Smrg	amdgpu_command_submission_compute_cp_write_data();
11643f012e29Smrg	/* const fill using the CP */
11653f012e29Smrg	amdgpu_command_submission_compute_cp_const_fill();
11663f012e29Smrg	/* copy data using the CP */
11673f012e29Smrg	amdgpu_command_submission_compute_cp_copy_data();
11683f012e29Smrg	/* nop test */
11693f012e29Smrg	amdgpu_command_submission_compute_nop();
11703f012e29Smrg}
11713f012e29Smrg
11723f012e29Smrg/*
11733f012e29Smrg * caller need create/release:
11743f012e29Smrg * pm4_src, resources, ib_info, and ibs_request
11753f012e29Smrg * submit command stream described in ibs_request and wait for this IB accomplished
11763f012e29Smrg */
11773f012e29Smrgstatic void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
11783f012e29Smrg				       unsigned ip_type,
11793f012e29Smrg				       int instance, int pm4_dw, uint32_t *pm4_src,
11803f012e29Smrg				       int res_cnt, amdgpu_bo_handle *resources,
11813f012e29Smrg				       struct amdgpu_cs_ib_info *ib_info,
11823f012e29Smrg				       struct amdgpu_cs_request *ibs_request)
11833f012e29Smrg{
11843f012e29Smrg	int r;
11853f012e29Smrg	uint32_t expired;
11863f012e29Smrg	uint32_t *ring_ptr;
11873f012e29Smrg	amdgpu_bo_handle ib_result_handle;
11883f012e29Smrg	void *ib_result_cpu;
11893f012e29Smrg	uint64_t ib_result_mc_address;
11903f012e29Smrg	struct amdgpu_cs_fence fence_status = {0};
11913f012e29Smrg	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
11923f012e29Smrg	amdgpu_va_handle va_handle;
11933f012e29Smrg
11943f012e29Smrg	/* prepare CS */
11953f012e29Smrg	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
11963f012e29Smrg	CU_ASSERT_NOT_EQUAL(resources, NULL);
11973f012e29Smrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
11983f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
11993f012e29Smrg	CU_ASSERT_TRUE(pm4_dw <= 1024);
12003f012e29Smrg
12013f012e29Smrg	/* allocate IB */
12023f012e29Smrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
12033f012e29Smrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
12043f012e29Smrg				    &ib_result_handle, &ib_result_cpu,
12053f012e29Smrg				    &ib_result_mc_address, &va_handle);
12063f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
12073f012e29Smrg
12083f012e29Smrg	/* copy PM4 packet to ring from caller */
12093f012e29Smrg	ring_ptr = ib_result_cpu;
12103f012e29Smrg	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));
12113f012e29Smrg
12123f012e29Smrg	ib_info->ib_mc_address = ib_result_mc_address;
12133f012e29Smrg	ib_info->size = pm4_dw;
12143f012e29Smrg
12153f012e29Smrg	ibs_request->ip_type = ip_type;
12163f012e29Smrg	ibs_request->ring = instance;
12173f012e29Smrg	ibs_request->number_of_ibs = 1;
12183f012e29Smrg	ibs_request->ibs = ib_info;
12193f012e29Smrg	ibs_request->fence_info.handle = NULL;
12203f012e29Smrg
12213f012e29Smrg	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
12223f012e29Smrg	all_res[res_cnt] = ib_result_handle;
12233f012e29Smrg
12243f012e29Smrg	r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res,
12253f012e29Smrg				  NULL, &ibs_request->resources);
12263f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
12273f012e29Smrg
12283f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
12293f012e29Smrg
12303f012e29Smrg	/* submit CS */
12313f012e29Smrg	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
12323f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
12333f012e29Smrg
12343f012e29Smrg	r = amdgpu_bo_list_destroy(ibs_request->resources);
12353f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
12363f012e29Smrg
12373f012e29Smrg	fence_status.ip_type = ip_type;
12383f012e29Smrg	fence_status.ip_instance = 0;
12393f012e29Smrg	fence_status.ring = ibs_request->ring;
12403f012e29Smrg	fence_status.context = context_handle;
12413f012e29Smrg	fence_status.fence = ibs_request->seq_no;
12423f012e29Smrg
12433f012e29Smrg	/* wait for IB accomplished */
12443f012e29Smrg	r = amdgpu_cs_query_fence_status(&fence_status,
12453f012e29Smrg					 AMDGPU_TIMEOUT_INFINITE,
12463f012e29Smrg					 0, &expired);
12473f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
12483f012e29Smrg	CU_ASSERT_EQUAL(expired, true);
12493f012e29Smrg
12503f012e29Smrg	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
12513f012e29Smrg				     ib_result_mc_address, 4096);
12523f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
12533f012e29Smrg}
12543f012e29Smrg
12553f012e29Smrgstatic void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
12563f012e29Smrg{
12573f012e29Smrg	const int sdma_write_length = 128;
12583f012e29Smrg	const int pm4_dw = 256;
12593f012e29Smrg	amdgpu_context_handle context_handle;
12603f012e29Smrg	amdgpu_bo_handle bo;
12613f012e29Smrg	amdgpu_bo_handle *resources;
12623f012e29Smrg	uint32_t *pm4;
12633f012e29Smrg	struct amdgpu_cs_ib_info *ib_info;
12643f012e29Smrg	struct amdgpu_cs_request *ibs_request;
12653f012e29Smrg	uint64_t bo_mc;
12663f012e29Smrg	volatile uint32_t *bo_cpu;
126700a23bdaSmrg	int i, j, r, loop, ring_id;
12683f012e29Smrg	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
12693f012e29Smrg	amdgpu_va_handle va_handle;
127000a23bdaSmrg	struct drm_amdgpu_info_hw_ip hw_ip_info;
12713f012e29Smrg
12723f012e29Smrg	pm4 = calloc(pm4_dw, sizeof(*pm4));
12733f012e29Smrg	CU_ASSERT_NOT_EQUAL(pm4, NULL);
12743f012e29Smrg
12753f012e29Smrg	ib_info = calloc(1, sizeof(*ib_info));
12763f012e29Smrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
12773f012e29Smrg
12783f012e29Smrg	ibs_request = calloc(1, sizeof(*ibs_request));
12793f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
12803f012e29Smrg
128100a23bdaSmrg	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
128200a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
128300a23bdaSmrg
12843f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
12853f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
12863f012e29Smrg
12873f012e29Smrg	/* prepare resource */
12883f012e29Smrg	resources = calloc(1, sizeof(amdgpu_bo_handle));
12893f012e29Smrg	CU_ASSERT_NOT_EQUAL(resources, NULL);
12903f012e29Smrg
129100a23bdaSmrg	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
129200a23bdaSmrg		loop = 0;
129300a23bdaSmrg		while(loop < 2) {
129400a23bdaSmrg			/* allocate UC bo for sDMA use */
129500a23bdaSmrg			r = amdgpu_bo_alloc_and_map(device_handle,
129600a23bdaSmrg						    sdma_write_length * sizeof(uint32_t),
129700a23bdaSmrg						    4096, AMDGPU_GEM_DOMAIN_GTT,
129800a23bdaSmrg						    gtt_flags[loop], &bo, (void**)&bo_cpu,
129900a23bdaSmrg						    &bo_mc, &va_handle);
130000a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
13013f012e29Smrg
130200a23bdaSmrg			/* clear bo */
130300a23bdaSmrg			memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));
13043f012e29Smrg
130500a23bdaSmrg			resources[0] = bo;
13063f012e29Smrg
130700a23bdaSmrg			/* fulfill PM4: test DMA write-linear */
130800a23bdaSmrg			i = j = 0;
130900a23bdaSmrg			if (ip_type == AMDGPU_HW_IP_DMA) {
131000a23bdaSmrg				if (family_id == AMDGPU_FAMILY_SI)
131100a23bdaSmrg					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
131200a23bdaSmrg								  sdma_write_length);
131300a23bdaSmrg				else
131400a23bdaSmrg					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
131500a23bdaSmrg							       SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
131600a23bdaSmrg				pm4[i++] = 0xffffffff & bo_mc;
131700a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
131800a23bdaSmrg				if (family_id >= AMDGPU_FAMILY_AI)
131900a23bdaSmrg					pm4[i++] = sdma_write_length - 1;
132000a23bdaSmrg				else if (family_id != AMDGPU_FAMILY_SI)
132100a23bdaSmrg					pm4[i++] = sdma_write_length;
132200a23bdaSmrg				while(j++ < sdma_write_length)
132300a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
132400a23bdaSmrg			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
132500a23bdaSmrg				    (ip_type == AMDGPU_HW_IP_COMPUTE)) {
132600a23bdaSmrg				pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
132700a23bdaSmrg				pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
132800a23bdaSmrg				pm4[i++] = 0xfffffffc & bo_mc;
132900a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
133000a23bdaSmrg				while(j++ < sdma_write_length)
133100a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
133200a23bdaSmrg			}
13333f012e29Smrg
133400a23bdaSmrg			amdgpu_test_exec_cs_helper(context_handle,
133500a23bdaSmrg						   ip_type, ring_id,
133600a23bdaSmrg						   i, pm4,
133700a23bdaSmrg						   1, resources,
133800a23bdaSmrg						   ib_info, ibs_request);
13393f012e29Smrg
134000a23bdaSmrg			/* verify if SDMA test result meets with expected */
134100a23bdaSmrg			i = 0;
134200a23bdaSmrg			while(i < sdma_write_length) {
134300a23bdaSmrg				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
134400a23bdaSmrg			}
13453f012e29Smrg
134600a23bdaSmrg			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
134700a23bdaSmrg						     sdma_write_length * sizeof(uint32_t));
134800a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
134900a23bdaSmrg			loop++;
13503f012e29Smrg		}
13513f012e29Smrg	}
13523f012e29Smrg	/* clean resources */
13533f012e29Smrg	free(resources);
13543f012e29Smrg	free(ibs_request);
13553f012e29Smrg	free(ib_info);
13563f012e29Smrg	free(pm4);
13573f012e29Smrg
13583f012e29Smrg	/* end of test */
13593f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
13603f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
13613f012e29Smrg}
13623f012e29Smrg
13633f012e29Smrgstatic void amdgpu_command_submission_sdma_write_linear(void)
13643f012e29Smrg{
13653f012e29Smrg	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
13663f012e29Smrg}
13673f012e29Smrg
13683f012e29Smrgstatic void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
13693f012e29Smrg{
13703f012e29Smrg	const int sdma_write_length = 1024 * 1024;
13713f012e29Smrg	const int pm4_dw = 256;
13723f012e29Smrg	amdgpu_context_handle context_handle;
13733f012e29Smrg	amdgpu_bo_handle bo;
13743f012e29Smrg	amdgpu_bo_handle *resources;
13753f012e29Smrg	uint32_t *pm4;
13763f012e29Smrg	struct amdgpu_cs_ib_info *ib_info;
13773f012e29Smrg	struct amdgpu_cs_request *ibs_request;
13783f012e29Smrg	uint64_t bo_mc;
13793f012e29Smrg	volatile uint32_t *bo_cpu;
138000a23bdaSmrg	int i, j, r, loop, ring_id;
13813f012e29Smrg	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
13823f012e29Smrg	amdgpu_va_handle va_handle;
138300a23bdaSmrg	struct drm_amdgpu_info_hw_ip hw_ip_info;
13843f012e29Smrg
13853f012e29Smrg	pm4 = calloc(pm4_dw, sizeof(*pm4));
13863f012e29Smrg	CU_ASSERT_NOT_EQUAL(pm4, NULL);
13873f012e29Smrg
13883f012e29Smrg	ib_info = calloc(1, sizeof(*ib_info));
13893f012e29Smrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
13903f012e29Smrg
13913f012e29Smrg	ibs_request = calloc(1, sizeof(*ibs_request));
13923f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
13933f012e29Smrg
139400a23bdaSmrg	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
139500a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
139600a23bdaSmrg
13973f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
13983f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
13993f012e29Smrg
14003f012e29Smrg	/* prepare resource */
14013f012e29Smrg	resources = calloc(1, sizeof(amdgpu_bo_handle));
14023f012e29Smrg	CU_ASSERT_NOT_EQUAL(resources, NULL);
14033f012e29Smrg
140400a23bdaSmrg	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
140500a23bdaSmrg		loop = 0;
140600a23bdaSmrg		while(loop < 2) {
140700a23bdaSmrg			/* allocate UC bo for sDMA use */
140800a23bdaSmrg			r = amdgpu_bo_alloc_and_map(device_handle,
140900a23bdaSmrg						    sdma_write_length, 4096,
141000a23bdaSmrg						    AMDGPU_GEM_DOMAIN_GTT,
141100a23bdaSmrg						    gtt_flags[loop], &bo, (void**)&bo_cpu,
141200a23bdaSmrg						    &bo_mc, &va_handle);
141300a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
14143f012e29Smrg
141500a23bdaSmrg			/* clear bo */
141600a23bdaSmrg			memset((void*)bo_cpu, 0, sdma_write_length);
14173f012e29Smrg
141800a23bdaSmrg			resources[0] = bo;
14193f012e29Smrg
142000a23bdaSmrg			/* fulfill PM4: test DMA const fill */
142100a23bdaSmrg			i = j = 0;
142200a23bdaSmrg			if (ip_type == AMDGPU_HW_IP_DMA) {
142300a23bdaSmrg				if (family_id == AMDGPU_FAMILY_SI) {
142400a23bdaSmrg					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
142500a23bdaSmrg								  0, 0, 0,
142600a23bdaSmrg								  sdma_write_length / 4);
142700a23bdaSmrg					pm4[i++] = 0xfffffffc & bo_mc;
142800a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
142900a23bdaSmrg					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
143000a23bdaSmrg				} else {
143100a23bdaSmrg					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
143200a23bdaSmrg							       SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
143300a23bdaSmrg					pm4[i++] = 0xffffffff & bo_mc;
143400a23bdaSmrg					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
143500a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
143600a23bdaSmrg					if (family_id >= AMDGPU_FAMILY_AI)
143700a23bdaSmrg						pm4[i++] = sdma_write_length - 1;
143800a23bdaSmrg					else
143900a23bdaSmrg						pm4[i++] = sdma_write_length;
144000a23bdaSmrg				}
144100a23bdaSmrg			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
144200a23bdaSmrg				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
144300a23bdaSmrg				if (family_id == AMDGPU_FAMILY_SI) {
144400a23bdaSmrg					pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
144500a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
144600a23bdaSmrg					pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
144700a23bdaSmrg						   PACKET3_DMA_DATA_SI_DST_SEL(0) |
144800a23bdaSmrg						   PACKET3_DMA_DATA_SI_SRC_SEL(2) |
144900a23bdaSmrg						   PACKET3_DMA_DATA_SI_CP_SYNC;
145000a23bdaSmrg					pm4[i++] = 0xffffffff & bo_mc;
145100a23bdaSmrg					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1452d8807b2fSmrg					pm4[i++] = sdma_write_length;
145300a23bdaSmrg				} else {
145400a23bdaSmrg					pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
145500a23bdaSmrg					pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
145600a23bdaSmrg						   PACKET3_DMA_DATA_DST_SEL(0) |
145700a23bdaSmrg						   PACKET3_DMA_DATA_SRC_SEL(2) |
145800a23bdaSmrg						   PACKET3_DMA_DATA_CP_SYNC;
145900a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
146000a23bdaSmrg					pm4[i++] = 0;
146100a23bdaSmrg					pm4[i++] = 0xfffffffc & bo_mc;
146200a23bdaSmrg					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
146300a23bdaSmrg					pm4[i++] = sdma_write_length;
146400a23bdaSmrg				}
1465d8807b2fSmrg			}
14663f012e29Smrg
146700a23bdaSmrg			amdgpu_test_exec_cs_helper(context_handle,
146800a23bdaSmrg						   ip_type, ring_id,
146900a23bdaSmrg						   i, pm4,
147000a23bdaSmrg						   1, resources,
147100a23bdaSmrg						   ib_info, ibs_request);
14723f012e29Smrg
147300a23bdaSmrg			/* verify if SDMA test result meets with expected */
147400a23bdaSmrg			i = 0;
147500a23bdaSmrg			while(i < (sdma_write_length / 4)) {
147600a23bdaSmrg				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
147700a23bdaSmrg			}
14783f012e29Smrg
147900a23bdaSmrg			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
148000a23bdaSmrg						     sdma_write_length);
148100a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
148200a23bdaSmrg			loop++;
148300a23bdaSmrg		}
14843f012e29Smrg	}
14853f012e29Smrg	/* clean resources */
14863f012e29Smrg	free(resources);
14873f012e29Smrg	free(ibs_request);
14883f012e29Smrg	free(ib_info);
14893f012e29Smrg	free(pm4);
14903f012e29Smrg
14913f012e29Smrg	/* end of test */
14923f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
14933f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
14943f012e29Smrg}
14953f012e29Smrg
14963f012e29Smrgstatic void amdgpu_command_submission_sdma_const_fill(void)
14973f012e29Smrg{
14983f012e29Smrg	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
14993f012e29Smrg}
15003f012e29Smrg
15013f012e29Smrgstatic void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
15023f012e29Smrg{
15033f012e29Smrg	const int sdma_write_length = 1024;
15043f012e29Smrg	const int pm4_dw = 256;
15053f012e29Smrg	amdgpu_context_handle context_handle;
15063f012e29Smrg	amdgpu_bo_handle bo1, bo2;
15073f012e29Smrg	amdgpu_bo_handle *resources;
15083f012e29Smrg	uint32_t *pm4;
15093f012e29Smrg	struct amdgpu_cs_ib_info *ib_info;
15103f012e29Smrg	struct amdgpu_cs_request *ibs_request;
15113f012e29Smrg	uint64_t bo1_mc, bo2_mc;
15123f012e29Smrg	volatile unsigned char *bo1_cpu, *bo2_cpu;
151300a23bdaSmrg	int i, j, r, loop1, loop2, ring_id;
15143f012e29Smrg	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
15153f012e29Smrg	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
151600a23bdaSmrg	struct drm_amdgpu_info_hw_ip hw_ip_info;
15173f012e29Smrg
15183f012e29Smrg	pm4 = calloc(pm4_dw, sizeof(*pm4));
15193f012e29Smrg	CU_ASSERT_NOT_EQUAL(pm4, NULL);
15203f012e29Smrg
15213f012e29Smrg	ib_info = calloc(1, sizeof(*ib_info));
15223f012e29Smrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
15233f012e29Smrg
15243f012e29Smrg	ibs_request = calloc(1, sizeof(*ibs_request));
15253f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
15263f012e29Smrg
152700a23bdaSmrg	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
152800a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
152900a23bdaSmrg
15303f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
15313f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
15323f012e29Smrg
15333f012e29Smrg	/* prepare resource */
15343f012e29Smrg	resources = calloc(2, sizeof(amdgpu_bo_handle));
15353f012e29Smrg	CU_ASSERT_NOT_EQUAL(resources, NULL);
15363f012e29Smrg
153700a23bdaSmrg	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
153800a23bdaSmrg		loop1 = loop2 = 0;
153900a23bdaSmrg		/* run 9 circle to test all mapping combination */
154000a23bdaSmrg		while(loop1 < 2) {
154100a23bdaSmrg			while(loop2 < 2) {
154200a23bdaSmrg				/* allocate UC bo1for sDMA use */
154300a23bdaSmrg				r = amdgpu_bo_alloc_and_map(device_handle,
154400a23bdaSmrg							    sdma_write_length, 4096,
154500a23bdaSmrg							    AMDGPU_GEM_DOMAIN_GTT,
154600a23bdaSmrg							    gtt_flags[loop1], &bo1,
154700a23bdaSmrg							    (void**)&bo1_cpu, &bo1_mc,
154800a23bdaSmrg							    &bo1_va_handle);
154900a23bdaSmrg				CU_ASSERT_EQUAL(r, 0);
155000a23bdaSmrg
155100a23bdaSmrg				/* set bo1 */
155200a23bdaSmrg				memset((void*)bo1_cpu, 0xaa, sdma_write_length);
155300a23bdaSmrg
155400a23bdaSmrg				/* allocate UC bo2 for sDMA use */
155500a23bdaSmrg				r = amdgpu_bo_alloc_and_map(device_handle,
155600a23bdaSmrg							    sdma_write_length, 4096,
155700a23bdaSmrg							    AMDGPU_GEM_DOMAIN_GTT,
155800a23bdaSmrg							    gtt_flags[loop2], &bo2,
155900a23bdaSmrg							    (void**)&bo2_cpu, &bo2_mc,
156000a23bdaSmrg							    &bo2_va_handle);
156100a23bdaSmrg				CU_ASSERT_EQUAL(r, 0);
156200a23bdaSmrg
156300a23bdaSmrg				/* clear bo2 */
156400a23bdaSmrg				memset((void*)bo2_cpu, 0, sdma_write_length);
156500a23bdaSmrg
156600a23bdaSmrg				resources[0] = bo1;
156700a23bdaSmrg				resources[1] = bo2;
156800a23bdaSmrg
156900a23bdaSmrg				/* fulfill PM4: test DMA copy linear */
157000a23bdaSmrg				i = j = 0;
157100a23bdaSmrg				if (ip_type == AMDGPU_HW_IP_DMA) {
157200a23bdaSmrg					if (family_id == AMDGPU_FAMILY_SI) {
157300a23bdaSmrg						pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
157400a23bdaSmrg									  0, 0, 0,
157500a23bdaSmrg									  sdma_write_length);
157600a23bdaSmrg						pm4[i++] = 0xffffffff & bo2_mc;
157700a23bdaSmrg						pm4[i++] = 0xffffffff & bo1_mc;
157800a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
157900a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
158000a23bdaSmrg					} else {
158100a23bdaSmrg						pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
158200a23bdaSmrg								       SDMA_COPY_SUB_OPCODE_LINEAR,
158300a23bdaSmrg								       0);
158400a23bdaSmrg						if (family_id >= AMDGPU_FAMILY_AI)
158500a23bdaSmrg							pm4[i++] = sdma_write_length - 1;
158600a23bdaSmrg						else
158700a23bdaSmrg							pm4[i++] = sdma_write_length;
158800a23bdaSmrg						pm4[i++] = 0;
158900a23bdaSmrg						pm4[i++] = 0xffffffff & bo1_mc;
159000a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
159100a23bdaSmrg						pm4[i++] = 0xffffffff & bo2_mc;
159200a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
159300a23bdaSmrg					}
159400a23bdaSmrg				} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
159500a23bdaSmrg					   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
159600a23bdaSmrg					if (family_id == AMDGPU_FAMILY_SI) {
159700a23bdaSmrg						pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
159800a23bdaSmrg						pm4[i++] = 0xfffffffc & bo1_mc;
159900a23bdaSmrg						pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
160000a23bdaSmrg							   PACKET3_DMA_DATA_SI_DST_SEL(0) |
160100a23bdaSmrg							   PACKET3_DMA_DATA_SI_SRC_SEL(0) |
160200a23bdaSmrg							   PACKET3_DMA_DATA_SI_CP_SYNC |
160300a23bdaSmrg							   (0xffff00000000 & bo1_mc) >> 32;
160400a23bdaSmrg						pm4[i++] = 0xfffffffc & bo2_mc;
160500a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1606d8807b2fSmrg						pm4[i++] = sdma_write_length;
160700a23bdaSmrg					} else {
160800a23bdaSmrg						pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
160900a23bdaSmrg						pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
161000a23bdaSmrg							   PACKET3_DMA_DATA_DST_SEL(0) |
161100a23bdaSmrg							   PACKET3_DMA_DATA_SRC_SEL(0) |
161200a23bdaSmrg							   PACKET3_DMA_DATA_CP_SYNC;
161300a23bdaSmrg						pm4[i++] = 0xfffffffc & bo1_mc;
161400a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
161500a23bdaSmrg						pm4[i++] = 0xfffffffc & bo2_mc;
161600a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
161700a23bdaSmrg						pm4[i++] = sdma_write_length;
161800a23bdaSmrg					}
1619d8807b2fSmrg				}
16203f012e29Smrg
162100a23bdaSmrg				amdgpu_test_exec_cs_helper(context_handle,
162200a23bdaSmrg							   ip_type, ring_id,
162300a23bdaSmrg							   i, pm4,
162400a23bdaSmrg							   2, resources,
162500a23bdaSmrg							   ib_info, ibs_request);
16263f012e29Smrg
162700a23bdaSmrg				/* verify if SDMA test result meets with expected */
162800a23bdaSmrg				i = 0;
162900a23bdaSmrg				while(i < sdma_write_length) {
163000a23bdaSmrg					CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
163100a23bdaSmrg				}
163200a23bdaSmrg				r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
163300a23bdaSmrg							     sdma_write_length);
163400a23bdaSmrg				CU_ASSERT_EQUAL(r, 0);
163500a23bdaSmrg				r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
163600a23bdaSmrg							     sdma_write_length);
163700a23bdaSmrg				CU_ASSERT_EQUAL(r, 0);
163800a23bdaSmrg				loop2++;
16393f012e29Smrg			}
164000a23bdaSmrg			loop1++;
16413f012e29Smrg		}
16423f012e29Smrg	}
16433f012e29Smrg	/* clean resources */
16443f012e29Smrg	free(resources);
16453f012e29Smrg	free(ibs_request);
16463f012e29Smrg	free(ib_info);
16473f012e29Smrg	free(pm4);
16483f012e29Smrg
16493f012e29Smrg	/* end of test */
16503f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
16513f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
16523f012e29Smrg}
16533f012e29Smrg
16543f012e29Smrgstatic void amdgpu_command_submission_sdma_copy_linear(void)
16553f012e29Smrg{
16563f012e29Smrg	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
16573f012e29Smrg}
16583f012e29Smrg
16593f012e29Smrgstatic void amdgpu_command_submission_sdma(void)
16603f012e29Smrg{
16613f012e29Smrg	amdgpu_command_submission_sdma_write_linear();
16623f012e29Smrg	amdgpu_command_submission_sdma_const_fill();
16633f012e29Smrg	amdgpu_command_submission_sdma_copy_linear();
16643f012e29Smrg}
16653f012e29Smrg
1666d8807b2fSmrgstatic void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
1667d8807b2fSmrg{
1668d8807b2fSmrg	amdgpu_context_handle context_handle;
1669d8807b2fSmrg	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
1670d8807b2fSmrg	void *ib_result_cpu, *ib_result_ce_cpu;
1671d8807b2fSmrg	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
1672d8807b2fSmrg	struct amdgpu_cs_request ibs_request[2] = {0};
1673d8807b2fSmrg	struct amdgpu_cs_ib_info ib_info[2];
1674d8807b2fSmrg	struct amdgpu_cs_fence fence_status[2] = {0};
1675d8807b2fSmrg	uint32_t *ptr;
1676d8807b2fSmrg	uint32_t expired;
1677d8807b2fSmrg	amdgpu_bo_list_handle bo_list;
1678d8807b2fSmrg	amdgpu_va_handle va_handle, va_handle_ce;
1679d8807b2fSmrg	int r;
1680d8807b2fSmrg	int i = 0, ib_cs_num = 2;
1681d8807b2fSmrg
1682d8807b2fSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1683d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1684d8807b2fSmrg
1685d8807b2fSmrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1686d8807b2fSmrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
1687d8807b2fSmrg				    &ib_result_handle, &ib_result_cpu,
1688d8807b2fSmrg				    &ib_result_mc_address, &va_handle);
1689d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1690d8807b2fSmrg
1691d8807b2fSmrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1692d8807b2fSmrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
1693d8807b2fSmrg				    &ib_result_ce_handle, &ib_result_ce_cpu,
1694d8807b2fSmrg				    &ib_result_ce_mc_address, &va_handle_ce);
1695d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1696d8807b2fSmrg
1697d8807b2fSmrg	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
1698d8807b2fSmrg			       ib_result_ce_handle, &bo_list);
1699d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1700d8807b2fSmrg
1701d8807b2fSmrg	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
1702d8807b2fSmrg
1703d8807b2fSmrg	/* IT_SET_CE_DE_COUNTERS */
1704d8807b2fSmrg	ptr = ib_result_ce_cpu;
1705d8807b2fSmrg	if (family_id != AMDGPU_FAMILY_SI) {
1706d8807b2fSmrg		ptr[i++] = 0xc0008900;
1707d8807b2fSmrg		ptr[i++] = 0;
1708d8807b2fSmrg	}
1709d8807b2fSmrg	ptr[i++] = 0xc0008400;
1710d8807b2fSmrg	ptr[i++] = 1;
1711d8807b2fSmrg	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
1712d8807b2fSmrg	ib_info[0].size = i;
1713d8807b2fSmrg	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
1714d8807b2fSmrg
1715d8807b2fSmrg	/* IT_WAIT_ON_CE_COUNTER */
1716d8807b2fSmrg	ptr = ib_result_cpu;
1717d8807b2fSmrg	ptr[0] = 0xc0008600;
1718d8807b2fSmrg	ptr[1] = 0x00000001;
1719d8807b2fSmrg	ib_info[1].ib_mc_address = ib_result_mc_address;
1720d8807b2fSmrg	ib_info[1].size = 2;
1721d8807b2fSmrg
1722d8807b2fSmrg	for (i = 0; i < ib_cs_num; i++) {
1723d8807b2fSmrg		ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
1724d8807b2fSmrg		ibs_request[i].number_of_ibs = 2;
1725d8807b2fSmrg		ibs_request[i].ibs = ib_info;
1726d8807b2fSmrg		ibs_request[i].resources = bo_list;
1727d8807b2fSmrg		ibs_request[i].fence_info.handle = NULL;
1728d8807b2fSmrg	}
1729d8807b2fSmrg
1730d8807b2fSmrg	r = amdgpu_cs_submit(context_handle, 0,ibs_request, ib_cs_num);
1731d8807b2fSmrg
1732d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1733d8807b2fSmrg
1734d8807b2fSmrg	for (i = 0; i < ib_cs_num; i++) {
1735d8807b2fSmrg		fence_status[i].context = context_handle;
1736d8807b2fSmrg		fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
1737d8807b2fSmrg		fence_status[i].fence = ibs_request[i].seq_no;
1738d8807b2fSmrg	}
1739d8807b2fSmrg
1740d8807b2fSmrg	r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
1741d8807b2fSmrg				AMDGPU_TIMEOUT_INFINITE,
1742d8807b2fSmrg				&expired, NULL);
1743d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1744d8807b2fSmrg
1745d8807b2fSmrg	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1746d8807b2fSmrg				     ib_result_mc_address, 4096);
1747d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1748d8807b2fSmrg
1749d8807b2fSmrg	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
1750d8807b2fSmrg				     ib_result_ce_mc_address, 4096);
1751d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1752d8807b2fSmrg
1753d8807b2fSmrg	r = amdgpu_bo_list_destroy(bo_list);
1754d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1755d8807b2fSmrg
1756d8807b2fSmrg	r = amdgpu_cs_ctx_free(context_handle);
1757d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1758d8807b2fSmrg}
1759d8807b2fSmrg
/*
 * Multi-fence test entry point: runs the multi-fence submission once with
 * wait_all set to true and then once with it set to false.
 */
static void amdgpu_command_submission_multi_fence(void)
{
	const bool wait_modes[] = { true, false };
	size_t n;

	for (n = 0; n < sizeof(wait_modes) / sizeof(wait_modes[0]); n++)
		amdgpu_command_submission_multi_fence_wait_all(wait_modes[n]);
}
1765d8807b2fSmrg
17663f012e29Smrgstatic void amdgpu_userptr_test(void)
17673f012e29Smrg{
17683f012e29Smrg	int i, r, j;
17693f012e29Smrg	uint32_t *pm4 = NULL;
17703f012e29Smrg	uint64_t bo_mc;
17713f012e29Smrg	void *ptr = NULL;
17723f012e29Smrg	int pm4_dw = 256;
17733f012e29Smrg	int sdma_write_length = 4;
17743f012e29Smrg	amdgpu_bo_handle handle;
17753f012e29Smrg	amdgpu_context_handle context_handle;
17763f012e29Smrg	struct amdgpu_cs_ib_info *ib_info;
17773f012e29Smrg	struct amdgpu_cs_request *ibs_request;
17783f012e29Smrg	amdgpu_bo_handle buf_handle;
17793f012e29Smrg	amdgpu_va_handle va_handle;
17803f012e29Smrg
17813f012e29Smrg	pm4 = calloc(pm4_dw, sizeof(*pm4));
17823f012e29Smrg	CU_ASSERT_NOT_EQUAL(pm4, NULL);
17833f012e29Smrg
17843f012e29Smrg	ib_info = calloc(1, sizeof(*ib_info));
17853f012e29Smrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
17863f012e29Smrg
17873f012e29Smrg	ibs_request = calloc(1, sizeof(*ibs_request));
17883f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
17893f012e29Smrg
17903f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
17913f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
17923f012e29Smrg
17933f012e29Smrg	posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
17943f012e29Smrg	CU_ASSERT_NOT_EQUAL(ptr, NULL);
17953f012e29Smrg	memset(ptr, 0, BUFFER_SIZE);
17963f012e29Smrg
17973f012e29Smrg	r = amdgpu_create_bo_from_user_mem(device_handle,
17983f012e29Smrg					   ptr, BUFFER_SIZE, &buf_handle);
17993f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
18003f012e29Smrg
18013f012e29Smrg	r = amdgpu_va_range_alloc(device_handle,
18023f012e29Smrg				  amdgpu_gpu_va_range_general,
18033f012e29Smrg				  BUFFER_SIZE, 1, 0, &bo_mc,
18043f012e29Smrg				  &va_handle, 0);
18053f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
18063f012e29Smrg
18073f012e29Smrg	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
18083f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
18093f012e29Smrg
18103f012e29Smrg	handle = buf_handle;
18113f012e29Smrg
18123f012e29Smrg	j = i = 0;
1813d8807b2fSmrg
1814d8807b2fSmrg	if (family_id == AMDGPU_FAMILY_SI)
1815d8807b2fSmrg		pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
1816d8807b2fSmrg				sdma_write_length);
1817d8807b2fSmrg	else
1818d8807b2fSmrg		pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
1819d8807b2fSmrg				SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
18203f012e29Smrg	pm4[i++] = 0xffffffff & bo_mc;
18213f012e29Smrg	pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1822d8807b2fSmrg	if (family_id >= AMDGPU_FAMILY_AI)
1823d8807b2fSmrg		pm4[i++] = sdma_write_length - 1;
1824d8807b2fSmrg	else if (family_id != AMDGPU_FAMILY_SI)
1825d8807b2fSmrg		pm4[i++] = sdma_write_length;
18263f012e29Smrg
18273f012e29Smrg	while (j++ < sdma_write_length)
18283f012e29Smrg		pm4[i++] = 0xdeadbeaf;
18293f012e29Smrg
183000a23bdaSmrg	if (!fork()) {
183100a23bdaSmrg		pm4[0] = 0x0;
183200a23bdaSmrg		exit(0);
183300a23bdaSmrg	}
183400a23bdaSmrg
18353f012e29Smrg	amdgpu_test_exec_cs_helper(context_handle,
18363f012e29Smrg				   AMDGPU_HW_IP_DMA, 0,
18373f012e29Smrg				   i, pm4,
18383f012e29Smrg				   1, &handle,
18393f012e29Smrg				   ib_info, ibs_request);
18403f012e29Smrg	i = 0;
18413f012e29Smrg	while (i < sdma_write_length) {
18423f012e29Smrg		CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
18433f012e29Smrg	}
18443f012e29Smrg	free(ibs_request);
18453f012e29Smrg	free(ib_info);
18463f012e29Smrg	free(pm4);
18473f012e29Smrg
18483f012e29Smrg	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
18493f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
18503f012e29Smrg	r = amdgpu_va_range_free(va_handle);
18513f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
18523f012e29Smrg	r = amdgpu_bo_free(buf_handle);
18533f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
18543f012e29Smrg	free(ptr);
18553f012e29Smrg
18563f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
18573f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
185800a23bdaSmrg
185900a23bdaSmrg	wait(NULL);
186000a23bdaSmrg}
186100a23bdaSmrg
186200a23bdaSmrgstatic void amdgpu_sync_dependency_test(void)
186300a23bdaSmrg{
186400a23bdaSmrg	amdgpu_context_handle context_handle[2];
186500a23bdaSmrg	amdgpu_bo_handle ib_result_handle;
186600a23bdaSmrg	void *ib_result_cpu;
186700a23bdaSmrg	uint64_t ib_result_mc_address;
186800a23bdaSmrg	struct amdgpu_cs_request ibs_request;
186900a23bdaSmrg	struct amdgpu_cs_ib_info ib_info;
187000a23bdaSmrg	struct amdgpu_cs_fence fence_status;
187100a23bdaSmrg	uint32_t expired;
187200a23bdaSmrg	int i, j, r;
187300a23bdaSmrg	amdgpu_bo_list_handle bo_list;
187400a23bdaSmrg	amdgpu_va_handle va_handle;
187500a23bdaSmrg	static uint32_t *ptr;
187600a23bdaSmrg	uint64_t seq_no;
187700a23bdaSmrg
187800a23bdaSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
187900a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
188000a23bdaSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
188100a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
188200a23bdaSmrg
188300a23bdaSmrg	r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
188400a23bdaSmrg			AMDGPU_GEM_DOMAIN_GTT, 0,
188500a23bdaSmrg						    &ib_result_handle, &ib_result_cpu,
188600a23bdaSmrg						    &ib_result_mc_address, &va_handle);
188700a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
188800a23bdaSmrg
188900a23bdaSmrg	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
189000a23bdaSmrg			       &bo_list);
189100a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
189200a23bdaSmrg
189300a23bdaSmrg	ptr = ib_result_cpu;
189400a23bdaSmrg	i = 0;
189500a23bdaSmrg
189600a23bdaSmrg	memcpy(ptr + CODE_OFFSET , shader_bin, sizeof(shader_bin));
189700a23bdaSmrg
189800a23bdaSmrg	/* Dispatch minimal init config and verify it's executed */
189900a23bdaSmrg	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
190000a23bdaSmrg	ptr[i++] = 0x80000000;
190100a23bdaSmrg	ptr[i++] = 0x80000000;
190200a23bdaSmrg
190300a23bdaSmrg	ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
190400a23bdaSmrg	ptr[i++] = 0x80000000;
190500a23bdaSmrg
190600a23bdaSmrg
190700a23bdaSmrg	/* Program compute regs */
190800a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
190900a23bdaSmrg	ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
191000a23bdaSmrg	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
191100a23bdaSmrg	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;
191200a23bdaSmrg
191300a23bdaSmrg
191400a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
191500a23bdaSmrg	ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
191600a23bdaSmrg	/*
191700a23bdaSmrg	 * 002c0040         COMPUTE_PGM_RSRC1 <- VGPRS = 0
191800a23bdaSmrg	                                      SGPRS = 1
191900a23bdaSmrg	                                      PRIORITY = 0
192000a23bdaSmrg	                                      FLOAT_MODE = 192 (0xc0)
192100a23bdaSmrg	                                      PRIV = 0
192200a23bdaSmrg	                                      DX10_CLAMP = 1
192300a23bdaSmrg	                                      DEBUG_MODE = 0
192400a23bdaSmrg	                                      IEEE_MODE = 0
192500a23bdaSmrg	                                      BULKY = 0
192600a23bdaSmrg	                                      CDBG_USER = 0
192700a23bdaSmrg	 *
192800a23bdaSmrg	 */
192900a23bdaSmrg	ptr[i++] = 0x002c0040;
193000a23bdaSmrg
193100a23bdaSmrg
193200a23bdaSmrg	/*
193300a23bdaSmrg	 * 00000010         COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
193400a23bdaSmrg	                                      USER_SGPR = 8
193500a23bdaSmrg	                                      TRAP_PRESENT = 0
193600a23bdaSmrg	                                      TGID_X_EN = 0
193700a23bdaSmrg	                                      TGID_Y_EN = 0
193800a23bdaSmrg	                                      TGID_Z_EN = 0
193900a23bdaSmrg	                                      TG_SIZE_EN = 0
194000a23bdaSmrg	                                      TIDIG_COMP_CNT = 0
194100a23bdaSmrg	                                      EXCP_EN_MSB = 0
194200a23bdaSmrg	                                      LDS_SIZE = 0
194300a23bdaSmrg	                                      EXCP_EN = 0
194400a23bdaSmrg	 *
194500a23bdaSmrg	 */
194600a23bdaSmrg	ptr[i++] = 0x00000010;
194700a23bdaSmrg
194800a23bdaSmrg
194900a23bdaSmrg/*
195000a23bdaSmrg * 00000100         COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
195100a23bdaSmrg                                         WAVESIZE = 0
195200a23bdaSmrg *
195300a23bdaSmrg */
195400a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
195500a23bdaSmrg	ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
195600a23bdaSmrg	ptr[i++] = 0x00000100;
195700a23bdaSmrg
195800a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
195900a23bdaSmrg	ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
196000a23bdaSmrg	ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
196100a23bdaSmrg	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
196200a23bdaSmrg
196300a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
196400a23bdaSmrg	ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
196500a23bdaSmrg	ptr[i++] = 0;
196600a23bdaSmrg
196700a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
196800a23bdaSmrg	ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
196900a23bdaSmrg	ptr[i++] = 1;
197000a23bdaSmrg	ptr[i++] = 1;
197100a23bdaSmrg	ptr[i++] = 1;
197200a23bdaSmrg
197300a23bdaSmrg
197400a23bdaSmrg	/* Dispatch */
197500a23bdaSmrg	ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
197600a23bdaSmrg	ptr[i++] = 1;
197700a23bdaSmrg	ptr[i++] = 1;
197800a23bdaSmrg	ptr[i++] = 1;
197900a23bdaSmrg	ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */
198000a23bdaSmrg
198100a23bdaSmrg
198200a23bdaSmrg	while (i & 7)
198300a23bdaSmrg		ptr[i++] =  0xffff1000; /* type3 nop packet */
198400a23bdaSmrg
198500a23bdaSmrg	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
198600a23bdaSmrg	ib_info.ib_mc_address = ib_result_mc_address;
198700a23bdaSmrg	ib_info.size = i;
198800a23bdaSmrg
198900a23bdaSmrg	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
199000a23bdaSmrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
199100a23bdaSmrg	ibs_request.ring = 0;
199200a23bdaSmrg	ibs_request.number_of_ibs = 1;
199300a23bdaSmrg	ibs_request.ibs = &ib_info;
199400a23bdaSmrg	ibs_request.resources = bo_list;
199500a23bdaSmrg	ibs_request.fence_info.handle = NULL;
199600a23bdaSmrg
199700a23bdaSmrg	r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request, 1);
199800a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
199900a23bdaSmrg	seq_no = ibs_request.seq_no;
200000a23bdaSmrg
200100a23bdaSmrg
200200a23bdaSmrg
200300a23bdaSmrg	/* Prepare second command with dependency on the first */
200400a23bdaSmrg	j = i;
200500a23bdaSmrg	ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
200600a23bdaSmrg	ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
200700a23bdaSmrg	ptr[i++] =          0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
200800a23bdaSmrg	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
200900a23bdaSmrg	ptr[i++] = 99;
201000a23bdaSmrg
201100a23bdaSmrg	while (i & 7)
201200a23bdaSmrg		ptr[i++] =  0xffff1000; /* type3 nop packet */
201300a23bdaSmrg
201400a23bdaSmrg	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
201500a23bdaSmrg	ib_info.ib_mc_address = ib_result_mc_address + j * 4;
201600a23bdaSmrg	ib_info.size = i - j;
201700a23bdaSmrg
201800a23bdaSmrg	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
201900a23bdaSmrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
202000a23bdaSmrg	ibs_request.ring = 0;
202100a23bdaSmrg	ibs_request.number_of_ibs = 1;
202200a23bdaSmrg	ibs_request.ibs = &ib_info;
202300a23bdaSmrg	ibs_request.resources = bo_list;
202400a23bdaSmrg	ibs_request.fence_info.handle = NULL;
202500a23bdaSmrg
202600a23bdaSmrg	ibs_request.number_of_dependencies = 1;
202700a23bdaSmrg
202800a23bdaSmrg	ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
202900a23bdaSmrg	ibs_request.dependencies[0].context = context_handle[1];
203000a23bdaSmrg	ibs_request.dependencies[0].ip_instance = 0;
203100a23bdaSmrg	ibs_request.dependencies[0].ring = 0;
203200a23bdaSmrg	ibs_request.dependencies[0].fence = seq_no;
203300a23bdaSmrg
203400a23bdaSmrg
203500a23bdaSmrg	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request, 1);
203600a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
203700a23bdaSmrg
203800a23bdaSmrg
203900a23bdaSmrg	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
204000a23bdaSmrg	fence_status.context = context_handle[0];
204100a23bdaSmrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
204200a23bdaSmrg	fence_status.ip_instance = 0;
204300a23bdaSmrg	fence_status.ring = 0;
204400a23bdaSmrg	fence_status.fence = ibs_request.seq_no;
204500a23bdaSmrg
204600a23bdaSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
204700a23bdaSmrg		       AMDGPU_TIMEOUT_INFINITE,0, &expired);
204800a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
204900a23bdaSmrg
205000a23bdaSmrg	/* Expect the second command to wait for shader to complete */
205100a23bdaSmrg	CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);
205200a23bdaSmrg
205300a23bdaSmrg	r = amdgpu_bo_list_destroy(bo_list);
205400a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
205500a23bdaSmrg
205600a23bdaSmrg	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
205700a23bdaSmrg				     ib_result_mc_address, 4096);
205800a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
205900a23bdaSmrg
206000a23bdaSmrg	r = amdgpu_cs_ctx_free(context_handle[0]);
206100a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
206200a23bdaSmrg	r = amdgpu_cs_ctx_free(context_handle[1]);
206300a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
206400a23bdaSmrg
206500a23bdaSmrg	free(ibs_request.dependencies);
20663f012e29Smrg}
20675324fb0dSmrg
20685324fb0dSmrgstatic int amdgpu_dispatch_load_cs_shader(uint8_t *ptr,
20695324fb0dSmrg					   int cs_type)
20705324fb0dSmrg{
20715324fb0dSmrg	uint32_t shader_size;
20725324fb0dSmrg	const uint32_t *shader;
20735324fb0dSmrg
20745324fb0dSmrg	switch (cs_type) {
20755324fb0dSmrg		case CS_BUFFERCLEAR:
20765324fb0dSmrg			shader = bufferclear_cs_shader_gfx9;
20775324fb0dSmrg			shader_size = sizeof(bufferclear_cs_shader_gfx9);
20785324fb0dSmrg			break;
20795324fb0dSmrg		case CS_BUFFERCOPY:
20805324fb0dSmrg			shader = buffercopy_cs_shader_gfx9;
20815324fb0dSmrg			shader_size = sizeof(buffercopy_cs_shader_gfx9);
20825324fb0dSmrg			break;
20835324fb0dSmrg		default:
20845324fb0dSmrg			return -1;
20855324fb0dSmrg			break;
20865324fb0dSmrg	}
20875324fb0dSmrg
20885324fb0dSmrg	memcpy(ptr, shader, shader_size);
20895324fb0dSmrg	return 0;
20905324fb0dSmrg}
20915324fb0dSmrg
20925324fb0dSmrgstatic int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type)
20935324fb0dSmrg{
20945324fb0dSmrg	int i = 0;
20955324fb0dSmrg
20965324fb0dSmrg	/* Write context control and load shadowing register if necessary */
20975324fb0dSmrg	if (ip_type == AMDGPU_HW_IP_GFX) {
20985324fb0dSmrg		ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
20995324fb0dSmrg		ptr[i++] = 0x80000000;
21005324fb0dSmrg		ptr[i++] = 0x80000000;
21015324fb0dSmrg	}
21025324fb0dSmrg
21035324fb0dSmrg	/* Issue commands to set default compute state. */
21045324fb0dSmrg	/* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */
21055324fb0dSmrg	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 3);
21065324fb0dSmrg	ptr[i++] = 0x204;
21075324fb0dSmrg	i += 3;
210888f8a8d2Smrg
21095324fb0dSmrg	/* clear mmCOMPUTE_TMPRING_SIZE */
21105324fb0dSmrg	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
21115324fb0dSmrg	ptr[i++] = 0x218;
21125324fb0dSmrg	ptr[i++] = 0;
21135324fb0dSmrg
21145324fb0dSmrg	return i;
21155324fb0dSmrg}
21165324fb0dSmrg
21175324fb0dSmrgstatic int amdgpu_dispatch_write_cumask(uint32_t *ptr)
21185324fb0dSmrg{
21195324fb0dSmrg	int i = 0;
21205324fb0dSmrg
21215324fb0dSmrg	/*  Issue commands to set cu mask used in current dispatch */
21225324fb0dSmrg	/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */
21235324fb0dSmrg	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
21245324fb0dSmrg	ptr[i++] = 0x216;
21255324fb0dSmrg	ptr[i++] = 0xffffffff;
21265324fb0dSmrg	ptr[i++] = 0xffffffff;
21275324fb0dSmrg	/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */
21285324fb0dSmrg	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
21295324fb0dSmrg	ptr[i++] = 0x219;
21305324fb0dSmrg	ptr[i++] = 0xffffffff;
21315324fb0dSmrg	ptr[i++] = 0xffffffff;
21325324fb0dSmrg
21335324fb0dSmrg	return i;
21345324fb0dSmrg}
21355324fb0dSmrg
21365324fb0dSmrgstatic int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr)
21375324fb0dSmrg{
21385324fb0dSmrg	int i, j;
21395324fb0dSmrg
21405324fb0dSmrg	i = 0;
21415324fb0dSmrg
21425324fb0dSmrg	/* Writes shader state to HW */
21435324fb0dSmrg	/* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */
21445324fb0dSmrg	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
21455324fb0dSmrg	ptr[i++] = 0x20c;
21465324fb0dSmrg	ptr[i++] = (shader_addr >> 8);
21475324fb0dSmrg	ptr[i++] = (shader_addr >> 40);
21485324fb0dSmrg	/* write sh regs*/
21495324fb0dSmrg	for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) {
21505324fb0dSmrg		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
21515324fb0dSmrg		/* - Gfx9ShRegBase */
21525324fb0dSmrg		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00;
21535324fb0dSmrg		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1];
21545324fb0dSmrg	}
21555324fb0dSmrg
21565324fb0dSmrg	return i;
21575324fb0dSmrg}
21585324fb0dSmrg
21595324fb0dSmrgstatic void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
21605324fb0dSmrg					 uint32_t ip_type,
21615324fb0dSmrg					 uint32_t ring)
21625324fb0dSmrg{
21635324fb0dSmrg	amdgpu_context_handle context_handle;
21645324fb0dSmrg	amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3];
21655324fb0dSmrg	volatile unsigned char *ptr_dst;
21665324fb0dSmrg	void *ptr_shader;
21675324fb0dSmrg	uint32_t *ptr_cmd;
21685324fb0dSmrg	uint64_t mc_address_dst, mc_address_shader, mc_address_cmd;
21695324fb0dSmrg	amdgpu_va_handle va_dst, va_shader, va_cmd;
21705324fb0dSmrg	int i, r;
21715324fb0dSmrg	int bo_dst_size = 16384;
21725324fb0dSmrg	int bo_shader_size = 4096;
21735324fb0dSmrg	int bo_cmd_size = 4096;
21745324fb0dSmrg	struct amdgpu_cs_request ibs_request = {0};
21755324fb0dSmrg	struct amdgpu_cs_ib_info ib_info= {0};
21765324fb0dSmrg	amdgpu_bo_list_handle bo_list;
21775324fb0dSmrg	struct amdgpu_cs_fence fence_status = {0};
21785324fb0dSmrg	uint32_t expired;
21795324fb0dSmrg
21805324fb0dSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
21815324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
21825324fb0dSmrg
21835324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
21845324fb0dSmrg					AMDGPU_GEM_DOMAIN_GTT, 0,
21855324fb0dSmrg					&bo_cmd, (void **)&ptr_cmd,
21865324fb0dSmrg					&mc_address_cmd, &va_cmd);
21875324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
21885324fb0dSmrg	memset(ptr_cmd, 0, bo_cmd_size);
21895324fb0dSmrg
21905324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
21915324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
21925324fb0dSmrg					&bo_shader, &ptr_shader,
21935324fb0dSmrg					&mc_address_shader, &va_shader);
21945324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
219588f8a8d2Smrg	memset(ptr_shader, 0, bo_shader_size);
21965324fb0dSmrg
21975324fb0dSmrg	r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR);
21985324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
21995324fb0dSmrg
22005324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
22015324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
22025324fb0dSmrg					&bo_dst, (void **)&ptr_dst,
22035324fb0dSmrg					&mc_address_dst, &va_dst);
22045324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
22055324fb0dSmrg
22065324fb0dSmrg	i = 0;
22075324fb0dSmrg	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);
22085324fb0dSmrg
22095324fb0dSmrg	/*  Issue commands to set cu mask used in current dispatch */
22105324fb0dSmrg	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);
22115324fb0dSmrg
22125324fb0dSmrg	/* Writes shader state to HW */
22135324fb0dSmrg	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);
22145324fb0dSmrg
22155324fb0dSmrg	/* Write constant data */
22165324fb0dSmrg	/* Writes the UAV constant data to the SGPRs. */
22175324fb0dSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
22185324fb0dSmrg	ptr_cmd[i++] = 0x240;
22195324fb0dSmrg	ptr_cmd[i++] = mc_address_dst;
22205324fb0dSmrg	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
22215324fb0dSmrg	ptr_cmd[i++] = 0x400;
22225324fb0dSmrg	ptr_cmd[i++] = 0x74fac;
22235324fb0dSmrg
22245324fb0dSmrg	/* Sets a range of pixel shader constants */
22255324fb0dSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
22265324fb0dSmrg	ptr_cmd[i++] = 0x244;
22275324fb0dSmrg	ptr_cmd[i++] = 0x22222222;
22285324fb0dSmrg	ptr_cmd[i++] = 0x22222222;
22295324fb0dSmrg	ptr_cmd[i++] = 0x22222222;
22305324fb0dSmrg	ptr_cmd[i++] = 0x22222222;
22315324fb0dSmrg
223288f8a8d2Smrg	/* clear mmCOMPUTE_RESOURCE_LIMITS */
223388f8a8d2Smrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
223488f8a8d2Smrg	ptr_cmd[i++] = 0x215;
223588f8a8d2Smrg	ptr_cmd[i++] = 0;
223688f8a8d2Smrg
22375324fb0dSmrg	/* dispatch direct command */
22385324fb0dSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
22395324fb0dSmrg	ptr_cmd[i++] = 0x10;
22405324fb0dSmrg	ptr_cmd[i++] = 1;
22415324fb0dSmrg	ptr_cmd[i++] = 1;
22425324fb0dSmrg	ptr_cmd[i++] = 1;
22435324fb0dSmrg
22445324fb0dSmrg	while (i & 7)
22455324fb0dSmrg		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
22465324fb0dSmrg
22475324fb0dSmrg	resources[0] = bo_dst;
22485324fb0dSmrg	resources[1] = bo_shader;
22495324fb0dSmrg	resources[2] = bo_cmd;
22505324fb0dSmrg	r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list);
22515324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
22525324fb0dSmrg
22535324fb0dSmrg	ib_info.ib_mc_address = mc_address_cmd;
22545324fb0dSmrg	ib_info.size = i;
22555324fb0dSmrg	ibs_request.ip_type = ip_type;
22565324fb0dSmrg	ibs_request.ring = ring;
22575324fb0dSmrg	ibs_request.resources = bo_list;
22585324fb0dSmrg	ibs_request.number_of_ibs = 1;
22595324fb0dSmrg	ibs_request.ibs = &ib_info;
22605324fb0dSmrg	ibs_request.fence_info.handle = NULL;
22615324fb0dSmrg
22625324fb0dSmrg	/* submit CS */
22635324fb0dSmrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
22645324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
22655324fb0dSmrg
22665324fb0dSmrg	r = amdgpu_bo_list_destroy(bo_list);
22675324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
22685324fb0dSmrg
22695324fb0dSmrg	fence_status.ip_type = ip_type;
22705324fb0dSmrg	fence_status.ip_instance = 0;
22715324fb0dSmrg	fence_status.ring = ring;
22725324fb0dSmrg	fence_status.context = context_handle;
22735324fb0dSmrg	fence_status.fence = ibs_request.seq_no;
22745324fb0dSmrg
22755324fb0dSmrg	/* wait for IB accomplished */
22765324fb0dSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
22775324fb0dSmrg					 AMDGPU_TIMEOUT_INFINITE,
22785324fb0dSmrg					 0, &expired);
22795324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
22805324fb0dSmrg	CU_ASSERT_EQUAL(expired, true);
22815324fb0dSmrg
22825324fb0dSmrg	/* verify if memset test result meets with expected */
22835324fb0dSmrg	i = 0;
22845324fb0dSmrg	while(i < bo_dst_size) {
22855324fb0dSmrg		CU_ASSERT_EQUAL(ptr_dst[i++], 0x22);
22865324fb0dSmrg	}
22875324fb0dSmrg
22885324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
22895324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
22905324fb0dSmrg
22915324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
22925324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
22935324fb0dSmrg
22945324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
22955324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
22965324fb0dSmrg
22975324fb0dSmrg	r = amdgpu_cs_ctx_free(context_handle);
22985324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
22995324fb0dSmrg}
23005324fb0dSmrg
23015324fb0dSmrgstatic void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
23025324fb0dSmrg					uint32_t ip_type,
23035324fb0dSmrg					uint32_t ring)
23045324fb0dSmrg{
23055324fb0dSmrg	amdgpu_context_handle context_handle;
23065324fb0dSmrg	amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
23075324fb0dSmrg	volatile unsigned char *ptr_dst;
23085324fb0dSmrg	void *ptr_shader;
23095324fb0dSmrg	unsigned char *ptr_src;
23105324fb0dSmrg	uint32_t *ptr_cmd;
23115324fb0dSmrg	uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
23125324fb0dSmrg	amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
23135324fb0dSmrg	int i, r;
23145324fb0dSmrg	int bo_dst_size = 16384;
23155324fb0dSmrg	int bo_shader_size = 4096;
23165324fb0dSmrg	int bo_cmd_size = 4096;
23175324fb0dSmrg	struct amdgpu_cs_request ibs_request = {0};
23185324fb0dSmrg	struct amdgpu_cs_ib_info ib_info= {0};
23195324fb0dSmrg	uint32_t expired;
23205324fb0dSmrg	amdgpu_bo_list_handle bo_list;
23215324fb0dSmrg	struct amdgpu_cs_fence fence_status = {0};
23225324fb0dSmrg
23235324fb0dSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
23245324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
23255324fb0dSmrg
23265324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
23275324fb0dSmrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
23285324fb0dSmrg				    &bo_cmd, (void **)&ptr_cmd,
23295324fb0dSmrg				    &mc_address_cmd, &va_cmd);
23305324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
23315324fb0dSmrg	memset(ptr_cmd, 0, bo_cmd_size);
23325324fb0dSmrg
23335324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
23345324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
23355324fb0dSmrg					&bo_shader, &ptr_shader,
23365324fb0dSmrg					&mc_address_shader, &va_shader);
23375324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
233888f8a8d2Smrg	memset(ptr_shader, 0, bo_shader_size);
23395324fb0dSmrg
23405324fb0dSmrg	r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCOPY );
23415324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
23425324fb0dSmrg
23435324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
23445324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
23455324fb0dSmrg					&bo_src, (void **)&ptr_src,
23465324fb0dSmrg					&mc_address_src, &va_src);
23475324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
23485324fb0dSmrg
23495324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
23505324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
23515324fb0dSmrg					&bo_dst, (void **)&ptr_dst,
23525324fb0dSmrg					&mc_address_dst, &va_dst);
23535324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
23545324fb0dSmrg
23555324fb0dSmrg	memset(ptr_src, 0x55, bo_dst_size);
23565324fb0dSmrg
23575324fb0dSmrg	i = 0;
23585324fb0dSmrg	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);
23595324fb0dSmrg
23605324fb0dSmrg	/*  Issue commands to set cu mask used in current dispatch */
23615324fb0dSmrg	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);
23625324fb0dSmrg
23635324fb0dSmrg	/* Writes shader state to HW */
23645324fb0dSmrg	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);
23655324fb0dSmrg
23665324fb0dSmrg	/* Write constant data */
23675324fb0dSmrg	/* Writes the texture resource constants data to the SGPRs */
23685324fb0dSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
23695324fb0dSmrg	ptr_cmd[i++] = 0x240;
23705324fb0dSmrg	ptr_cmd[i++] = mc_address_src;
23715324fb0dSmrg	ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
23725324fb0dSmrg	ptr_cmd[i++] = 0x400;
23735324fb0dSmrg	ptr_cmd[i++] = 0x74fac;
23745324fb0dSmrg
23755324fb0dSmrg	/* Writes the UAV constant data to the SGPRs. */
23765324fb0dSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
23775324fb0dSmrg	ptr_cmd[i++] = 0x244;
23785324fb0dSmrg	ptr_cmd[i++] = mc_address_dst;
23795324fb0dSmrg	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
23805324fb0dSmrg	ptr_cmd[i++] = 0x400;
23815324fb0dSmrg	ptr_cmd[i++] = 0x74fac;
23825324fb0dSmrg
238388f8a8d2Smrg	/* clear mmCOMPUTE_RESOURCE_LIMITS */
238488f8a8d2Smrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
238588f8a8d2Smrg	ptr_cmd[i++] = 0x215;
238688f8a8d2Smrg	ptr_cmd[i++] = 0;
238788f8a8d2Smrg
23885324fb0dSmrg	/* dispatch direct command */
23895324fb0dSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
23905324fb0dSmrg	ptr_cmd[i++] = 0x10;
23915324fb0dSmrg	ptr_cmd[i++] = 1;
23925324fb0dSmrg	ptr_cmd[i++] = 1;
23935324fb0dSmrg	ptr_cmd[i++] = 1;
23945324fb0dSmrg
23955324fb0dSmrg	while (i & 7)
23965324fb0dSmrg		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
23975324fb0dSmrg
23985324fb0dSmrg	resources[0] = bo_shader;
23995324fb0dSmrg	resources[1] = bo_src;
24005324fb0dSmrg	resources[2] = bo_dst;
24015324fb0dSmrg	resources[3] = bo_cmd;
24025324fb0dSmrg	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
24035324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
24045324fb0dSmrg
24055324fb0dSmrg	ib_info.ib_mc_address = mc_address_cmd;
24065324fb0dSmrg	ib_info.size = i;
24075324fb0dSmrg	ibs_request.ip_type = ip_type;
24085324fb0dSmrg	ibs_request.ring = ring;
24095324fb0dSmrg	ibs_request.resources = bo_list;
24105324fb0dSmrg	ibs_request.number_of_ibs = 1;
24115324fb0dSmrg	ibs_request.ibs = &ib_info;
24125324fb0dSmrg	ibs_request.fence_info.handle = NULL;
24135324fb0dSmrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
24145324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
24155324fb0dSmrg
24165324fb0dSmrg	fence_status.ip_type = ip_type;
24175324fb0dSmrg	fence_status.ip_instance = 0;
24185324fb0dSmrg	fence_status.ring = ring;
24195324fb0dSmrg	fence_status.context = context_handle;
24205324fb0dSmrg	fence_status.fence = ibs_request.seq_no;
24215324fb0dSmrg
24225324fb0dSmrg	/* wait for IB accomplished */
24235324fb0dSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
24245324fb0dSmrg					 AMDGPU_TIMEOUT_INFINITE,
24255324fb0dSmrg					 0, &expired);
24265324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
24275324fb0dSmrg	CU_ASSERT_EQUAL(expired, true);
24285324fb0dSmrg
24295324fb0dSmrg	/* verify if memcpy test result meets with expected */
24305324fb0dSmrg	i = 0;
24315324fb0dSmrg	while(i < bo_dst_size) {
24325324fb0dSmrg		CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
24335324fb0dSmrg		i++;
24345324fb0dSmrg	}
24355324fb0dSmrg
24365324fb0dSmrg	r = amdgpu_bo_list_destroy(bo_list);
24375324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
24385324fb0dSmrg
24395324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
24405324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
24415324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
24425324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
24435324fb0dSmrg
24445324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
24455324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
24465324fb0dSmrg
24475324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
24485324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
24495324fb0dSmrg
24505324fb0dSmrg	r = amdgpu_cs_ctx_free(context_handle);
24515324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
24525324fb0dSmrg}
245388f8a8d2Smrg
245488f8a8d2Smrgstatic void amdgpu_compute_dispatch_test(void)
24555324fb0dSmrg{
24565324fb0dSmrg	int r;
24575324fb0dSmrg	struct drm_amdgpu_info_hw_ip info;
24585324fb0dSmrg	uint32_t ring_id;
24595324fb0dSmrg
24605324fb0dSmrg	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
24615324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
246288f8a8d2Smrg	if (!info.available_rings)
246388f8a8d2Smrg		printf("SKIP ... as there's no compute ring\n");
24645324fb0dSmrg
24655324fb0dSmrg	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
24665324fb0dSmrg		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id);
24675324fb0dSmrg		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id);
24685324fb0dSmrg	}
246988f8a8d2Smrg}
247088f8a8d2Smrg
247188f8a8d2Smrgstatic void amdgpu_gfx_dispatch_test(void)
247288f8a8d2Smrg{
247388f8a8d2Smrg	int r;
247488f8a8d2Smrg	struct drm_amdgpu_info_hw_ip info;
247588f8a8d2Smrg	uint32_t ring_id;
24765324fb0dSmrg
24775324fb0dSmrg	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
24785324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
247988f8a8d2Smrg	if (!info.available_rings)
248088f8a8d2Smrg		printf("SKIP ... as there's no graphics ring\n");
24815324fb0dSmrg
24825324fb0dSmrg	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
24835324fb0dSmrg		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id);
24845324fb0dSmrg		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id);
24855324fb0dSmrg	}
24865324fb0dSmrg}
24875324fb0dSmrg
24885324fb0dSmrgstatic int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type)
24895324fb0dSmrg{
24905324fb0dSmrg	int i;
24915324fb0dSmrg	uint32_t shader_offset= 256;
24925324fb0dSmrg	uint32_t mem_offset, patch_code_offset;
24935324fb0dSmrg	uint32_t shader_size, patchinfo_code_size;
24945324fb0dSmrg	const uint32_t *shader;
24955324fb0dSmrg	const uint32_t *patchinfo_code;
24965324fb0dSmrg	const uint32_t *patchcode_offset;
24975324fb0dSmrg
24985324fb0dSmrg	switch (ps_type) {
24995324fb0dSmrg		case PS_CONST:
25005324fb0dSmrg			shader = ps_const_shader_gfx9;
25015324fb0dSmrg			shader_size = sizeof(ps_const_shader_gfx9);
25025324fb0dSmrg			patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9;
25035324fb0dSmrg			patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9;
25045324fb0dSmrg			patchcode_offset = ps_const_shader_patchinfo_offset_gfx9;
25055324fb0dSmrg			break;
25065324fb0dSmrg		case PS_TEX:
25075324fb0dSmrg			shader = ps_tex_shader_gfx9;
25085324fb0dSmrg			shader_size = sizeof(ps_tex_shader_gfx9);
25095324fb0dSmrg			patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx9;
25105324fb0dSmrg			patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9;
25115324fb0dSmrg			patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9;
25125324fb0dSmrg			break;
25135324fb0dSmrg		default:
25145324fb0dSmrg			return -1;
25155324fb0dSmrg			break;
25165324fb0dSmrg	}
25175324fb0dSmrg
25185324fb0dSmrg	/* write main shader program */
25195324fb0dSmrg	for (i = 0 ; i < 10; i++) {
25205324fb0dSmrg		mem_offset = i * shader_offset;
25215324fb0dSmrg		memcpy(ptr + mem_offset, shader, shader_size);
25225324fb0dSmrg	}
25235324fb0dSmrg
25245324fb0dSmrg	/* overwrite patch codes */
25255324fb0dSmrg	for (i = 0 ; i < 10; i++) {
25265324fb0dSmrg		mem_offset = i * shader_offset + patchcode_offset[0] * sizeof(uint32_t);
25275324fb0dSmrg		patch_code_offset = i * patchinfo_code_size;
25285324fb0dSmrg		memcpy(ptr + mem_offset,
25295324fb0dSmrg			patchinfo_code + patch_code_offset,
25305324fb0dSmrg			patchinfo_code_size * sizeof(uint32_t));
25315324fb0dSmrg	}
25325324fb0dSmrg
25335324fb0dSmrg	return 0;
25345324fb0dSmrg}
25355324fb0dSmrg
25365324fb0dSmrg/* load RectPosTexFast_VS */
25375324fb0dSmrgstatic int amdgpu_draw_load_vs_shader(uint8_t *ptr)
25385324fb0dSmrg{
25395324fb0dSmrg	const uint32_t *shader;
25405324fb0dSmrg	uint32_t shader_size;
25415324fb0dSmrg
25425324fb0dSmrg	shader = vs_RectPosTexFast_shader_gfx9;
25435324fb0dSmrg	shader_size = sizeof(vs_RectPosTexFast_shader_gfx9);
25445324fb0dSmrg
25455324fb0dSmrg	memcpy(ptr, shader, shader_size);
25465324fb0dSmrg
25475324fb0dSmrg	return 0;
25485324fb0dSmrg}
25495324fb0dSmrg
25505324fb0dSmrgstatic int amdgpu_draw_init(uint32_t *ptr)
25515324fb0dSmrg{
25525324fb0dSmrg	int i = 0;
25535324fb0dSmrg	const uint32_t *preamblecache_ptr;
25545324fb0dSmrg	uint32_t preamblecache_size;
25555324fb0dSmrg
25565324fb0dSmrg	/* Write context control and load shadowing register if necessary */
25575324fb0dSmrg	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
25585324fb0dSmrg	ptr[i++] = 0x80000000;
25595324fb0dSmrg	ptr[i++] = 0x80000000;
25605324fb0dSmrg
25615324fb0dSmrg	preamblecache_ptr = preamblecache_gfx9;
25625324fb0dSmrg	preamblecache_size = sizeof(preamblecache_gfx9);
25635324fb0dSmrg
25645324fb0dSmrg	memcpy(ptr + i, preamblecache_ptr, preamblecache_size);
25655324fb0dSmrg	return i + preamblecache_size/sizeof(uint32_t);
25665324fb0dSmrg}
25675324fb0dSmrg
25685324fb0dSmrgstatic int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr,
25695324fb0dSmrg							 uint64_t dst_addr)
25705324fb0dSmrg{
25715324fb0dSmrg	int i = 0;
25725324fb0dSmrg
25735324fb0dSmrg	/* setup color buffer */
25745324fb0dSmrg	/* offset   reg
25755324fb0dSmrg	   0xA318   CB_COLOR0_BASE
25765324fb0dSmrg	   0xA319   CB_COLOR0_BASE_EXT
25775324fb0dSmrg	   0xA31A   CB_COLOR0_ATTRIB2
25785324fb0dSmrg	   0xA31B   CB_COLOR0_VIEW
25795324fb0dSmrg	   0xA31C   CB_COLOR0_INFO
25805324fb0dSmrg	   0xA31D   CB_COLOR0_ATTRIB
25815324fb0dSmrg	   0xA31E   CB_COLOR0_DCC_CONTROL
25825324fb0dSmrg	   0xA31F   CB_COLOR0_CMASK
25835324fb0dSmrg	   0xA320   CB_COLOR0_CMASK_BASE_EXT
25845324fb0dSmrg	   0xA321   CB_COLOR0_FMASK
25855324fb0dSmrg	   0xA322   CB_COLOR0_FMASK_BASE_EXT
25865324fb0dSmrg	   0xA323   CB_COLOR0_CLEAR_WORD0
25875324fb0dSmrg	   0xA324   CB_COLOR0_CLEAR_WORD1
25885324fb0dSmrg	   0xA325   CB_COLOR0_DCC_BASE
25895324fb0dSmrg	   0xA326   CB_COLOR0_DCC_BASE_EXT */
25905324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15);
25915324fb0dSmrg	ptr[i++] = 0x318;
25925324fb0dSmrg	ptr[i++] = dst_addr >> 8;
25935324fb0dSmrg	ptr[i++] = dst_addr >> 40;
25945324fb0dSmrg	ptr[i++] = 0x7c01f;
25955324fb0dSmrg	ptr[i++] = 0;
25965324fb0dSmrg	ptr[i++] = 0x50438;
25975324fb0dSmrg	ptr[i++] = 0x10140000;
25985324fb0dSmrg	i += 9;
25995324fb0dSmrg
26005324fb0dSmrg	/* mmCB_MRT0_EPITCH */
26015324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
26025324fb0dSmrg	ptr[i++] = 0x1e8;
26035324fb0dSmrg	ptr[i++] = 0x1f;
26045324fb0dSmrg
26055324fb0dSmrg	/* 0xA32B   CB_COLOR1_BASE */
26065324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
26075324fb0dSmrg	ptr[i++] = 0x32b;
26085324fb0dSmrg	ptr[i++] = 0;
26095324fb0dSmrg
26105324fb0dSmrg	/* 0xA33A   CB_COLOR1_BASE */
26115324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
26125324fb0dSmrg	ptr[i++] = 0x33a;
26135324fb0dSmrg	ptr[i++] = 0;
26145324fb0dSmrg
26155324fb0dSmrg	/* SPI_SHADER_COL_FORMAT */
26165324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
26175324fb0dSmrg	ptr[i++] = 0x1c5;
26185324fb0dSmrg	ptr[i++] = 9;
26195324fb0dSmrg
26205324fb0dSmrg	/* Setup depth buffer */
26215324fb0dSmrg	/* mmDB_Z_INFO */
26225324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
26235324fb0dSmrg	ptr[i++] = 0xe;
26245324fb0dSmrg	i += 2;
26255324fb0dSmrg
26265324fb0dSmrg	return i;
26275324fb0dSmrg}
26285324fb0dSmrg
26295324fb0dSmrgstatic int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr)
26305324fb0dSmrg{
26315324fb0dSmrg	int i = 0;
26325324fb0dSmrg	const uint32_t *cached_cmd_ptr;
26335324fb0dSmrg	uint32_t cached_cmd_size;
26345324fb0dSmrg
26355324fb0dSmrg	/* mmPA_SC_TILE_STEERING_OVERRIDE */
26365324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
26375324fb0dSmrg	ptr[i++] = 0xd7;
26385324fb0dSmrg	ptr[i++] = 0;
26395324fb0dSmrg
26405324fb0dSmrg	ptr[i++] = 0xffff1000;
26415324fb0dSmrg	ptr[i++] = 0xc0021000;
26425324fb0dSmrg
26435324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
26445324fb0dSmrg	ptr[i++] = 0xd7;
26455324fb0dSmrg	ptr[i++] = 1;
26465324fb0dSmrg
26475324fb0dSmrg	/* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
26485324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16);
26495324fb0dSmrg	ptr[i++] = 0x2fe;
26505324fb0dSmrg	i += 16;
26515324fb0dSmrg
26525324fb0dSmrg	/* mmPA_SC_CENTROID_PRIORITY_0 */
26535324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
26545324fb0dSmrg	ptr[i++] = 0x2f5;
26555324fb0dSmrg	i += 2;
26565324fb0dSmrg
26575324fb0dSmrg	cached_cmd_ptr = cached_cmd_gfx9;
26585324fb0dSmrg	cached_cmd_size = sizeof(cached_cmd_gfx9);
26595324fb0dSmrg
26605324fb0dSmrg	memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size);
26615324fb0dSmrg	i += cached_cmd_size/sizeof(uint32_t);
26625324fb0dSmrg
26635324fb0dSmrg	return i;
26645324fb0dSmrg}
26655324fb0dSmrg
26665324fb0dSmrgstatic int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr,
26675324fb0dSmrg						  int ps_type,
26685324fb0dSmrg						  uint64_t shader_addr)
26695324fb0dSmrg{
26705324fb0dSmrg	int i = 0;
26715324fb0dSmrg
26725324fb0dSmrg	/* mmPA_CL_VS_OUT_CNTL */
26735324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
26745324fb0dSmrg	ptr[i++] = 0x207;
26755324fb0dSmrg	ptr[i++] = 0;
26765324fb0dSmrg
26775324fb0dSmrg	/* mmSPI_SHADER_PGM_RSRC3_VS */
26785324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
26795324fb0dSmrg	ptr[i++] = 0x46;
26805324fb0dSmrg	ptr[i++] = 0xffff;
26815324fb0dSmrg
26825324fb0dSmrg	/* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */
26835324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
26845324fb0dSmrg	ptr[i++] = 0x48;
26855324fb0dSmrg	ptr[i++] = shader_addr >> 8;
26865324fb0dSmrg	ptr[i++] = shader_addr >> 40;
26875324fb0dSmrg
26885324fb0dSmrg	/* mmSPI_SHADER_PGM_RSRC1_VS */
26895324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
26905324fb0dSmrg	ptr[i++] = 0x4a;
26915324fb0dSmrg	ptr[i++] = 0xc0081;
26925324fb0dSmrg	/* mmSPI_SHADER_PGM_RSRC2_VS */
26935324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
26945324fb0dSmrg	ptr[i++] = 0x4b;
26955324fb0dSmrg	ptr[i++] = 0x18;
26965324fb0dSmrg
26975324fb0dSmrg	/* mmSPI_VS_OUT_CONFIG */
26985324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
26995324fb0dSmrg	ptr[i++] = 0x1b1;
27005324fb0dSmrg	ptr[i++] = 2;
27015324fb0dSmrg
27025324fb0dSmrg	/* mmSPI_SHADER_POS_FORMAT */
27035324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
27045324fb0dSmrg	ptr[i++] = 0x1c3;
27055324fb0dSmrg	ptr[i++] = 4;
27065324fb0dSmrg
27075324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
27085324fb0dSmrg	ptr[i++] = 0x4c;
27095324fb0dSmrg	i += 2;
27105324fb0dSmrg	ptr[i++] = 0x42000000;
27115324fb0dSmrg	ptr[i++] = 0x42000000;
27125324fb0dSmrg
27135324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
27145324fb0dSmrg	ptr[i++] = 0x50;
27155324fb0dSmrg	i += 2;
27165324fb0dSmrg	if (ps_type == PS_CONST) {
27175324fb0dSmrg		i += 2;
27185324fb0dSmrg	} else if (ps_type == PS_TEX) {
27195324fb0dSmrg		ptr[i++] = 0x3f800000;
27205324fb0dSmrg		ptr[i++] = 0x3f800000;
27215324fb0dSmrg	}
27225324fb0dSmrg
27235324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
27245324fb0dSmrg	ptr[i++] = 0x54;
27255324fb0dSmrg	i += 4;
27265324fb0dSmrg
27275324fb0dSmrg	return i;
27285324fb0dSmrg}
27295324fb0dSmrg
27305324fb0dSmrgstatic int amdgpu_draw_ps_write2hw(uint32_t *ptr,
27315324fb0dSmrg				   int ps_type,
27325324fb0dSmrg				   uint64_t shader_addr)
27335324fb0dSmrg{
27345324fb0dSmrg	int i, j;
27355324fb0dSmrg	const uint32_t *sh_registers;
27365324fb0dSmrg	const uint32_t *context_registers;
27375324fb0dSmrg	uint32_t num_sh_reg, num_context_reg;
27385324fb0dSmrg
27395324fb0dSmrg	if (ps_type == PS_CONST) {
27405324fb0dSmrg		sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9;
27415324fb0dSmrg		context_registers = (const uint32_t *)ps_const_context_reg_gfx9;
27425324fb0dSmrg		num_sh_reg = ps_num_sh_registers_gfx9;
27435324fb0dSmrg		num_context_reg = ps_num_context_registers_gfx9;
27445324fb0dSmrg	} else if (ps_type == PS_TEX) {
27455324fb0dSmrg		sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9;
27465324fb0dSmrg		context_registers = (const uint32_t *)ps_tex_context_reg_gfx9;
27475324fb0dSmrg		num_sh_reg = ps_num_sh_registers_gfx9;
27485324fb0dSmrg		num_context_reg = ps_num_context_registers_gfx9;
27495324fb0dSmrg	}
27505324fb0dSmrg
27515324fb0dSmrg	i = 0;
27525324fb0dSmrg
27535324fb0dSmrg	/* 0x2c07   SPI_SHADER_PGM_RSRC3_PS
27545324fb0dSmrg	   0x2c08   SPI_SHADER_PGM_LO_PS
27555324fb0dSmrg	   0x2c09   SPI_SHADER_PGM_HI_PS */
27565324fb0dSmrg	shader_addr += 256 * 9;
27575324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
27585324fb0dSmrg	ptr[i++] = 0x7;
27595324fb0dSmrg	ptr[i++] = 0xffff;
27605324fb0dSmrg	ptr[i++] = shader_addr >> 8;
27615324fb0dSmrg	ptr[i++] = shader_addr >> 40;
27625324fb0dSmrg
27635324fb0dSmrg	for (j = 0; j < num_sh_reg; j++) {
27645324fb0dSmrg		ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
27655324fb0dSmrg		ptr[i++] = sh_registers[j * 2] - 0x2c00;
27665324fb0dSmrg		ptr[i++] = sh_registers[j * 2 + 1];
27675324fb0dSmrg	}
27685324fb0dSmrg
27695324fb0dSmrg	for (j = 0; j < num_context_reg; j++) {
27705324fb0dSmrg		if (context_registers[j * 2] != 0xA1C5) {
27715324fb0dSmrg			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
27725324fb0dSmrg			ptr[i++] = context_registers[j * 2] - 0xa000;
27735324fb0dSmrg			ptr[i++] = context_registers[j * 2 + 1];
27745324fb0dSmrg		}
27755324fb0dSmrg
27765324fb0dSmrg		if (context_registers[j * 2] == 0xA1B4) {
27775324fb0dSmrg			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
27785324fb0dSmrg			ptr[i++] = 0x1b3;
27795324fb0dSmrg			ptr[i++] = 2;
27805324fb0dSmrg		}
27815324fb0dSmrg	}
27825324fb0dSmrg
27835324fb0dSmrg	return i;
27845324fb0dSmrg}
27855324fb0dSmrg
27865324fb0dSmrgstatic int amdgpu_draw_draw(uint32_t *ptr)
27875324fb0dSmrg{
27885324fb0dSmrg	int i = 0;
27895324fb0dSmrg
27905324fb0dSmrg	/* mmIA_MULTI_VGT_PARAM */
27915324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
27925324fb0dSmrg	ptr[i++] = 0x40000258;
27935324fb0dSmrg	ptr[i++] = 0xd00ff;
27945324fb0dSmrg
27955324fb0dSmrg	/* mmVGT_PRIMITIVE_TYPE */
27965324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
27975324fb0dSmrg	ptr[i++] = 0x10000242;
27985324fb0dSmrg	ptr[i++] = 0x11;
27995324fb0dSmrg
28005324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1);
28015324fb0dSmrg	ptr[i++] = 3;
28025324fb0dSmrg	ptr[i++] = 2;
28035324fb0dSmrg
28045324fb0dSmrg	return i;
28055324fb0dSmrg}
28065324fb0dSmrg
28075324fb0dSmrgvoid amdgpu_memset_draw(amdgpu_device_handle device_handle,
28085324fb0dSmrg			amdgpu_bo_handle bo_shader_ps,
28095324fb0dSmrg			amdgpu_bo_handle bo_shader_vs,
28105324fb0dSmrg			uint64_t mc_address_shader_ps,
28115324fb0dSmrg			uint64_t mc_address_shader_vs,
28125324fb0dSmrg			uint32_t ring_id)
28135324fb0dSmrg{
28145324fb0dSmrg	amdgpu_context_handle context_handle;
28155324fb0dSmrg	amdgpu_bo_handle bo_dst, bo_cmd, resources[4];
28165324fb0dSmrg	volatile unsigned char *ptr_dst;
28175324fb0dSmrg	uint32_t *ptr_cmd;
28185324fb0dSmrg	uint64_t mc_address_dst, mc_address_cmd;
28195324fb0dSmrg	amdgpu_va_handle va_dst, va_cmd;
28205324fb0dSmrg	int i, r;
28215324fb0dSmrg	int bo_dst_size = 16384;
28225324fb0dSmrg	int bo_cmd_size = 4096;
28235324fb0dSmrg	struct amdgpu_cs_request ibs_request = {0};
28245324fb0dSmrg	struct amdgpu_cs_ib_info ib_info = {0};
28255324fb0dSmrg	struct amdgpu_cs_fence fence_status = {0};
28265324fb0dSmrg	uint32_t expired;
28275324fb0dSmrg	amdgpu_bo_list_handle bo_list;
28285324fb0dSmrg
28295324fb0dSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
28305324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
28315324fb0dSmrg
28325324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
28335324fb0dSmrg					AMDGPU_GEM_DOMAIN_GTT, 0,
28345324fb0dSmrg					&bo_cmd, (void **)&ptr_cmd,
28355324fb0dSmrg					&mc_address_cmd, &va_cmd);
28365324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
28375324fb0dSmrg	memset(ptr_cmd, 0, bo_cmd_size);
28385324fb0dSmrg
28395324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
28405324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
28415324fb0dSmrg					&bo_dst, (void **)&ptr_dst,
28425324fb0dSmrg					&mc_address_dst, &va_dst);
28435324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
28445324fb0dSmrg
28455324fb0dSmrg	i = 0;
28465324fb0dSmrg	i += amdgpu_draw_init(ptr_cmd + i);
28475324fb0dSmrg
28485324fb0dSmrg	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst);
28495324fb0dSmrg
28505324fb0dSmrg	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i);
28515324fb0dSmrg
28525324fb0dSmrg	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs);
28535324fb0dSmrg
28545324fb0dSmrg	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps);
28555324fb0dSmrg
28565324fb0dSmrg	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
28575324fb0dSmrg	ptr_cmd[i++] = 0xc;
28585324fb0dSmrg	ptr_cmd[i++] = 0x33333333;
28595324fb0dSmrg	ptr_cmd[i++] = 0x33333333;
28605324fb0dSmrg	ptr_cmd[i++] = 0x33333333;
28615324fb0dSmrg	ptr_cmd[i++] = 0x33333333;
28625324fb0dSmrg
28635324fb0dSmrg	i += amdgpu_draw_draw(ptr_cmd + i);
28645324fb0dSmrg
28655324fb0dSmrg	while (i & 7)
28665324fb0dSmrg		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
28675324fb0dSmrg
28685324fb0dSmrg	resources[0] = bo_dst;
28695324fb0dSmrg	resources[1] = bo_shader_ps;
28705324fb0dSmrg	resources[2] = bo_shader_vs;
28715324fb0dSmrg	resources[3] = bo_cmd;
28725324fb0dSmrg	r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list);
28735324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
28745324fb0dSmrg
28755324fb0dSmrg	ib_info.ib_mc_address = mc_address_cmd;
28765324fb0dSmrg	ib_info.size = i;
28775324fb0dSmrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
28785324fb0dSmrg	ibs_request.ring = ring_id;
28795324fb0dSmrg	ibs_request.resources = bo_list;
28805324fb0dSmrg	ibs_request.number_of_ibs = 1;
28815324fb0dSmrg	ibs_request.ibs = &ib_info;
28825324fb0dSmrg	ibs_request.fence_info.handle = NULL;
28835324fb0dSmrg
28845324fb0dSmrg	/* submit CS */
28855324fb0dSmrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
28865324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
28875324fb0dSmrg
28885324fb0dSmrg	r = amdgpu_bo_list_destroy(bo_list);
28895324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
28905324fb0dSmrg
28915324fb0dSmrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
28925324fb0dSmrg	fence_status.ip_instance = 0;
28935324fb0dSmrg	fence_status.ring = ring_id;
28945324fb0dSmrg	fence_status.context = context_handle;
28955324fb0dSmrg	fence_status.fence = ibs_request.seq_no;
28965324fb0dSmrg
28975324fb0dSmrg	/* wait for IB accomplished */
28985324fb0dSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
28995324fb0dSmrg					 AMDGPU_TIMEOUT_INFINITE,
29005324fb0dSmrg					 0, &expired);
29015324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
29025324fb0dSmrg	CU_ASSERT_EQUAL(expired, true);
29035324fb0dSmrg
29045324fb0dSmrg	/* verify if memset test result meets with expected */
29055324fb0dSmrg	i = 0;
29065324fb0dSmrg	while(i < bo_dst_size) {
29075324fb0dSmrg		CU_ASSERT_EQUAL(ptr_dst[i++], 0x33);
29085324fb0dSmrg	}
29095324fb0dSmrg
29105324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
29115324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
29125324fb0dSmrg
29135324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
29145324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
29155324fb0dSmrg
29165324fb0dSmrg	r = amdgpu_cs_ctx_free(context_handle);
29175324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
29185324fb0dSmrg}
29195324fb0dSmrg
29205324fb0dSmrgstatic void amdgpu_memset_draw_test(amdgpu_device_handle device_handle,
29215324fb0dSmrg				    uint32_t ring)
29225324fb0dSmrg{
29235324fb0dSmrg	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
29245324fb0dSmrg	void *ptr_shader_ps;
29255324fb0dSmrg	void *ptr_shader_vs;
29265324fb0dSmrg	uint64_t mc_address_shader_ps, mc_address_shader_vs;
29275324fb0dSmrg	amdgpu_va_handle va_shader_ps, va_shader_vs;
29285324fb0dSmrg	int r;
29295324fb0dSmrg	int bo_shader_size = 4096;
29305324fb0dSmrg
29315324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
29325324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
29335324fb0dSmrg					&bo_shader_ps, &ptr_shader_ps,
29345324fb0dSmrg					&mc_address_shader_ps, &va_shader_ps);
29355324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
293688f8a8d2Smrg	memset(ptr_shader_ps, 0, bo_shader_size);
29375324fb0dSmrg
29385324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
29395324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
29405324fb0dSmrg					&bo_shader_vs, &ptr_shader_vs,
29415324fb0dSmrg					&mc_address_shader_vs, &va_shader_vs);
29425324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
294388f8a8d2Smrg	memset(ptr_shader_vs, 0, bo_shader_size);
29445324fb0dSmrg
29455324fb0dSmrg	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_CONST);
29465324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
29475324fb0dSmrg
29485324fb0dSmrg	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
29495324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
29505324fb0dSmrg
29515324fb0dSmrg	amdgpu_memset_draw(device_handle, bo_shader_ps, bo_shader_vs,
29525324fb0dSmrg			mc_address_shader_ps, mc_address_shader_vs, ring);
29535324fb0dSmrg
29545324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
29555324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
29565324fb0dSmrg
29575324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
29585324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
29595324fb0dSmrg}
29605324fb0dSmrg
29615324fb0dSmrgstatic void amdgpu_memcpy_draw(amdgpu_device_handle device_handle,
29625324fb0dSmrg			       amdgpu_bo_handle bo_shader_ps,
29635324fb0dSmrg			       amdgpu_bo_handle bo_shader_vs,
29645324fb0dSmrg			       uint64_t mc_address_shader_ps,
29655324fb0dSmrg			       uint64_t mc_address_shader_vs,
29665324fb0dSmrg			       uint32_t ring)
29675324fb0dSmrg{
29685324fb0dSmrg	amdgpu_context_handle context_handle;
29695324fb0dSmrg	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
29705324fb0dSmrg	volatile unsigned char *ptr_dst;
29715324fb0dSmrg	unsigned char *ptr_src;
29725324fb0dSmrg	uint32_t *ptr_cmd;
29735324fb0dSmrg	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
29745324fb0dSmrg	amdgpu_va_handle va_dst, va_src, va_cmd;
29755324fb0dSmrg	int i, r;
29765324fb0dSmrg	int bo_size = 16384;
29775324fb0dSmrg	int bo_cmd_size = 4096;
29785324fb0dSmrg	struct amdgpu_cs_request ibs_request = {0};
29795324fb0dSmrg	struct amdgpu_cs_ib_info ib_info= {0};
29805324fb0dSmrg	uint32_t hang_state, hangs, expired;
29815324fb0dSmrg	amdgpu_bo_list_handle bo_list;
29825324fb0dSmrg	struct amdgpu_cs_fence fence_status = {0};
29835324fb0dSmrg
29845324fb0dSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
29855324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
29865324fb0dSmrg
29875324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
29885324fb0dSmrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
29895324fb0dSmrg				    &bo_cmd, (void **)&ptr_cmd,
29905324fb0dSmrg				    &mc_address_cmd, &va_cmd);
29915324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
29925324fb0dSmrg	memset(ptr_cmd, 0, bo_cmd_size);
29935324fb0dSmrg
29945324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
29955324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
29965324fb0dSmrg					&bo_src, (void **)&ptr_src,
29975324fb0dSmrg					&mc_address_src, &va_src);
29985324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
29995324fb0dSmrg
30005324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
30015324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
30025324fb0dSmrg					&bo_dst, (void **)&ptr_dst,
30035324fb0dSmrg					&mc_address_dst, &va_dst);
30045324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
30055324fb0dSmrg
30065324fb0dSmrg	memset(ptr_src, 0x55, bo_size);
30075324fb0dSmrg
30085324fb0dSmrg	i = 0;
30095324fb0dSmrg	i += amdgpu_draw_init(ptr_cmd + i);
30105324fb0dSmrg
30115324fb0dSmrg	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst);
30125324fb0dSmrg
30135324fb0dSmrg	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i);
30145324fb0dSmrg
30155324fb0dSmrg	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs);
30165324fb0dSmrg
30175324fb0dSmrg	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);
30185324fb0dSmrg
30195324fb0dSmrg	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
30205324fb0dSmrg	ptr_cmd[i++] = 0xc;
30215324fb0dSmrg	ptr_cmd[i++] = mc_address_src >> 8;
30225324fb0dSmrg	ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
30235324fb0dSmrg	ptr_cmd[i++] = 0x7c01f;
30245324fb0dSmrg	ptr_cmd[i++] = 0x90500fac;
30255324fb0dSmrg	ptr_cmd[i++] = 0x3e000;
30265324fb0dSmrg	i += 3;
30275324fb0dSmrg
30285324fb0dSmrg	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
30295324fb0dSmrg	ptr_cmd[i++] = 0x14;
30305324fb0dSmrg	ptr_cmd[i++] = 0x92;
30315324fb0dSmrg	i += 3;
30325324fb0dSmrg
303388f8a8d2Smrg	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
30345324fb0dSmrg	ptr_cmd[i++] = 0x191;
30355324fb0dSmrg	ptr_cmd[i++] = 0;
30365324fb0dSmrg
30375324fb0dSmrg	i += amdgpu_draw_draw(ptr_cmd + i);
30385324fb0dSmrg
30395324fb0dSmrg	while (i & 7)
30405324fb0dSmrg		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
30415324fb0dSmrg
30425324fb0dSmrg	resources[0] = bo_dst;
30435324fb0dSmrg	resources[1] = bo_src;
30445324fb0dSmrg	resources[2] = bo_shader_ps;
30455324fb0dSmrg	resources[3] = bo_shader_vs;
30465324fb0dSmrg	resources[4] = bo_cmd;
30475324fb0dSmrg	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
30485324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
30495324fb0dSmrg
30505324fb0dSmrg	ib_info.ib_mc_address = mc_address_cmd;
30515324fb0dSmrg	ib_info.size = i;
30525324fb0dSmrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
30535324fb0dSmrg	ibs_request.ring = ring;
30545324fb0dSmrg	ibs_request.resources = bo_list;
30555324fb0dSmrg	ibs_request.number_of_ibs = 1;
30565324fb0dSmrg	ibs_request.ibs = &ib_info;
30575324fb0dSmrg	ibs_request.fence_info.handle = NULL;
30585324fb0dSmrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
30595324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
30605324fb0dSmrg
30615324fb0dSmrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
30625324fb0dSmrg	fence_status.ip_instance = 0;
30635324fb0dSmrg	fence_status.ring = ring;
30645324fb0dSmrg	fence_status.context = context_handle;
30655324fb0dSmrg	fence_status.fence = ibs_request.seq_no;
30665324fb0dSmrg
30675324fb0dSmrg	/* wait for IB accomplished */
30685324fb0dSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
30695324fb0dSmrg					 AMDGPU_TIMEOUT_INFINITE,
30705324fb0dSmrg					 0, &expired);
30715324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
30725324fb0dSmrg	CU_ASSERT_EQUAL(expired, true);
30735324fb0dSmrg
30745324fb0dSmrg	/* verify if memcpy test result meets with expected */
30755324fb0dSmrg	i = 0;
30765324fb0dSmrg	while(i < bo_size) {
30775324fb0dSmrg		CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
30785324fb0dSmrg		i++;
30795324fb0dSmrg	}
30805324fb0dSmrg
30815324fb0dSmrg	r = amdgpu_bo_list_destroy(bo_list);
30825324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
30835324fb0dSmrg
30845324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
30855324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
30865324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
30875324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
30885324fb0dSmrg
30895324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
30905324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
30915324fb0dSmrg
30925324fb0dSmrg	r = amdgpu_cs_ctx_free(context_handle);
30935324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
30945324fb0dSmrg}
30955324fb0dSmrg
30965324fb0dSmrgstatic void amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring)
30975324fb0dSmrg{
30985324fb0dSmrg	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
30995324fb0dSmrg	void *ptr_shader_ps;
31005324fb0dSmrg	void *ptr_shader_vs;
31015324fb0dSmrg	uint64_t mc_address_shader_ps, mc_address_shader_vs;
31025324fb0dSmrg	amdgpu_va_handle va_shader_ps, va_shader_vs;
31035324fb0dSmrg	int bo_shader_size = 4096;
31045324fb0dSmrg	int r;
31055324fb0dSmrg
31065324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
31075324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
31085324fb0dSmrg					&bo_shader_ps, &ptr_shader_ps,
31095324fb0dSmrg					&mc_address_shader_ps, &va_shader_ps);
31105324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
311188f8a8d2Smrg	memset(ptr_shader_ps, 0, bo_shader_size);
31125324fb0dSmrg
31135324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
31145324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
31155324fb0dSmrg					&bo_shader_vs, &ptr_shader_vs,
31165324fb0dSmrg					&mc_address_shader_vs, &va_shader_vs);
31175324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
311888f8a8d2Smrg	memset(ptr_shader_vs, 0, bo_shader_size);
31195324fb0dSmrg
31205324fb0dSmrg	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_TEX);
31215324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
31225324fb0dSmrg
31235324fb0dSmrg	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
31245324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
31255324fb0dSmrg
31265324fb0dSmrg	amdgpu_memcpy_draw(device_handle, bo_shader_ps, bo_shader_vs,
31275324fb0dSmrg			mc_address_shader_ps, mc_address_shader_vs, ring);
31285324fb0dSmrg
31295324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
31305324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
31315324fb0dSmrg
31325324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
31335324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
31345324fb0dSmrg}
31355324fb0dSmrg
31365324fb0dSmrgstatic void amdgpu_draw_test(void)
31375324fb0dSmrg{
31385324fb0dSmrg	int r;
31395324fb0dSmrg	struct drm_amdgpu_info_hw_ip info;
31405324fb0dSmrg	uint32_t ring_id;
31415324fb0dSmrg
31425324fb0dSmrg	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
31435324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
314488f8a8d2Smrg	if (!info.available_rings)
314588f8a8d2Smrg		printf("SKIP ... as there's no graphics ring\n");
31465324fb0dSmrg
31475324fb0dSmrg	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
31485324fb0dSmrg		amdgpu_memset_draw_test(device_handle, ring_id);
31495324fb0dSmrg		amdgpu_memcpy_draw_test(device_handle, ring_id);
31505324fb0dSmrg	}
31515324fb0dSmrg}
315288f8a8d2Smrg
315388f8a8d2Smrgstatic void amdgpu_gpu_reset_test(void)
315488f8a8d2Smrg{
315588f8a8d2Smrg	int r;
315688f8a8d2Smrg	char debugfs_path[256], tmp[10];
315788f8a8d2Smrg	int fd;
315888f8a8d2Smrg	struct stat sbuf;
315988f8a8d2Smrg	amdgpu_context_handle context_handle;
316088f8a8d2Smrg	uint32_t hang_state, hangs;
316188f8a8d2Smrg
316288f8a8d2Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
316388f8a8d2Smrg	CU_ASSERT_EQUAL(r, 0);
316488f8a8d2Smrg
316588f8a8d2Smrg	r = fstat(drm_amdgpu[0], &sbuf);
316688f8a8d2Smrg	CU_ASSERT_EQUAL(r, 0);
316788f8a8d2Smrg
316888f8a8d2Smrg	sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/amdgpu_gpu_recover", minor(sbuf.st_rdev));
316988f8a8d2Smrg	fd = open(debugfs_path, O_RDONLY);
317088f8a8d2Smrg	CU_ASSERT(fd >= 0);
317188f8a8d2Smrg
317288f8a8d2Smrg	r = read(fd, tmp, sizeof(tmp)/sizeof(char));
317388f8a8d2Smrg	CU_ASSERT(r > 0);
317488f8a8d2Smrg
317588f8a8d2Smrg	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
317688f8a8d2Smrg	CU_ASSERT_EQUAL(r, 0);
317788f8a8d2Smrg	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
317888f8a8d2Smrg
317988f8a8d2Smrg	close(fd);
318088f8a8d2Smrg	r = amdgpu_cs_ctx_free(context_handle);
318188f8a8d2Smrg	CU_ASSERT_EQUAL(r, 0);
318288f8a8d2Smrg
318388f8a8d2Smrg	amdgpu_compute_dispatch_test();
318488f8a8d2Smrg	amdgpu_gfx_dispatch_test();
318588f8a8d2Smrg}
3186