basic_tests.c revision 9bd392ad
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif
#include <sys/stat.h>
#include <fcntl.h>
#if HAVE_ALLOCA_H
# include <alloca.h>
#endif
#include <sys/wait.h>

#include "CUnit/Basic.h"

#include "amdgpu_test.h"
#include "amdgpu_drm.h"
#include "util_math.h"

443f012e29Smrgstatic  amdgpu_device_handle device_handle;
453f012e29Smrgstatic  uint32_t  major_version;
463f012e29Smrgstatic  uint32_t  minor_version;
47d8807b2fSmrgstatic  uint32_t  family_id;
483f012e29Smrg
493f012e29Smrgstatic void amdgpu_query_info_test(void);
503f012e29Smrgstatic void amdgpu_command_submission_gfx(void);
513f012e29Smrgstatic void amdgpu_command_submission_compute(void);
52d8807b2fSmrgstatic void amdgpu_command_submission_multi_fence(void);
533f012e29Smrgstatic void amdgpu_command_submission_sdma(void);
543f012e29Smrgstatic void amdgpu_userptr_test(void);
553f012e29Smrgstatic void amdgpu_semaphore_test(void);
5600a23bdaSmrgstatic void amdgpu_sync_dependency_test(void);
5700a23bdaSmrgstatic void amdgpu_bo_eviction_test(void);
5888f8a8d2Smrgstatic void amdgpu_compute_dispatch_test(void);
5988f8a8d2Smrgstatic void amdgpu_gfx_dispatch_test(void);
605324fb0dSmrgstatic void amdgpu_draw_test(void);
6188f8a8d2Smrgstatic void amdgpu_gpu_reset_test(void);
623f012e29Smrg
633f012e29Smrgstatic void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
643f012e29Smrgstatic void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
653f012e29Smrgstatic void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
6600a23bdaSmrgstatic void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
6700a23bdaSmrg				       unsigned ip_type,
6800a23bdaSmrg				       int instance, int pm4_dw, uint32_t *pm4_src,
6900a23bdaSmrg				       int res_cnt, amdgpu_bo_handle *resources,
7000a23bdaSmrg				       struct amdgpu_cs_ib_info *ib_info,
7100a23bdaSmrg				       struct amdgpu_cs_request *ibs_request);
7200a23bdaSmrg
733f012e29SmrgCU_TestInfo basic_tests[] = {
743f012e29Smrg	{ "Query Info Test",  amdgpu_query_info_test },
753f012e29Smrg	{ "Userptr Test",  amdgpu_userptr_test },
7600a23bdaSmrg	{ "bo eviction Test",  amdgpu_bo_eviction_test },
773f012e29Smrg	{ "Command submission Test (GFX)",  amdgpu_command_submission_gfx },
783f012e29Smrg	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
79d8807b2fSmrg	{ "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
803f012e29Smrg	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
813f012e29Smrg	{ "SW semaphore Test",  amdgpu_semaphore_test },
8200a23bdaSmrg	{ "Sync dependency Test",  amdgpu_sync_dependency_test },
8388f8a8d2Smrg	{ "Dispatch Test (Compute)",  amdgpu_compute_dispatch_test },
8488f8a8d2Smrg	{ "Dispatch Test (GFX)",  amdgpu_gfx_dispatch_test },
855324fb0dSmrg	{ "Draw Test",  amdgpu_draw_test },
8688f8a8d2Smrg	{ "GPU reset Test", amdgpu_gpu_reset_test },
873f012e29Smrg	CU_TEST_INFO_NULL,
883f012e29Smrg};
/* Test buffer size: at least 8 KiB, and never smaller than one page. */
#define BUFFER_SIZE (MAX2(8 * 1024, getpagesize()))

/* SDMA packet header: opcode lives in the low byte. */
#define SDMA_PKT_HEADER_op_offset 0
#define SDMA_PKT_HEADER_op_mask   0x000000FF
#define SDMA_PKT_HEADER_op_shift  0
#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)

#define SDMA_OPCODE_CONSTANT_FILL  11
#define		SDMA_CONSTANT_FILL_EXTRA_SIZE(x)	((x) << 14)
	/* 0 = byte fill
	 * 2 = DW fill
	 */

/*
 * Build an SDMA header dword: op in bits [7:0], sub_op in [15:8] and the
 * "extra" field in [31:16].  The extra field is converted to unsigned before
 * shifting so that a value with bit 15 set (for example
 * SDMA_CONSTANT_FILL_EXTRA_SIZE(2)) does not overflow a signed int.
 */
#define SDMA_PACKET(op, sub_op, e)	((((unsigned)(e) & 0xFFFF) << 16) |	\
					(((sub_op) & 0xFF) << 8) |		\
					(((op) & 0xFF) << 0))

#define	SDMA_OPCODE_WRITE				  2
#define		SDMA_WRITE_SUB_OPCODE_LINEAR               0
/* "WRTIE" is the spelling the identifier already has; kept so users still compile. */
#define		SDMA_WRTIE_SUB_OPCODE_TILED                1

#define	SDMA_OPCODE_COPY				  1
#define		SDMA_COPY_SUB_OPCODE_LINEAR                0

#define GFX_COMPUTE_NOP  0xffff1000
#define SDMA_NOP  0x0

/* PM4 */
#define	PACKET_TYPE0	0
#define	PACKET_TYPE1	1
#define	PACKET_TYPE2	2
#define	PACKET_TYPE3	3

/* Field extractors for a PM4 header dword. */
#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)

/* Type-0 header: packet type in [31:30], count in [29:16], register in [15:0]. */
#define PACKET0(reg, n)	(((unsigned)PACKET_TYPE0 << 30) |		\
			 ((reg) & 0xFFFF) |				\
			 (((n) & 0x3FFF) << 16))
#define CP_PACKET2			0x80000000
#define		PACKET2_PAD_SHIFT		0
#define		PACKET2_PAD_MASK		(0x3fffffff << 0)

/* NOTE(review): REG_SET is not defined in the visible part of this file. */
#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))

/*
 * Type-3 header: packet type in [31:30], count in [29:16], opcode in [15:8].
 * The type is shifted as unsigned because 3 << 30 does not fit in a signed int.
 */
#define PACKET3(op, n)	(((unsigned)PACKET_TYPE3 << 30) |		\
			 (((op) & 0xFF) << 8) |				\
			 (((n) & 0x3FFF) << 16))

/*
 * Type-3 header with bit 1 set.  Fully parenthesised so the macro can be
 * used inside a larger expression without the trailing "| (1 << 1)" binding
 * to a neighbouring operator.
 */
#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | (1 << 1))

/* Packet 3 types */
#define	PACKET3_NOP					0x10

#define	PACKET3_WRITE_DATA				0x37
#define		WRITE_DATA_DST_SEL(x)                   ((x) << 8)
		/* 0 - register
		 * 1 - memory (sync - via GRBM)
		 * 2 - gl2
		 * 3 - gds
		 * 4 - reserved
		 * 5 - memory (async - direct)
		 */
#define		WR_ONE_ADDR                             (1 << 16)
#define		WR_CONFIRM                              (1 << 20)
#define		WRITE_DATA_CACHE_POLICY(x)              ((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 */
/* Shifted as unsigned: selector 2 reaches bit 31, which overflows a signed int. */
#define		WRITE_DATA_ENGINE_SEL(x)                ((unsigned)(x) << 30)
		/* 0 - me
		 * 1 - pfp
		 * 2 - ce
		 */

#define	PACKET3_DMA_DATA				0x50
/* 1. header
 * 2. CONTROL
 * 3. SRC_ADDR_LO or DATA [31:0]
 * 4. SRC_ADDR_HI [31:0]
 * 5. DST_ADDR_LO [31:0]
 * 6. DST_ADDR_HI [7:0]
 * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
 */
/* CONTROL */
#define		PACKET3_DMA_DATA_ENGINE(x)     ((x) << 0)
		/* 0 - ME
		 * 1 - PFP
		 */
#define		PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#define		PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15)
#define		PACKET3_DMA_DATA_DST_SEL(x)  ((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#define		PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#define		PACKET3_DMA_DATA_DST_VOLATILE (1 << 27)
#define		PACKET3_DMA_DATA_SRC_SEL(x)  ((unsigned)(x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
/* Bit 31: must be an unsigned shift, 1 << 31 overflows a signed int. */
#define		PACKET3_DMA_DATA_CP_SYNC     (1u << 31)
/* COMMAND */
#define		PACKET3_DMA_DATA_DIS_WC      (1 << 21)
#define		PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#define		PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#define		PACKET3_DMA_DATA_CMD_SAS     (1 << 26)
		/* 0 - memory
		 * 1 - register
		 */
#define		PACKET3_DMA_DATA_CMD_DAS     (1 << 27)
		/* 0 - memory
		 * 1 - register
		 */
#define		PACKET3_DMA_DATA_CMD_SAIC    (1 << 28)
#define		PACKET3_DMA_DATA_CMD_DAIC    (1 << 29)
#define		PACKET3_DMA_DATA_CMD_RAW_WAIT  (1 << 30)

/*
 * SI-family SDMA header: op in [31:28], b at bit 26, t at bit 23, s at
 * bit 22 and cnt in [19:0].  op is shifted as unsigned because opcodes with
 * the top bit set (e.g. SDMA_OPCODE_CONSTANT_FILL_SI, SDMA_NOP_SI) would
 * overflow a signed int when moved into bits [31:28].
 */
#define SDMA_PACKET_SI(op, b, t, s, cnt)	((((unsigned)(op) & 0xF) << 28) |	\
						(((b) & 0x1) << 26) |		\
						(((t) & 0x1) << 23) |		\
						(((s) & 0x1) << 22) |		\
						(((cnt) & 0xFFFFF) << 0))
#define	SDMA_OPCODE_COPY_SI	3
#define SDMA_OPCODE_CONSTANT_FILL_SI	13
#define SDMA_NOP_SI  0xf
#define GFX_COMPUTE_NOP_SI 0x80000000
#define	PACKET3_DMA_DATA_SI	0x41
#define		PACKET3_DMA_DATA_SI_ENGINE(x)     ((x) << 27)
		/* 0 - ME
		 * 1 - PFP
		 */
#define		PACKET3_DMA_DATA_SI_DST_SEL(x)  ((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#define		PACKET3_DMA_DATA_SI_SRC_SEL(x)  ((unsigned)(x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
/* Bit 31: must be an unsigned shift, 1 << 31 overflows a signed int. */
#define		PACKET3_DMA_DATA_SI_CP_SYNC     (1u << 31)


#define PKT3_CONTEXT_CONTROL                   0x28
#define     CONTEXT_CONTROL_LOAD_ENABLE(x)     (((unsigned)(x) & 0x1) << 31)
#define     CONTEXT_CONTROL_LOAD_CE_RAM(x)     (((unsigned)(x) & 0x1) << 28)
#define     CONTEXT_CONTROL_SHADOW_ENABLE(x)   (((unsigned)(x) & 0x1) << 31)

#define PKT3_CLEAR_STATE                       0x12

#define PKT3_SET_SH_REG                        0x76
#define		PACKET3_SET_SH_REG_START			0x00002c00

#define	PACKET3_DISPATCH_DIRECT				0x15
#define PACKET3_EVENT_WRITE				0x46
#define PACKET3_ACQUIRE_MEM				0x58
#define PACKET3_SET_CONTEXT_REG				0x69
#define PACKET3_SET_UCONFIG_REG				0x79
#define PACKET3_DRAW_INDEX_AUTO				0x2D
/* gfx 8 */
#define mmCOMPUTE_PGM_LO                                                        0x2e0c
#define mmCOMPUTE_PGM_RSRC1                                                     0x2e12
#define mmCOMPUTE_TMPRING_SIZE                                                  0x2e18
#define mmCOMPUTE_USER_DATA_0                                                   0x2e40
#define mmCOMPUTE_USER_DATA_1                                                   0x2e41
#define mmCOMPUTE_RESOURCE_LIMITS                                               0x2e15
#define mmCOMPUTE_NUM_THREAD_X                                                  0x2e07

/*
 * Reverse the byte order of a 32-bit value.  The argument is parenthesised
 * and converted to uint32_t once per use, so expressions such as
 * SWAP_32(a | b) are swapped as a whole and the << 24 never shifts into the
 * sign bit of an int.  The result is still a constant expression when the
 * argument is one, so it may be used in static initialisers.
 */
#define SWAP_32(num) ((((uint32_t)(num) & 0xff000000u) >> 24) | \
		      (((uint32_t)(num) & 0x0000ff00u) << 8) | \
		      (((uint32_t)(num) & 0x00ff0000u) >> 8) | \
		      (((uint32_t)(num) & 0x000000ffu) << 24))


28500a23bdaSmrg/* Shader code
28600a23bdaSmrg * void main()
28700a23bdaSmrg{
28800a23bdaSmrg
28900a23bdaSmrg	float x = some_input;
29000a23bdaSmrg		for (unsigned i = 0; i < 1000000; i++)
29100a23bdaSmrg  	x = sin(x);
29200a23bdaSmrg
29300a23bdaSmrg	u[0] = 42u;
29400a23bdaSmrg}
29500a23bdaSmrg*/
29600a23bdaSmrg
29700a23bdaSmrgstatic  uint32_t shader_bin[] = {
29800a23bdaSmrg	SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
29900a23bdaSmrg	SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
30000a23bdaSmrg	SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
30100a23bdaSmrg	SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
30200a23bdaSmrg};
30300a23bdaSmrg
30400a23bdaSmrg#define CODE_OFFSET 512
30500a23bdaSmrg#define DATA_OFFSET 1024
30600a23bdaSmrg
/* Selects which compute-shader variant a test uses. */
enum cs_type {
	CS_BUFFERCLEAR,
	CS_BUFFERCOPY,
	CS_HANG,
	CS_HANG_SLOW
};

/* gfx9 compute shader binary for the buffer-clear test. */
static const uint32_t bufferclear_cs_shader_gfx9[] = {
    0xD1FD0000, 0x04010C08, 0x7E020204, 0x7E040205,
    0x7E060206, 0x7E080207, 0xE01C2000, 0x80000100,
    0xBF810000
};

/* {register offset, value} pairs programmed alongside the buffer-clear shader. */
static const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = {
	{0x2e12, 0x000C0041},	/* mmCOMPUTE_PGM_RSRC1 */
	{0x2e13, 0x00000090},	/* mmCOMPUTE_PGM_RSRC2 */
	{0x2e07, 0x00000040},	/* mmCOMPUTE_NUM_THREAD_X */
	{0x2e08, 0x00000001},	/* mmCOMPUTE_NUM_THREAD_Y */
	{0x2e09, 0x00000001},	/* mmCOMPUTE_NUM_THREAD_Z */
};

/* Number of rows in bufferclear_cs_shader_registers_gfx9. */
static const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5;

/* gfx9 compute shader binary for the buffer-copy test. */
static const uint32_t buffercopy_cs_shader_gfx9[] = {
    0xD1FD0000, 0x04010C08, 0xE00C2000, 0x80000100,
    0xBF8C0F70, 0xE01C2000, 0x80010100, 0xBF810000
};

/*
 * Pre-built PM4 command stream for gfx9.  Most entries are type-3 packets
 * with opcode 0x69 (PACKET3_SET_CONTEXT_REG) or 0x79
 * (PACKET3_SET_UCONFIG_REG), each followed by a register offset and values.
 */
static const uint32_t preamblecache_gfx9[] = {
	0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
	0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
	0xc0026900, 0xb4,  0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
	0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
	0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
	0xc0016900, 0x2d5, 0x10000, 0xc0016900,  0x2dc, 0x0,
	0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
	0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
	0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20,
	0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
	0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0,
	0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
	0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
	0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0,
	0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1,
	0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0,
	0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0,
	0xc0017900, 0x24b, 0x0
};

/* Selects which pixel-shader variant a test uses. */
enum ps_type {
	PS_CONST,
	PS_TEX,
	PS_HANG,
	PS_HANG_SLOW
};

/* gfx9 pixel shader binary for the PS_CONST case. */
static const uint32_t ps_const_shader_gfx9[] = {
    0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
    0xD2960000, 0x00020300, 0xD2960001, 0x00020702,
    0xC4001C0F, 0x00000100, 0xBF810000
};

/* Number of dwords in each patch row below. */
static const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6;

/*
 * Alternative 6-dword sequences for the patch location.  Row 4 is identical
 * to the dwords already present in ps_const_shader_gfx9 at the patch offset.
 */
static const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = {
    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 },
     { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 }
    }
};

/* Dword index in ps_const_shader_gfx9 where a patch row is applied. */
static const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = {
    0x00000004
};

/* Row count of the *_sh_registers_gfx9 tables. */
static const uint32_t ps_num_sh_registers_gfx9 = 2;

/* {register offset, value} pairs for the PS_CONST shader. */
static const uint32_t ps_const_sh_registers_gfx9[][2] = {
    {0x2C0A, 0x000C0040}, /* mmSPI_SHADER_PGM_RSRC1_PS */
    {0x2C0B, 0x00000008}, /* mmSPI_SHADER_PGM_RSRC2_PS */
};

/* Row count of the *_context_reg_gfx9 tables. */
static const uint32_t ps_num_context_registers_gfx9 = 7;

/* {context register offset, value} pairs for the PS_CONST shader. */
static const uint32_t ps_const_context_reg_gfx9[][2] = {
    {0xA1B4, 0x00000002}, /* mmSPI_PS_INPUT_ADDR */
    {0xA1B6, 0x00000000}, /* mmSPI_PS_IN_CONTROL */
    {0xA08F, 0x0000000F}, /* mmCB_SHADER_MASK */
    {0xA203, 0x00000010}, /* mmDB_SHADER_CONTROL */
    {0xA1C4, 0x00000000}, /* mmSPI_SHADER_Z_FORMAT */
    {0xA1B8, 0x00000000}, /* mmSPI_BARYC_CNTL, always 0 for now */
    {0xA1C5, 0x00000004}, /* mmSPI_SHADER_COL_FORMAT */
};

/* gfx9 pixel shader binary for the PS_TEX case. */
static const uint32_t ps_tex_shader_gfx9[] = {
    0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000,
    0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00,
    0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000,
    0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F,
    0x00000100, 0xBF810000
};

/* Dword index in ps_tex_shader_gfx9 where a patch row is applied. */
static const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = {
    0x0000000B
};

/* Number of dwords in each patch row below. */
static const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6;

/*
 * Alternative 6-dword sequences for the patch location.  Row 4 is identical
 * to the dwords already present in ps_tex_shader_gfx9 at the patch offset.
 */
static const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = {
    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 },
     { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 }
    }
};

/* {register offset, value} pairs for the PS_TEX shader. */
static const uint32_t ps_tex_sh_registers_gfx9[][2] = {
    {0x2C0A, 0x000C0081}, /* mmSPI_SHADER_PGM_RSRC1_PS */
    {0x2C0B, 0x00000018}, /* mmSPI_SHADER_PGM_RSRC2_PS */
};

/* {context register offset, value} pairs for the PS_TEX shader. */
static const uint32_t ps_tex_context_reg_gfx9[][2] = {
    {0xA1B4, 0x00000002}, /* mmSPI_PS_INPUT_ADDR */
    {0xA1B6, 0x00000001}, /* mmSPI_PS_IN_CONTROL */
    {0xA08F, 0x0000000F}, /* mmCB_SHADER_MASK */
    {0xA203, 0x00000010}, /* mmDB_SHADER_CONTROL */
    {0xA1C4, 0x00000000}, /* mmSPI_SHADER_Z_FORMAT */
    {0xA1B8, 0x00000000}, /* mmSPI_BARYC_CNTL, always 0 for now */
    {0xA1C5, 0x00000004}, /* mmSPI_SHADER_COL_FORMAT */
};

/* gfx9 vertex shader binary ("RectPosTexFast"). */
static const uint32_t vs_RectPosTexFast_shader_gfx9[] = {
    0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100,
    0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206,
    0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080,
    0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003,
    0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101,
    0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903,
    0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100,
    0xC400020F, 0x05060403, 0xBF810000
};

/*
 * Pre-built PM4 command stream for gfx9, made of type-3 packets with
 * opcode 0x69 (PACKET3_SET_CONTEXT_REG), each followed by a register offset
 * and its values.
 */
static const uint32_t cached_cmd_gfx9[] = {
	0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0,
	0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
	0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf,
	0xc0056900, 0x105, 0x0, 0x0,  0x0, 0x0, 0x12,
	0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0,
	0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
	0xc0026900, 0x292, 0x20, 0x60201b8,
	0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
};

/* Pixel shader binary; NOTE(review): presumably the PS_HANG variant, going by the name — confirm at the use site. */
unsigned int memcpy_ps_hang[] = {
        0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100,
        0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001,
        0xC80D0101, 0xBF8C007F, 0xF0800F00, 0x00010002,
        0xBEFE040C, 0xBF8C0F70, 0xBF800000, 0xBF800000,
        0xF800180F, 0x03020100, 0xBF810000
};

/*
 * A shader binary split into three consecutive regions.  In every
 * descriptor below, header_length + body_length + foot_length equals the
 * number of dwords in the code array that .shader points to.
 */
struct amdgpu_test_shader {
	uint32_t *shader;		/* first dword of the code array */
	uint32_t header_length;		/* dwords in the leading region */
	uint32_t body_length;		/* dwords in the middle region */
	uint32_t foot_length;		/* dwords in the trailing region */
};

unsigned int memcpy_cs_hang_slow_ai_codes[] = {
    0xd1fd0000, 0x04010c08, 0xe00c2000, 0x80000100,
    0xbf8c0f70, 0xe01c2000, 0x80010100, 0xbf810000
};

struct amdgpu_test_shader memcpy_cs_hang_slow_ai = {
	.shader = memcpy_cs_hang_slow_ai_codes,
	.header_length = 4,
	.body_length = 3,
	.foot_length = 1
};

unsigned int memcpy_cs_hang_slow_rv_codes[] = {
    0x8e00860c, 0x32000000, 0xe00c2000, 0x80010100,
    0xbf8c0f70, 0xe01c2000, 0x80020100, 0xbf810000
};

struct amdgpu_test_shader memcpy_cs_hang_slow_rv = {
	.shader = memcpy_cs_hang_slow_rv_codes,
	.header_length = 4,
	.body_length = 3,
	.foot_length = 1
};

unsigned int memcpy_ps_hang_slow_ai_codes[] = {
        0xbefc000c, 0xbe8e017e, 0xbefe077e, 0xd4080000,
        0xd4090001, 0xd40c0100, 0xd40d0101, 0xf0800f00,
        0x00400002, 0xbefe010e, 0xbf8c0f70, 0xbf800000,
        0xbf800000, 0xbf800000, 0xbf800000, 0xc400180f,
        0x03020100, 0xbf810000
};

struct amdgpu_test_shader memcpy_ps_hang_slow_ai = {
	.shader = memcpy_ps_hang_slow_ai_codes,
	.header_length = 7,
	.body_length = 2,
	.foot_length = 9
};

5297cdc0497Smrgint amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size,
5307cdc0497Smrg			unsigned alignment, unsigned heap, uint64_t alloc_flags,
5317cdc0497Smrg			uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu,
5327cdc0497Smrg			uint64_t *mc_address,
5337cdc0497Smrg			amdgpu_va_handle *va_handle)
5347cdc0497Smrg{
5357cdc0497Smrg	struct amdgpu_bo_alloc_request request = {};
5367cdc0497Smrg	amdgpu_bo_handle buf_handle;
5377cdc0497Smrg	amdgpu_va_handle handle;
5387cdc0497Smrg	uint64_t vmc_addr;
5397cdc0497Smrg	int r;
5407cdc0497Smrg
5417cdc0497Smrg	request.alloc_size = size;
5427cdc0497Smrg	request.phys_alignment = alignment;
5437cdc0497Smrg	request.preferred_heap = heap;
5447cdc0497Smrg	request.flags = alloc_flags;
5457cdc0497Smrg
5467cdc0497Smrg	r = amdgpu_bo_alloc(dev, &request, &buf_handle);
5477cdc0497Smrg	if (r)
5487cdc0497Smrg		return r;
5497cdc0497Smrg
5507cdc0497Smrg	r = amdgpu_va_range_alloc(dev,
5517cdc0497Smrg				  amdgpu_gpu_va_range_general,
5527cdc0497Smrg				  size, alignment, 0, &vmc_addr,
5537cdc0497Smrg				  &handle, 0);
5547cdc0497Smrg	if (r)
5557cdc0497Smrg		goto error_va_alloc;
5567cdc0497Smrg
5577cdc0497Smrg	r = amdgpu_bo_va_op_raw(dev, buf_handle, 0,  ALIGN(size, getpagesize()), vmc_addr,
5587cdc0497Smrg				   AMDGPU_VM_PAGE_READABLE |
5597cdc0497Smrg				   AMDGPU_VM_PAGE_WRITEABLE |
5607cdc0497Smrg				   AMDGPU_VM_PAGE_EXECUTABLE |
5617cdc0497Smrg				   mapping_flags,
5627cdc0497Smrg				   AMDGPU_VA_OP_MAP);
5637cdc0497Smrg	if (r)
5647cdc0497Smrg		goto error_va_map;
5657cdc0497Smrg
5667cdc0497Smrg	r = amdgpu_bo_cpu_map(buf_handle, cpu);
5677cdc0497Smrg	if (r)
5687cdc0497Smrg		goto error_cpu_map;
5697cdc0497Smrg
5707cdc0497Smrg	*bo = buf_handle;
5717cdc0497Smrg	*mc_address = vmc_addr;
5727cdc0497Smrg	*va_handle = handle;
5737cdc0497Smrg
5747cdc0497Smrg	return 0;
5757cdc0497Smrg
5767cdc0497Smrg error_cpu_map:
5777cdc0497Smrg	amdgpu_bo_cpu_unmap(buf_handle);
5787cdc0497Smrg
5797cdc0497Smrg error_va_map:
5807cdc0497Smrg	amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);
5817cdc0497Smrg
5827cdc0497Smrg error_va_alloc:
5837cdc0497Smrg	amdgpu_bo_free(buf_handle);
5847cdc0497Smrg	return r;
5857cdc0497Smrg}
5867cdc0497Smrg
5877cdc0497Smrg
5887cdc0497Smrg
5893f012e29Smrgint suite_basic_tests_init(void)
5903f012e29Smrg{
591d8807b2fSmrg	struct amdgpu_gpu_info gpu_info = {0};
5923f012e29Smrg	int r;
5933f012e29Smrg
5943f012e29Smrg	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
5953f012e29Smrg				   &minor_version, &device_handle);
5963f012e29Smrg
597d8807b2fSmrg	if (r) {
598037b3c26Smrg		if ((r == -EACCES) && (errno == EACCES))
599037b3c26Smrg			printf("\n\nError:%s. "
600037b3c26Smrg				"Hint:Try to run this test program as root.",
601037b3c26Smrg				strerror(errno));
6023f012e29Smrg		return CUE_SINIT_FAILED;
603037b3c26Smrg	}
604d8807b2fSmrg
605d8807b2fSmrg	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
606d8807b2fSmrg	if (r)
607d8807b2fSmrg		return CUE_SINIT_FAILED;
608d8807b2fSmrg
609d8807b2fSmrg	family_id = gpu_info.family_id;
610d8807b2fSmrg
611d8807b2fSmrg	return CUE_SUCCESS;
6123f012e29Smrg}
6133f012e29Smrg
6143f012e29Smrgint suite_basic_tests_clean(void)
6153f012e29Smrg{
6163f012e29Smrg	int r = amdgpu_device_deinitialize(device_handle);
6173f012e29Smrg
6183f012e29Smrg	if (r == 0)
6193f012e29Smrg		return CUE_SUCCESS;
6203f012e29Smrg	else
6213f012e29Smrg		return CUE_SCLEAN_FAILED;
6223f012e29Smrg}
6233f012e29Smrg
6243f012e29Smrgstatic void amdgpu_query_info_test(void)
6253f012e29Smrg{
6263f012e29Smrg	struct amdgpu_gpu_info gpu_info = {0};
6273f012e29Smrg	uint32_t version, feature;
6283f012e29Smrg	int r;
6293f012e29Smrg
6303f012e29Smrg	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
6313f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
6323f012e29Smrg
6333f012e29Smrg	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
6343f012e29Smrg					  0, &version, &feature);
6353f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
6363f012e29Smrg}
6373f012e29Smrg
6383f012e29Smrgstatic void amdgpu_command_submission_gfx_separate_ibs(void)
6393f012e29Smrg{
6403f012e29Smrg	amdgpu_context_handle context_handle;
6413f012e29Smrg	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
6423f012e29Smrg	void *ib_result_cpu, *ib_result_ce_cpu;
6433f012e29Smrg	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
6443f012e29Smrg	struct amdgpu_cs_request ibs_request = {0};
6453f012e29Smrg	struct amdgpu_cs_ib_info ib_info[2];
6463f012e29Smrg	struct amdgpu_cs_fence fence_status = {0};
6473f012e29Smrg	uint32_t *ptr;
6483f012e29Smrg	uint32_t expired;
6493f012e29Smrg	amdgpu_bo_list_handle bo_list;
6503f012e29Smrg	amdgpu_va_handle va_handle, va_handle_ce;
651d8807b2fSmrg	int r, i = 0;
6523f012e29Smrg
6533f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
6543f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
6553f012e29Smrg
6563f012e29Smrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
6573f012e29Smrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
6583f012e29Smrg				    &ib_result_handle, &ib_result_cpu,
6593f012e29Smrg				    &ib_result_mc_address, &va_handle);
6603f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
6613f012e29Smrg
6623f012e29Smrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
6633f012e29Smrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
6643f012e29Smrg				    &ib_result_ce_handle, &ib_result_ce_cpu,
6653f012e29Smrg				    &ib_result_ce_mc_address, &va_handle_ce);
6663f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
6673f012e29Smrg
6683f012e29Smrg	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
6693f012e29Smrg			       ib_result_ce_handle, &bo_list);
6703f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
6713f012e29Smrg
6723f012e29Smrg	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
6733f012e29Smrg
6743f012e29Smrg	/* IT_SET_CE_DE_COUNTERS */
6753f012e29Smrg	ptr = ib_result_ce_cpu;
676d8807b2fSmrg	if (family_id != AMDGPU_FAMILY_SI) {
677d8807b2fSmrg		ptr[i++] = 0xc0008900;
678d8807b2fSmrg		ptr[i++] = 0;
679d8807b2fSmrg	}
680d8807b2fSmrg	ptr[i++] = 0xc0008400;
681d8807b2fSmrg	ptr[i++] = 1;
6823f012e29Smrg	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
683d8807b2fSmrg	ib_info[0].size = i;
6843f012e29Smrg	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
6853f012e29Smrg
6863f012e29Smrg	/* IT_WAIT_ON_CE_COUNTER */
6873f012e29Smrg	ptr = ib_result_cpu;
6883f012e29Smrg	ptr[0] = 0xc0008600;
6893f012e29Smrg	ptr[1] = 0x00000001;
6903f012e29Smrg	ib_info[1].ib_mc_address = ib_result_mc_address;
6913f012e29Smrg	ib_info[1].size = 2;
6923f012e29Smrg
6933f012e29Smrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
6943f012e29Smrg	ibs_request.number_of_ibs = 2;
6953f012e29Smrg	ibs_request.ibs = ib_info;
6963f012e29Smrg	ibs_request.resources = bo_list;
6973f012e29Smrg	ibs_request.fence_info.handle = NULL;
6983f012e29Smrg
6993f012e29Smrg	r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
7003f012e29Smrg
7013f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7023f012e29Smrg
7033f012e29Smrg	fence_status.context = context_handle;
7043f012e29Smrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
7053f012e29Smrg	fence_status.ip_instance = 0;
7063f012e29Smrg	fence_status.fence = ibs_request.seq_no;
7073f012e29Smrg
7083f012e29Smrg	r = amdgpu_cs_query_fence_status(&fence_status,
7093f012e29Smrg					 AMDGPU_TIMEOUT_INFINITE,
7103f012e29Smrg					 0, &expired);
7113f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7123f012e29Smrg
7133f012e29Smrg	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
7143f012e29Smrg				     ib_result_mc_address, 4096);
7153f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7163f012e29Smrg
7173f012e29Smrg	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
7183f012e29Smrg				     ib_result_ce_mc_address, 4096);
7193f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7203f012e29Smrg
7213f012e29Smrg	r = amdgpu_bo_list_destroy(bo_list);
7223f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7233f012e29Smrg
7243f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
7253f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7263f012e29Smrg
7273f012e29Smrg}
7283f012e29Smrg
7293f012e29Smrgstatic void amdgpu_command_submission_gfx_shared_ib(void)
7303f012e29Smrg{
7313f012e29Smrg	amdgpu_context_handle context_handle;
7323f012e29Smrg	amdgpu_bo_handle ib_result_handle;
7333f012e29Smrg	void *ib_result_cpu;
7343f012e29Smrg	uint64_t ib_result_mc_address;
7353f012e29Smrg	struct amdgpu_cs_request ibs_request = {0};
7363f012e29Smrg	struct amdgpu_cs_ib_info ib_info[2];
7373f012e29Smrg	struct amdgpu_cs_fence fence_status = {0};
7383f012e29Smrg	uint32_t *ptr;
7393f012e29Smrg	uint32_t expired;
7403f012e29Smrg	amdgpu_bo_list_handle bo_list;
7413f012e29Smrg	amdgpu_va_handle va_handle;
742d8807b2fSmrg	int r, i = 0;
7433f012e29Smrg
7443f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
7453f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7463f012e29Smrg
7473f012e29Smrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
7483f012e29Smrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
7493f012e29Smrg				    &ib_result_handle, &ib_result_cpu,
7503f012e29Smrg				    &ib_result_mc_address, &va_handle);
7513f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7523f012e29Smrg
7533f012e29Smrg	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
7543f012e29Smrg			       &bo_list);
7553f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7563f012e29Smrg
7573f012e29Smrg	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
7583f012e29Smrg
7593f012e29Smrg	/* IT_SET_CE_DE_COUNTERS */
7603f012e29Smrg	ptr = ib_result_cpu;
761d8807b2fSmrg	if (family_id != AMDGPU_FAMILY_SI) {
762d8807b2fSmrg		ptr[i++] = 0xc0008900;
763d8807b2fSmrg		ptr[i++] = 0;
764d8807b2fSmrg	}
765d8807b2fSmrg	ptr[i++] = 0xc0008400;
766d8807b2fSmrg	ptr[i++] = 1;
7673f012e29Smrg	ib_info[0].ib_mc_address = ib_result_mc_address;
768d8807b2fSmrg	ib_info[0].size = i;
7693f012e29Smrg	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
7703f012e29Smrg
7713f012e29Smrg	ptr = (uint32_t *)ib_result_cpu + 4;
7723f012e29Smrg	ptr[0] = 0xc0008600;
7733f012e29Smrg	ptr[1] = 0x00000001;
7743f012e29Smrg	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
7753f012e29Smrg	ib_info[1].size = 2;
7763f012e29Smrg
7773f012e29Smrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
7783f012e29Smrg	ibs_request.number_of_ibs = 2;
7793f012e29Smrg	ibs_request.ibs = ib_info;
7803f012e29Smrg	ibs_request.resources = bo_list;
7813f012e29Smrg	ibs_request.fence_info.handle = NULL;
7823f012e29Smrg
7833f012e29Smrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
7843f012e29Smrg
7853f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7863f012e29Smrg
7873f012e29Smrg	fence_status.context = context_handle;
7883f012e29Smrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
7893f012e29Smrg	fence_status.ip_instance = 0;
7903f012e29Smrg	fence_status.fence = ibs_request.seq_no;
7913f012e29Smrg
7923f012e29Smrg	r = amdgpu_cs_query_fence_status(&fence_status,
7933f012e29Smrg					 AMDGPU_TIMEOUT_INFINITE,
7943f012e29Smrg					 0, &expired);
7953f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7963f012e29Smrg
7973f012e29Smrg	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
7983f012e29Smrg				     ib_result_mc_address, 4096);
7993f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
8003f012e29Smrg
8013f012e29Smrg	r = amdgpu_bo_list_destroy(bo_list);
8023f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
8033f012e29Smrg
8043f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
8053f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
8063f012e29Smrg}
8073f012e29Smrg
8083f012e29Smrgstatic void amdgpu_command_submission_gfx_cp_write_data(void)
8093f012e29Smrg{
8103f012e29Smrg	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
8113f012e29Smrg}
8123f012e29Smrg
8133f012e29Smrgstatic void amdgpu_command_submission_gfx_cp_const_fill(void)
8143f012e29Smrg{
8153f012e29Smrg	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
8163f012e29Smrg}
8173f012e29Smrg
8183f012e29Smrgstatic void amdgpu_command_submission_gfx_cp_copy_data(void)
8193f012e29Smrg{
8203f012e29Smrg	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
8213f012e29Smrg}
8223f012e29Smrg
82300a23bdaSmrgstatic void amdgpu_bo_eviction_test(void)
82400a23bdaSmrg{
82500a23bdaSmrg	const int sdma_write_length = 1024;
82600a23bdaSmrg	const int pm4_dw = 256;
82700a23bdaSmrg	amdgpu_context_handle context_handle;
82800a23bdaSmrg	amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
82900a23bdaSmrg	amdgpu_bo_handle *resources;
83000a23bdaSmrg	uint32_t *pm4;
83100a23bdaSmrg	struct amdgpu_cs_ib_info *ib_info;
83200a23bdaSmrg	struct amdgpu_cs_request *ibs_request;
83300a23bdaSmrg	uint64_t bo1_mc, bo2_mc;
83400a23bdaSmrg	volatile unsigned char *bo1_cpu, *bo2_cpu;
83500a23bdaSmrg	int i, j, r, loop1, loop2;
83600a23bdaSmrg	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
83700a23bdaSmrg	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
83800a23bdaSmrg	struct amdgpu_heap_info vram_info, gtt_info;
83900a23bdaSmrg
84000a23bdaSmrg	pm4 = calloc(pm4_dw, sizeof(*pm4));
84100a23bdaSmrg	CU_ASSERT_NOT_EQUAL(pm4, NULL);
84200a23bdaSmrg
84300a23bdaSmrg	ib_info = calloc(1, sizeof(*ib_info));
84400a23bdaSmrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
84500a23bdaSmrg
84600a23bdaSmrg	ibs_request = calloc(1, sizeof(*ibs_request));
84700a23bdaSmrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
84800a23bdaSmrg
84900a23bdaSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
85000a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
85100a23bdaSmrg
85200a23bdaSmrg	/* prepare resource */
85300a23bdaSmrg	resources = calloc(4, sizeof(amdgpu_bo_handle));
85400a23bdaSmrg	CU_ASSERT_NOT_EQUAL(resources, NULL);
85500a23bdaSmrg
85600a23bdaSmrg	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
85700a23bdaSmrg				   0, &vram_info);
85800a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
85900a23bdaSmrg
86000a23bdaSmrg	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
86100a23bdaSmrg				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
86200a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
86300a23bdaSmrg	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
86400a23bdaSmrg				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
86500a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
86600a23bdaSmrg
86700a23bdaSmrg	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
86800a23bdaSmrg				   0, &gtt_info);
86900a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
87000a23bdaSmrg
87100a23bdaSmrg	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
87200a23bdaSmrg				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
87300a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
87400a23bdaSmrg	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
87500a23bdaSmrg				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
87600a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
87700a23bdaSmrg
87800a23bdaSmrg
87900a23bdaSmrg
88000a23bdaSmrg	loop1 = loop2 = 0;
88100a23bdaSmrg	/* run 9 circle to test all mapping combination */
88200a23bdaSmrg	while(loop1 < 2) {
88300a23bdaSmrg		while(loop2 < 2) {
88400a23bdaSmrg			/* allocate UC bo1for sDMA use */
88500a23bdaSmrg			r = amdgpu_bo_alloc_and_map(device_handle,
88600a23bdaSmrg						    sdma_write_length, 4096,
88700a23bdaSmrg						    AMDGPU_GEM_DOMAIN_GTT,
88800a23bdaSmrg						    gtt_flags[loop1], &bo1,
88900a23bdaSmrg						    (void**)&bo1_cpu, &bo1_mc,
89000a23bdaSmrg						    &bo1_va_handle);
89100a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
89200a23bdaSmrg
89300a23bdaSmrg			/* set bo1 */
89400a23bdaSmrg			memset((void*)bo1_cpu, 0xaa, sdma_write_length);
89500a23bdaSmrg
89600a23bdaSmrg			/* allocate UC bo2 for sDMA use */
89700a23bdaSmrg			r = amdgpu_bo_alloc_and_map(device_handle,
89800a23bdaSmrg						    sdma_write_length, 4096,
89900a23bdaSmrg						    AMDGPU_GEM_DOMAIN_GTT,
90000a23bdaSmrg						    gtt_flags[loop2], &bo2,
90100a23bdaSmrg						    (void**)&bo2_cpu, &bo2_mc,
90200a23bdaSmrg						    &bo2_va_handle);
90300a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
90400a23bdaSmrg
90500a23bdaSmrg			/* clear bo2 */
90600a23bdaSmrg			memset((void*)bo2_cpu, 0, sdma_write_length);
90700a23bdaSmrg
90800a23bdaSmrg			resources[0] = bo1;
90900a23bdaSmrg			resources[1] = bo2;
91000a23bdaSmrg			resources[2] = vram_max[loop2];
91100a23bdaSmrg			resources[3] = gtt_max[loop2];
91200a23bdaSmrg
91300a23bdaSmrg			/* fulfill PM4: test DMA copy linear */
91400a23bdaSmrg			i = j = 0;
91500a23bdaSmrg			if (family_id == AMDGPU_FAMILY_SI) {
91600a23bdaSmrg				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
91700a23bdaSmrg							  sdma_write_length);
91800a23bdaSmrg				pm4[i++] = 0xffffffff & bo2_mc;
91900a23bdaSmrg				pm4[i++] = 0xffffffff & bo1_mc;
92000a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
92100a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
92200a23bdaSmrg			} else {
92300a23bdaSmrg				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
92400a23bdaSmrg				if (family_id >= AMDGPU_FAMILY_AI)
92500a23bdaSmrg					pm4[i++] = sdma_write_length - 1;
92600a23bdaSmrg				else
92700a23bdaSmrg					pm4[i++] = sdma_write_length;
92800a23bdaSmrg				pm4[i++] = 0;
92900a23bdaSmrg				pm4[i++] = 0xffffffff & bo1_mc;
93000a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
93100a23bdaSmrg				pm4[i++] = 0xffffffff & bo2_mc;
93200a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
93300a23bdaSmrg			}
93400a23bdaSmrg
93500a23bdaSmrg			amdgpu_test_exec_cs_helper(context_handle,
93600a23bdaSmrg						   AMDGPU_HW_IP_DMA, 0,
93700a23bdaSmrg						   i, pm4,
93800a23bdaSmrg						   4, resources,
93900a23bdaSmrg						   ib_info, ibs_request);
94000a23bdaSmrg
94100a23bdaSmrg			/* verify if SDMA test result meets with expected */
94200a23bdaSmrg			i = 0;
94300a23bdaSmrg			while(i < sdma_write_length) {
94400a23bdaSmrg				CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
94500a23bdaSmrg			}
94600a23bdaSmrg			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
94700a23bdaSmrg						     sdma_write_length);
94800a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
94900a23bdaSmrg			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
95000a23bdaSmrg						     sdma_write_length);
95100a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
95200a23bdaSmrg			loop2++;
95300a23bdaSmrg		}
95400a23bdaSmrg		loop2 = 0;
95500a23bdaSmrg		loop1++;
95600a23bdaSmrg	}
95700a23bdaSmrg	amdgpu_bo_free(vram_max[0]);
95800a23bdaSmrg	amdgpu_bo_free(vram_max[1]);
95900a23bdaSmrg	amdgpu_bo_free(gtt_max[0]);
96000a23bdaSmrg	amdgpu_bo_free(gtt_max[1]);
96100a23bdaSmrg	/* clean resources */
96200a23bdaSmrg	free(resources);
96300a23bdaSmrg	free(ibs_request);
96400a23bdaSmrg	free(ib_info);
96500a23bdaSmrg	free(pm4);
96600a23bdaSmrg
96700a23bdaSmrg	/* end of test */
96800a23bdaSmrg	r = amdgpu_cs_ctx_free(context_handle);
96900a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
97000a23bdaSmrg}
97100a23bdaSmrg
97200a23bdaSmrg
9733f012e29Smrgstatic void amdgpu_command_submission_gfx(void)
9743f012e29Smrg{
9753f012e29Smrg	/* write data using the CP */
9763f012e29Smrg	amdgpu_command_submission_gfx_cp_write_data();
9773f012e29Smrg	/* const fill using the CP */
9783f012e29Smrg	amdgpu_command_submission_gfx_cp_const_fill();
9793f012e29Smrg	/* copy data using the CP */
9803f012e29Smrg	amdgpu_command_submission_gfx_cp_copy_data();
9813f012e29Smrg	/* separate IB buffers for multi-IB submission */
9823f012e29Smrg	amdgpu_command_submission_gfx_separate_ibs();
9833f012e29Smrg	/* shared IB buffer for multi-IB submission */
9843f012e29Smrg	amdgpu_command_submission_gfx_shared_ib();
9853f012e29Smrg}
9863f012e29Smrg
9873f012e29Smrgstatic void amdgpu_semaphore_test(void)
9883f012e29Smrg{
9893f012e29Smrg	amdgpu_context_handle context_handle[2];
9903f012e29Smrg	amdgpu_semaphore_handle sem;
9913f012e29Smrg	amdgpu_bo_handle ib_result_handle[2];
9923f012e29Smrg	void *ib_result_cpu[2];
9933f012e29Smrg	uint64_t ib_result_mc_address[2];
9943f012e29Smrg	struct amdgpu_cs_request ibs_request[2] = {0};
9953f012e29Smrg	struct amdgpu_cs_ib_info ib_info[2] = {0};
9963f012e29Smrg	struct amdgpu_cs_fence fence_status = {0};
9973f012e29Smrg	uint32_t *ptr;
9983f012e29Smrg	uint32_t expired;
999d8807b2fSmrg	uint32_t sdma_nop, gfx_nop;
10003f012e29Smrg	amdgpu_bo_list_handle bo_list[2];
10013f012e29Smrg	amdgpu_va_handle va_handle[2];
10023f012e29Smrg	int r, i;
10033f012e29Smrg
1004d8807b2fSmrg	if (family_id == AMDGPU_FAMILY_SI) {
1005d8807b2fSmrg		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
1006d8807b2fSmrg		gfx_nop = GFX_COMPUTE_NOP_SI;
1007d8807b2fSmrg	} else {
1008d8807b2fSmrg		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
1009d8807b2fSmrg		gfx_nop = GFX_COMPUTE_NOP;
1010d8807b2fSmrg	}
1011d8807b2fSmrg
10123f012e29Smrg	r = amdgpu_cs_create_semaphore(&sem);
10133f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
10143f012e29Smrg	for (i = 0; i < 2; i++) {
10153f012e29Smrg		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
10163f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
10173f012e29Smrg
10183f012e29Smrg		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
10193f012e29Smrg					    AMDGPU_GEM_DOMAIN_GTT, 0,
10203f012e29Smrg					    &ib_result_handle[i], &ib_result_cpu[i],
10213f012e29Smrg					    &ib_result_mc_address[i], &va_handle[i]);
10223f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
10233f012e29Smrg
10243f012e29Smrg		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
10253f012e29Smrg				       NULL, &bo_list[i]);
10263f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
10273f012e29Smrg	}
10283f012e29Smrg
10293f012e29Smrg	/* 1. same context different engine */
10303f012e29Smrg	ptr = ib_result_cpu[0];
1031d8807b2fSmrg	ptr[0] = sdma_nop;
10323f012e29Smrg	ib_info[0].ib_mc_address = ib_result_mc_address[0];
10333f012e29Smrg	ib_info[0].size = 1;
10343f012e29Smrg
10353f012e29Smrg	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
10363f012e29Smrg	ibs_request[0].number_of_ibs = 1;
10373f012e29Smrg	ibs_request[0].ibs = &ib_info[0];
10383f012e29Smrg	ibs_request[0].resources = bo_list[0];
10393f012e29Smrg	ibs_request[0].fence_info.handle = NULL;
10403f012e29Smrg	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
10413f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
10423f012e29Smrg	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
10433f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
10443f012e29Smrg
10453f012e29Smrg	r = amdgpu_cs_wait_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
10463f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
10473f012e29Smrg	ptr = ib_result_cpu[1];
1048d8807b2fSmrg	ptr[0] = gfx_nop;
10493f012e29Smrg	ib_info[1].ib_mc_address = ib_result_mc_address[1];
10503f012e29Smrg	ib_info[1].size = 1;
10513f012e29Smrg
10523f012e29Smrg	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
10533f012e29Smrg	ibs_request[1].number_of_ibs = 1;
10543f012e29Smrg	ibs_request[1].ibs = &ib_info[1];
10553f012e29Smrg	ibs_request[1].resources = bo_list[1];
10563f012e29Smrg	ibs_request[1].fence_info.handle = NULL;
10573f012e29Smrg
10583f012e29Smrg	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[1], 1);
10593f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
10603f012e29Smrg
10613f012e29Smrg	fence_status.context = context_handle[0];
10623f012e29Smrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
10633f012e29Smrg	fence_status.ip_instance = 0;
10643f012e29Smrg	fence_status.fence = ibs_request[1].seq_no;
10653f012e29Smrg	r = amdgpu_cs_query_fence_status(&fence_status,
10663f012e29Smrg					 500000000, 0, &expired);
10673f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
10683f012e29Smrg	CU_ASSERT_EQUAL(expired, true);
10693f012e29Smrg
10703f012e29Smrg	/* 2. same engine different context */
10713f012e29Smrg	ptr = ib_result_cpu[0];
1072d8807b2fSmrg	ptr[0] = gfx_nop;
10733f012e29Smrg	ib_info[0].ib_mc_address = ib_result_mc_address[0];
10743f012e29Smrg	ib_info[0].size = 1;
10753f012e29Smrg
10763f012e29Smrg	ibs_request[0].ip_type = AMDGPU_HW_IP_GFX;
10773f012e29Smrg	ibs_request[0].number_of_ibs = 1;
10783f012e29Smrg	ibs_request[0].ibs = &ib_info[0];
10793f012e29Smrg	ibs_request[0].resources = bo_list[0];
10803f012e29Smrg	ibs_request[0].fence_info.handle = NULL;
10813f012e29Smrg	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
10823f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
10833f012e29Smrg	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
10843f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
10853f012e29Smrg
10863f012e29Smrg	r = amdgpu_cs_wait_semaphore(context_handle[1], AMDGPU_HW_IP_GFX, 0, 0, sem);
10873f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
10883f012e29Smrg	ptr = ib_result_cpu[1];
1089d8807b2fSmrg	ptr[0] = gfx_nop;
10903f012e29Smrg	ib_info[1].ib_mc_address = ib_result_mc_address[1];
10913f012e29Smrg	ib_info[1].size = 1;
10923f012e29Smrg
10933f012e29Smrg	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
10943f012e29Smrg	ibs_request[1].number_of_ibs = 1;
10953f012e29Smrg	ibs_request[1].ibs = &ib_info[1];
10963f012e29Smrg	ibs_request[1].resources = bo_list[1];
10973f012e29Smrg	ibs_request[1].fence_info.handle = NULL;
10983f012e29Smrg	r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request[1], 1);
10993f012e29Smrg
11003f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
11013f012e29Smrg
11023f012e29Smrg	fence_status.context = context_handle[1];
11033f012e29Smrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
11043f012e29Smrg	fence_status.ip_instance = 0;
11053f012e29Smrg	fence_status.fence = ibs_request[1].seq_no;
11063f012e29Smrg	r = amdgpu_cs_query_fence_status(&fence_status,
11073f012e29Smrg					 500000000, 0, &expired);
11083f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
11093f012e29Smrg	CU_ASSERT_EQUAL(expired, true);
1110d8807b2fSmrg
11113f012e29Smrg	for (i = 0; i < 2; i++) {
11123f012e29Smrg		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
11133f012e29Smrg					     ib_result_mc_address[i], 4096);
11143f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
11153f012e29Smrg
11163f012e29Smrg		r = amdgpu_bo_list_destroy(bo_list[i]);
11173f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
11183f012e29Smrg
11193f012e29Smrg		r = amdgpu_cs_ctx_free(context_handle[i]);
11203f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
11213f012e29Smrg	}
11223f012e29Smrg
11233f012e29Smrg	r = amdgpu_cs_destroy_semaphore(sem);
11243f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
11253f012e29Smrg}
11263f012e29Smrg
11273f012e29Smrgstatic void amdgpu_command_submission_compute_nop(void)
11283f012e29Smrg{
11293f012e29Smrg	amdgpu_context_handle context_handle;
11303f012e29Smrg	amdgpu_bo_handle ib_result_handle;
11313f012e29Smrg	void *ib_result_cpu;
11323f012e29Smrg	uint64_t ib_result_mc_address;
11333f012e29Smrg	struct amdgpu_cs_request ibs_request;
11343f012e29Smrg	struct amdgpu_cs_ib_info ib_info;
11353f012e29Smrg	struct amdgpu_cs_fence fence_status;
11363f012e29Smrg	uint32_t *ptr;
11373f012e29Smrg	uint32_t expired;
113800a23bdaSmrg	int r, instance;
11393f012e29Smrg	amdgpu_bo_list_handle bo_list;
11403f012e29Smrg	amdgpu_va_handle va_handle;
1141d8807b2fSmrg	struct drm_amdgpu_info_hw_ip info;
1142d8807b2fSmrg
1143d8807b2fSmrg	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
1144d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
11453f012e29Smrg
11463f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
11473f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
11483f012e29Smrg
1149d8807b2fSmrg	for (instance = 0; (1 << instance) & info.available_rings; instance++) {
11503f012e29Smrg		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
11513f012e29Smrg					    AMDGPU_GEM_DOMAIN_GTT, 0,
11523f012e29Smrg					    &ib_result_handle, &ib_result_cpu,
11533f012e29Smrg					    &ib_result_mc_address, &va_handle);
11543f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
11553f012e29Smrg
11563f012e29Smrg		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
11573f012e29Smrg				       &bo_list);
11583f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
11593f012e29Smrg
11603f012e29Smrg		ptr = ib_result_cpu;
1161d8807b2fSmrg		memset(ptr, 0, 16);
1162d8807b2fSmrg		ptr[0]=PACKET3(PACKET3_NOP, 14);
11633f012e29Smrg
11643f012e29Smrg		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
11653f012e29Smrg		ib_info.ib_mc_address = ib_result_mc_address;
11663f012e29Smrg		ib_info.size = 16;
11673f012e29Smrg
11683f012e29Smrg		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
11693f012e29Smrg		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
11703f012e29Smrg		ibs_request.ring = instance;
11713f012e29Smrg		ibs_request.number_of_ibs = 1;
11723f012e29Smrg		ibs_request.ibs = &ib_info;
11733f012e29Smrg		ibs_request.resources = bo_list;
11743f012e29Smrg		ibs_request.fence_info.handle = NULL;
11753f012e29Smrg
11763f012e29Smrg		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
11773f012e29Smrg		r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
11783f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
11793f012e29Smrg
11803f012e29Smrg		fence_status.context = context_handle;
11813f012e29Smrg		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
11823f012e29Smrg		fence_status.ip_instance = 0;
11833f012e29Smrg		fence_status.ring = instance;
11843f012e29Smrg		fence_status.fence = ibs_request.seq_no;
11853f012e29Smrg
11863f012e29Smrg		r = amdgpu_cs_query_fence_status(&fence_status,
11873f012e29Smrg						 AMDGPU_TIMEOUT_INFINITE,
11883f012e29Smrg						 0, &expired);
11893f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
11903f012e29Smrg
11913f012e29Smrg		r = amdgpu_bo_list_destroy(bo_list);
11923f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
11933f012e29Smrg
11943f012e29Smrg		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
11953f012e29Smrg					     ib_result_mc_address, 4096);
11963f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
11973f012e29Smrg	}
11983f012e29Smrg
11993f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
12003f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
12013f012e29Smrg}
12023f012e29Smrg
12033f012e29Smrgstatic void amdgpu_command_submission_compute_cp_write_data(void)
12043f012e29Smrg{
12053f012e29Smrg	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
12063f012e29Smrg}
12073f012e29Smrg
12083f012e29Smrgstatic void amdgpu_command_submission_compute_cp_const_fill(void)
12093f012e29Smrg{
12103f012e29Smrg	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
12113f012e29Smrg}
12123f012e29Smrg
12133f012e29Smrgstatic void amdgpu_command_submission_compute_cp_copy_data(void)
12143f012e29Smrg{
12153f012e29Smrg	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
12163f012e29Smrg}
12173f012e29Smrg
12183f012e29Smrgstatic void amdgpu_command_submission_compute(void)
12193f012e29Smrg{
12203f012e29Smrg	/* write data using the CP */
12213f012e29Smrg	amdgpu_command_submission_compute_cp_write_data();
12223f012e29Smrg	/* const fill using the CP */
12233f012e29Smrg	amdgpu_command_submission_compute_cp_const_fill();
12243f012e29Smrg	/* copy data using the CP */
12253f012e29Smrg	amdgpu_command_submission_compute_cp_copy_data();
12263f012e29Smrg	/* nop test */
12273f012e29Smrg	amdgpu_command_submission_compute_nop();
12283f012e29Smrg}
12293f012e29Smrg
12303f012e29Smrg/*
12313f012e29Smrg * caller need create/release:
12323f012e29Smrg * pm4_src, resources, ib_info, and ibs_request
12333f012e29Smrg * submit command stream described in ibs_request and wait for this IB accomplished
12343f012e29Smrg */
12353f012e29Smrgstatic void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
12363f012e29Smrg				       unsigned ip_type,
12373f012e29Smrg				       int instance, int pm4_dw, uint32_t *pm4_src,
12383f012e29Smrg				       int res_cnt, amdgpu_bo_handle *resources,
12393f012e29Smrg				       struct amdgpu_cs_ib_info *ib_info,
12403f012e29Smrg				       struct amdgpu_cs_request *ibs_request)
12413f012e29Smrg{
12423f012e29Smrg	int r;
12433f012e29Smrg	uint32_t expired;
12443f012e29Smrg	uint32_t *ring_ptr;
12453f012e29Smrg	amdgpu_bo_handle ib_result_handle;
12463f012e29Smrg	void *ib_result_cpu;
12473f012e29Smrg	uint64_t ib_result_mc_address;
12483f012e29Smrg	struct amdgpu_cs_fence fence_status = {0};
12493f012e29Smrg	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
12503f012e29Smrg	amdgpu_va_handle va_handle;
12513f012e29Smrg
12523f012e29Smrg	/* prepare CS */
12533f012e29Smrg	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
12543f012e29Smrg	CU_ASSERT_NOT_EQUAL(resources, NULL);
12553f012e29Smrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
12563f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
12573f012e29Smrg	CU_ASSERT_TRUE(pm4_dw <= 1024);
12583f012e29Smrg
12593f012e29Smrg	/* allocate IB */
12603f012e29Smrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
12613f012e29Smrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
12623f012e29Smrg				    &ib_result_handle, &ib_result_cpu,
12633f012e29Smrg				    &ib_result_mc_address, &va_handle);
12643f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
12653f012e29Smrg
12663f012e29Smrg	/* copy PM4 packet to ring from caller */
12673f012e29Smrg	ring_ptr = ib_result_cpu;
12683f012e29Smrg	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));
12693f012e29Smrg
12703f012e29Smrg	ib_info->ib_mc_address = ib_result_mc_address;
12713f012e29Smrg	ib_info->size = pm4_dw;
12723f012e29Smrg
12733f012e29Smrg	ibs_request->ip_type = ip_type;
12743f012e29Smrg	ibs_request->ring = instance;
12753f012e29Smrg	ibs_request->number_of_ibs = 1;
12763f012e29Smrg	ibs_request->ibs = ib_info;
12773f012e29Smrg	ibs_request->fence_info.handle = NULL;
12783f012e29Smrg
12793f012e29Smrg	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
12803f012e29Smrg	all_res[res_cnt] = ib_result_handle;
12813f012e29Smrg
12823f012e29Smrg	r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res,
12833f012e29Smrg				  NULL, &ibs_request->resources);
12843f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
12853f012e29Smrg
12863f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
12873f012e29Smrg
12883f012e29Smrg	/* submit CS */
12893f012e29Smrg	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
12903f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
12913f012e29Smrg
12923f012e29Smrg	r = amdgpu_bo_list_destroy(ibs_request->resources);
12933f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
12943f012e29Smrg
12953f012e29Smrg	fence_status.ip_type = ip_type;
12963f012e29Smrg	fence_status.ip_instance = 0;
12973f012e29Smrg	fence_status.ring = ibs_request->ring;
12983f012e29Smrg	fence_status.context = context_handle;
12993f012e29Smrg	fence_status.fence = ibs_request->seq_no;
13003f012e29Smrg
13013f012e29Smrg	/* wait for IB accomplished */
13023f012e29Smrg	r = amdgpu_cs_query_fence_status(&fence_status,
13033f012e29Smrg					 AMDGPU_TIMEOUT_INFINITE,
13043f012e29Smrg					 0, &expired);
13053f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
13063f012e29Smrg	CU_ASSERT_EQUAL(expired, true);
13073f012e29Smrg
13083f012e29Smrg	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
13093f012e29Smrg				     ib_result_mc_address, 4096);
13103f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
13113f012e29Smrg}
13123f012e29Smrg
13133f012e29Smrgstatic void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
13143f012e29Smrg{
13153f012e29Smrg	const int sdma_write_length = 128;
13163f012e29Smrg	const int pm4_dw = 256;
13173f012e29Smrg	amdgpu_context_handle context_handle;
13183f012e29Smrg	amdgpu_bo_handle bo;
13193f012e29Smrg	amdgpu_bo_handle *resources;
13203f012e29Smrg	uint32_t *pm4;
13213f012e29Smrg	struct amdgpu_cs_ib_info *ib_info;
13223f012e29Smrg	struct amdgpu_cs_request *ibs_request;
13233f012e29Smrg	uint64_t bo_mc;
13243f012e29Smrg	volatile uint32_t *bo_cpu;
132500a23bdaSmrg	int i, j, r, loop, ring_id;
13263f012e29Smrg	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
13273f012e29Smrg	amdgpu_va_handle va_handle;
132800a23bdaSmrg	struct drm_amdgpu_info_hw_ip hw_ip_info;
13293f012e29Smrg
13303f012e29Smrg	pm4 = calloc(pm4_dw, sizeof(*pm4));
13313f012e29Smrg	CU_ASSERT_NOT_EQUAL(pm4, NULL);
13323f012e29Smrg
13333f012e29Smrg	ib_info = calloc(1, sizeof(*ib_info));
13343f012e29Smrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
13353f012e29Smrg
13363f012e29Smrg	ibs_request = calloc(1, sizeof(*ibs_request));
13373f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
13383f012e29Smrg
133900a23bdaSmrg	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
134000a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
134100a23bdaSmrg
13423f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
13433f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
13443f012e29Smrg
13453f012e29Smrg	/* prepare resource */
13463f012e29Smrg	resources = calloc(1, sizeof(amdgpu_bo_handle));
13473f012e29Smrg	CU_ASSERT_NOT_EQUAL(resources, NULL);
13483f012e29Smrg
134900a23bdaSmrg	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
135000a23bdaSmrg		loop = 0;
135100a23bdaSmrg		while(loop < 2) {
135200a23bdaSmrg			/* allocate UC bo for sDMA use */
135300a23bdaSmrg			r = amdgpu_bo_alloc_and_map(device_handle,
135400a23bdaSmrg						    sdma_write_length * sizeof(uint32_t),
135500a23bdaSmrg						    4096, AMDGPU_GEM_DOMAIN_GTT,
135600a23bdaSmrg						    gtt_flags[loop], &bo, (void**)&bo_cpu,
135700a23bdaSmrg						    &bo_mc, &va_handle);
135800a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
13593f012e29Smrg
136000a23bdaSmrg			/* clear bo */
136100a23bdaSmrg			memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));
13623f012e29Smrg
136300a23bdaSmrg			resources[0] = bo;
13643f012e29Smrg
136500a23bdaSmrg			/* fulfill PM4: test DMA write-linear */
136600a23bdaSmrg			i = j = 0;
136700a23bdaSmrg			if (ip_type == AMDGPU_HW_IP_DMA) {
136800a23bdaSmrg				if (family_id == AMDGPU_FAMILY_SI)
136900a23bdaSmrg					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
137000a23bdaSmrg								  sdma_write_length);
137100a23bdaSmrg				else
137200a23bdaSmrg					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
137300a23bdaSmrg							       SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
137400a23bdaSmrg				pm4[i++] = 0xffffffff & bo_mc;
137500a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
137600a23bdaSmrg				if (family_id >= AMDGPU_FAMILY_AI)
137700a23bdaSmrg					pm4[i++] = sdma_write_length - 1;
137800a23bdaSmrg				else if (family_id != AMDGPU_FAMILY_SI)
137900a23bdaSmrg					pm4[i++] = sdma_write_length;
138000a23bdaSmrg				while(j++ < sdma_write_length)
138100a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
138200a23bdaSmrg			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
138300a23bdaSmrg				    (ip_type == AMDGPU_HW_IP_COMPUTE)) {
138400a23bdaSmrg				pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
138500a23bdaSmrg				pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
138600a23bdaSmrg				pm4[i++] = 0xfffffffc & bo_mc;
138700a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
138800a23bdaSmrg				while(j++ < sdma_write_length)
138900a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
139000a23bdaSmrg			}
13913f012e29Smrg
139200a23bdaSmrg			amdgpu_test_exec_cs_helper(context_handle,
139300a23bdaSmrg						   ip_type, ring_id,
139400a23bdaSmrg						   i, pm4,
139500a23bdaSmrg						   1, resources,
139600a23bdaSmrg						   ib_info, ibs_request);
13973f012e29Smrg
139800a23bdaSmrg			/* verify if SDMA test result meets with expected */
139900a23bdaSmrg			i = 0;
140000a23bdaSmrg			while(i < sdma_write_length) {
140100a23bdaSmrg				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
140200a23bdaSmrg			}
14033f012e29Smrg
140400a23bdaSmrg			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
140500a23bdaSmrg						     sdma_write_length * sizeof(uint32_t));
140600a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
140700a23bdaSmrg			loop++;
14083f012e29Smrg		}
14093f012e29Smrg	}
14103f012e29Smrg	/* clean resources */
14113f012e29Smrg	free(resources);
14123f012e29Smrg	free(ibs_request);
14133f012e29Smrg	free(ib_info);
14143f012e29Smrg	free(pm4);
14153f012e29Smrg
14163f012e29Smrg	/* end of test */
14173f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
14183f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
14193f012e29Smrg}
14203f012e29Smrg
14213f012e29Smrgstatic void amdgpu_command_submission_sdma_write_linear(void)
14223f012e29Smrg{
14233f012e29Smrg	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
14243f012e29Smrg}
14253f012e29Smrg
14263f012e29Smrgstatic void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
14273f012e29Smrg{
14283f012e29Smrg	const int sdma_write_length = 1024 * 1024;
14293f012e29Smrg	const int pm4_dw = 256;
14303f012e29Smrg	amdgpu_context_handle context_handle;
14313f012e29Smrg	amdgpu_bo_handle bo;
14323f012e29Smrg	amdgpu_bo_handle *resources;
14333f012e29Smrg	uint32_t *pm4;
14343f012e29Smrg	struct amdgpu_cs_ib_info *ib_info;
14353f012e29Smrg	struct amdgpu_cs_request *ibs_request;
14363f012e29Smrg	uint64_t bo_mc;
14373f012e29Smrg	volatile uint32_t *bo_cpu;
143800a23bdaSmrg	int i, j, r, loop, ring_id;
14393f012e29Smrg	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
14403f012e29Smrg	amdgpu_va_handle va_handle;
144100a23bdaSmrg	struct drm_amdgpu_info_hw_ip hw_ip_info;
14423f012e29Smrg
14433f012e29Smrg	pm4 = calloc(pm4_dw, sizeof(*pm4));
14443f012e29Smrg	CU_ASSERT_NOT_EQUAL(pm4, NULL);
14453f012e29Smrg
14463f012e29Smrg	ib_info = calloc(1, sizeof(*ib_info));
14473f012e29Smrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
14483f012e29Smrg
14493f012e29Smrg	ibs_request = calloc(1, sizeof(*ibs_request));
14503f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
14513f012e29Smrg
145200a23bdaSmrg	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
145300a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
145400a23bdaSmrg
14553f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
14563f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
14573f012e29Smrg
14583f012e29Smrg	/* prepare resource */
14593f012e29Smrg	resources = calloc(1, sizeof(amdgpu_bo_handle));
14603f012e29Smrg	CU_ASSERT_NOT_EQUAL(resources, NULL);
14613f012e29Smrg
146200a23bdaSmrg	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
146300a23bdaSmrg		loop = 0;
146400a23bdaSmrg		while(loop < 2) {
146500a23bdaSmrg			/* allocate UC bo for sDMA use */
146600a23bdaSmrg			r = amdgpu_bo_alloc_and_map(device_handle,
146700a23bdaSmrg						    sdma_write_length, 4096,
146800a23bdaSmrg						    AMDGPU_GEM_DOMAIN_GTT,
146900a23bdaSmrg						    gtt_flags[loop], &bo, (void**)&bo_cpu,
147000a23bdaSmrg						    &bo_mc, &va_handle);
147100a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
14723f012e29Smrg
147300a23bdaSmrg			/* clear bo */
147400a23bdaSmrg			memset((void*)bo_cpu, 0, sdma_write_length);
14753f012e29Smrg
147600a23bdaSmrg			resources[0] = bo;
14773f012e29Smrg
147800a23bdaSmrg			/* fulfill PM4: test DMA const fill */
147900a23bdaSmrg			i = j = 0;
148000a23bdaSmrg			if (ip_type == AMDGPU_HW_IP_DMA) {
148100a23bdaSmrg				if (family_id == AMDGPU_FAMILY_SI) {
148200a23bdaSmrg					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
148300a23bdaSmrg								  0, 0, 0,
148400a23bdaSmrg								  sdma_write_length / 4);
148500a23bdaSmrg					pm4[i++] = 0xfffffffc & bo_mc;
148600a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
148700a23bdaSmrg					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
148800a23bdaSmrg				} else {
148900a23bdaSmrg					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
149000a23bdaSmrg							       SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
149100a23bdaSmrg					pm4[i++] = 0xffffffff & bo_mc;
149200a23bdaSmrg					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
149300a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
149400a23bdaSmrg					if (family_id >= AMDGPU_FAMILY_AI)
149500a23bdaSmrg						pm4[i++] = sdma_write_length - 1;
149600a23bdaSmrg					else
149700a23bdaSmrg						pm4[i++] = sdma_write_length;
149800a23bdaSmrg				}
149900a23bdaSmrg			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
150000a23bdaSmrg				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
150100a23bdaSmrg				if (family_id == AMDGPU_FAMILY_SI) {
150200a23bdaSmrg					pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
150300a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
150400a23bdaSmrg					pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
150500a23bdaSmrg						   PACKET3_DMA_DATA_SI_DST_SEL(0) |
150600a23bdaSmrg						   PACKET3_DMA_DATA_SI_SRC_SEL(2) |
150700a23bdaSmrg						   PACKET3_DMA_DATA_SI_CP_SYNC;
150800a23bdaSmrg					pm4[i++] = 0xffffffff & bo_mc;
150900a23bdaSmrg					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1510d8807b2fSmrg					pm4[i++] = sdma_write_length;
151100a23bdaSmrg				} else {
151200a23bdaSmrg					pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
151300a23bdaSmrg					pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
151400a23bdaSmrg						   PACKET3_DMA_DATA_DST_SEL(0) |
151500a23bdaSmrg						   PACKET3_DMA_DATA_SRC_SEL(2) |
151600a23bdaSmrg						   PACKET3_DMA_DATA_CP_SYNC;
151700a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
151800a23bdaSmrg					pm4[i++] = 0;
151900a23bdaSmrg					pm4[i++] = 0xfffffffc & bo_mc;
152000a23bdaSmrg					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
152100a23bdaSmrg					pm4[i++] = sdma_write_length;
152200a23bdaSmrg				}
1523d8807b2fSmrg			}
15243f012e29Smrg
152500a23bdaSmrg			amdgpu_test_exec_cs_helper(context_handle,
152600a23bdaSmrg						   ip_type, ring_id,
152700a23bdaSmrg						   i, pm4,
152800a23bdaSmrg						   1, resources,
152900a23bdaSmrg						   ib_info, ibs_request);
15303f012e29Smrg
153100a23bdaSmrg			/* verify if SDMA test result meets with expected */
153200a23bdaSmrg			i = 0;
153300a23bdaSmrg			while(i < (sdma_write_length / 4)) {
153400a23bdaSmrg				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
153500a23bdaSmrg			}
15363f012e29Smrg
153700a23bdaSmrg			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
153800a23bdaSmrg						     sdma_write_length);
153900a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
154000a23bdaSmrg			loop++;
154100a23bdaSmrg		}
15423f012e29Smrg	}
15433f012e29Smrg	/* clean resources */
15443f012e29Smrg	free(resources);
15453f012e29Smrg	free(ibs_request);
15463f012e29Smrg	free(ib_info);
15473f012e29Smrg	free(pm4);
15483f012e29Smrg
15493f012e29Smrg	/* end of test */
15503f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
15513f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
15523f012e29Smrg}
15533f012e29Smrg
15543f012e29Smrgstatic void amdgpu_command_submission_sdma_const_fill(void)
15553f012e29Smrg{
15563f012e29Smrg	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
15573f012e29Smrg}
15583f012e29Smrg
15593f012e29Smrgstatic void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
15603f012e29Smrg{
15613f012e29Smrg	const int sdma_write_length = 1024;
15623f012e29Smrg	const int pm4_dw = 256;
15633f012e29Smrg	amdgpu_context_handle context_handle;
15643f012e29Smrg	amdgpu_bo_handle bo1, bo2;
15653f012e29Smrg	amdgpu_bo_handle *resources;
15663f012e29Smrg	uint32_t *pm4;
15673f012e29Smrg	struct amdgpu_cs_ib_info *ib_info;
15683f012e29Smrg	struct amdgpu_cs_request *ibs_request;
15693f012e29Smrg	uint64_t bo1_mc, bo2_mc;
15703f012e29Smrg	volatile unsigned char *bo1_cpu, *bo2_cpu;
157100a23bdaSmrg	int i, j, r, loop1, loop2, ring_id;
15723f012e29Smrg	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
15733f012e29Smrg	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
157400a23bdaSmrg	struct drm_amdgpu_info_hw_ip hw_ip_info;
15753f012e29Smrg
15763f012e29Smrg	pm4 = calloc(pm4_dw, sizeof(*pm4));
15773f012e29Smrg	CU_ASSERT_NOT_EQUAL(pm4, NULL);
15783f012e29Smrg
15793f012e29Smrg	ib_info = calloc(1, sizeof(*ib_info));
15803f012e29Smrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
15813f012e29Smrg
15823f012e29Smrg	ibs_request = calloc(1, sizeof(*ibs_request));
15833f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
15843f012e29Smrg
158500a23bdaSmrg	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
158600a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
158700a23bdaSmrg
15883f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
15893f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
15903f012e29Smrg
15913f012e29Smrg	/* prepare resource */
15923f012e29Smrg	resources = calloc(2, sizeof(amdgpu_bo_handle));
15933f012e29Smrg	CU_ASSERT_NOT_EQUAL(resources, NULL);
15943f012e29Smrg
159500a23bdaSmrg	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
159600a23bdaSmrg		loop1 = loop2 = 0;
159700a23bdaSmrg		/* run 9 circle to test all mapping combination */
159800a23bdaSmrg		while(loop1 < 2) {
159900a23bdaSmrg			while(loop2 < 2) {
160000a23bdaSmrg				/* allocate UC bo1for sDMA use */
160100a23bdaSmrg				r = amdgpu_bo_alloc_and_map(device_handle,
160200a23bdaSmrg							    sdma_write_length, 4096,
160300a23bdaSmrg							    AMDGPU_GEM_DOMAIN_GTT,
160400a23bdaSmrg							    gtt_flags[loop1], &bo1,
160500a23bdaSmrg							    (void**)&bo1_cpu, &bo1_mc,
160600a23bdaSmrg							    &bo1_va_handle);
160700a23bdaSmrg				CU_ASSERT_EQUAL(r, 0);
160800a23bdaSmrg
160900a23bdaSmrg				/* set bo1 */
161000a23bdaSmrg				memset((void*)bo1_cpu, 0xaa, sdma_write_length);
161100a23bdaSmrg
161200a23bdaSmrg				/* allocate UC bo2 for sDMA use */
161300a23bdaSmrg				r = amdgpu_bo_alloc_and_map(device_handle,
161400a23bdaSmrg							    sdma_write_length, 4096,
161500a23bdaSmrg							    AMDGPU_GEM_DOMAIN_GTT,
161600a23bdaSmrg							    gtt_flags[loop2], &bo2,
161700a23bdaSmrg							    (void**)&bo2_cpu, &bo2_mc,
161800a23bdaSmrg							    &bo2_va_handle);
161900a23bdaSmrg				CU_ASSERT_EQUAL(r, 0);
162000a23bdaSmrg
162100a23bdaSmrg				/* clear bo2 */
162200a23bdaSmrg				memset((void*)bo2_cpu, 0, sdma_write_length);
162300a23bdaSmrg
162400a23bdaSmrg				resources[0] = bo1;
162500a23bdaSmrg				resources[1] = bo2;
162600a23bdaSmrg
162700a23bdaSmrg				/* fulfill PM4: test DMA copy linear */
162800a23bdaSmrg				i = j = 0;
162900a23bdaSmrg				if (ip_type == AMDGPU_HW_IP_DMA) {
163000a23bdaSmrg					if (family_id == AMDGPU_FAMILY_SI) {
163100a23bdaSmrg						pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
163200a23bdaSmrg									  0, 0, 0,
163300a23bdaSmrg									  sdma_write_length);
163400a23bdaSmrg						pm4[i++] = 0xffffffff & bo2_mc;
163500a23bdaSmrg						pm4[i++] = 0xffffffff & bo1_mc;
163600a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
163700a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
163800a23bdaSmrg					} else {
163900a23bdaSmrg						pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
164000a23bdaSmrg								       SDMA_COPY_SUB_OPCODE_LINEAR,
164100a23bdaSmrg								       0);
164200a23bdaSmrg						if (family_id >= AMDGPU_FAMILY_AI)
164300a23bdaSmrg							pm4[i++] = sdma_write_length - 1;
164400a23bdaSmrg						else
164500a23bdaSmrg							pm4[i++] = sdma_write_length;
164600a23bdaSmrg						pm4[i++] = 0;
164700a23bdaSmrg						pm4[i++] = 0xffffffff & bo1_mc;
164800a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
164900a23bdaSmrg						pm4[i++] = 0xffffffff & bo2_mc;
165000a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
165100a23bdaSmrg					}
165200a23bdaSmrg				} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
165300a23bdaSmrg					   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
165400a23bdaSmrg					if (family_id == AMDGPU_FAMILY_SI) {
165500a23bdaSmrg						pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
165600a23bdaSmrg						pm4[i++] = 0xfffffffc & bo1_mc;
165700a23bdaSmrg						pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
165800a23bdaSmrg							   PACKET3_DMA_DATA_SI_DST_SEL(0) |
165900a23bdaSmrg							   PACKET3_DMA_DATA_SI_SRC_SEL(0) |
166000a23bdaSmrg							   PACKET3_DMA_DATA_SI_CP_SYNC |
166100a23bdaSmrg							   (0xffff00000000 & bo1_mc) >> 32;
166200a23bdaSmrg						pm4[i++] = 0xfffffffc & bo2_mc;
166300a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1664d8807b2fSmrg						pm4[i++] = sdma_write_length;
166500a23bdaSmrg					} else {
166600a23bdaSmrg						pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
166700a23bdaSmrg						pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
166800a23bdaSmrg							   PACKET3_DMA_DATA_DST_SEL(0) |
166900a23bdaSmrg							   PACKET3_DMA_DATA_SRC_SEL(0) |
167000a23bdaSmrg							   PACKET3_DMA_DATA_CP_SYNC;
167100a23bdaSmrg						pm4[i++] = 0xfffffffc & bo1_mc;
167200a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
167300a23bdaSmrg						pm4[i++] = 0xfffffffc & bo2_mc;
167400a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
167500a23bdaSmrg						pm4[i++] = sdma_write_length;
167600a23bdaSmrg					}
1677d8807b2fSmrg				}
16783f012e29Smrg
167900a23bdaSmrg				amdgpu_test_exec_cs_helper(context_handle,
168000a23bdaSmrg							   ip_type, ring_id,
168100a23bdaSmrg							   i, pm4,
168200a23bdaSmrg							   2, resources,
168300a23bdaSmrg							   ib_info, ibs_request);
16843f012e29Smrg
168500a23bdaSmrg				/* verify if SDMA test result meets with expected */
168600a23bdaSmrg				i = 0;
168700a23bdaSmrg				while(i < sdma_write_length) {
168800a23bdaSmrg					CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
168900a23bdaSmrg				}
169000a23bdaSmrg				r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
169100a23bdaSmrg							     sdma_write_length);
169200a23bdaSmrg				CU_ASSERT_EQUAL(r, 0);
169300a23bdaSmrg				r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
169400a23bdaSmrg							     sdma_write_length);
169500a23bdaSmrg				CU_ASSERT_EQUAL(r, 0);
169600a23bdaSmrg				loop2++;
16973f012e29Smrg			}
169800a23bdaSmrg			loop1++;
16993f012e29Smrg		}
17003f012e29Smrg	}
17013f012e29Smrg	/* clean resources */
17023f012e29Smrg	free(resources);
17033f012e29Smrg	free(ibs_request);
17043f012e29Smrg	free(ib_info);
17053f012e29Smrg	free(pm4);
17063f012e29Smrg
17073f012e29Smrg	/* end of test */
17083f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
17093f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
17103f012e29Smrg}
17113f012e29Smrg
17123f012e29Smrgstatic void amdgpu_command_submission_sdma_copy_linear(void)
17133f012e29Smrg{
17143f012e29Smrg	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
17153f012e29Smrg}
17163f012e29Smrg
17173f012e29Smrgstatic void amdgpu_command_submission_sdma(void)
17183f012e29Smrg{
17193f012e29Smrg	amdgpu_command_submission_sdma_write_linear();
17203f012e29Smrg	amdgpu_command_submission_sdma_const_fill();
17213f012e29Smrg	amdgpu_command_submission_sdma_copy_linear();
17223f012e29Smrg}
17233f012e29Smrg
1724d8807b2fSmrgstatic void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
1725d8807b2fSmrg{
1726d8807b2fSmrg	amdgpu_context_handle context_handle;
1727d8807b2fSmrg	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
1728d8807b2fSmrg	void *ib_result_cpu, *ib_result_ce_cpu;
1729d8807b2fSmrg	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
1730d8807b2fSmrg	struct amdgpu_cs_request ibs_request[2] = {0};
1731d8807b2fSmrg	struct amdgpu_cs_ib_info ib_info[2];
1732d8807b2fSmrg	struct amdgpu_cs_fence fence_status[2] = {0};
1733d8807b2fSmrg	uint32_t *ptr;
1734d8807b2fSmrg	uint32_t expired;
1735d8807b2fSmrg	amdgpu_bo_list_handle bo_list;
1736d8807b2fSmrg	amdgpu_va_handle va_handle, va_handle_ce;
1737d8807b2fSmrg	int r;
1738d8807b2fSmrg	int i = 0, ib_cs_num = 2;
1739d8807b2fSmrg
1740d8807b2fSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1741d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1742d8807b2fSmrg
1743d8807b2fSmrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1744d8807b2fSmrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
1745d8807b2fSmrg				    &ib_result_handle, &ib_result_cpu,
1746d8807b2fSmrg				    &ib_result_mc_address, &va_handle);
1747d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1748d8807b2fSmrg
1749d8807b2fSmrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1750d8807b2fSmrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
1751d8807b2fSmrg				    &ib_result_ce_handle, &ib_result_ce_cpu,
1752d8807b2fSmrg				    &ib_result_ce_mc_address, &va_handle_ce);
1753d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1754d8807b2fSmrg
1755d8807b2fSmrg	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
1756d8807b2fSmrg			       ib_result_ce_handle, &bo_list);
1757d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1758d8807b2fSmrg
1759d8807b2fSmrg	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
1760d8807b2fSmrg
1761d8807b2fSmrg	/* IT_SET_CE_DE_COUNTERS */
1762d8807b2fSmrg	ptr = ib_result_ce_cpu;
1763d8807b2fSmrg	if (family_id != AMDGPU_FAMILY_SI) {
1764d8807b2fSmrg		ptr[i++] = 0xc0008900;
1765d8807b2fSmrg		ptr[i++] = 0;
1766d8807b2fSmrg	}
1767d8807b2fSmrg	ptr[i++] = 0xc0008400;
1768d8807b2fSmrg	ptr[i++] = 1;
1769d8807b2fSmrg	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
1770d8807b2fSmrg	ib_info[0].size = i;
1771d8807b2fSmrg	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
1772d8807b2fSmrg
1773d8807b2fSmrg	/* IT_WAIT_ON_CE_COUNTER */
1774d8807b2fSmrg	ptr = ib_result_cpu;
1775d8807b2fSmrg	ptr[0] = 0xc0008600;
1776d8807b2fSmrg	ptr[1] = 0x00000001;
1777d8807b2fSmrg	ib_info[1].ib_mc_address = ib_result_mc_address;
1778d8807b2fSmrg	ib_info[1].size = 2;
1779d8807b2fSmrg
1780d8807b2fSmrg	for (i = 0; i < ib_cs_num; i++) {
1781d8807b2fSmrg		ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
1782d8807b2fSmrg		ibs_request[i].number_of_ibs = 2;
1783d8807b2fSmrg		ibs_request[i].ibs = ib_info;
1784d8807b2fSmrg		ibs_request[i].resources = bo_list;
1785d8807b2fSmrg		ibs_request[i].fence_info.handle = NULL;
1786d8807b2fSmrg	}
1787d8807b2fSmrg
1788d8807b2fSmrg	r = amdgpu_cs_submit(context_handle, 0,ibs_request, ib_cs_num);
1789d8807b2fSmrg
1790d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1791d8807b2fSmrg
1792d8807b2fSmrg	for (i = 0; i < ib_cs_num; i++) {
1793d8807b2fSmrg		fence_status[i].context = context_handle;
1794d8807b2fSmrg		fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
1795d8807b2fSmrg		fence_status[i].fence = ibs_request[i].seq_no;
1796d8807b2fSmrg	}
1797d8807b2fSmrg
1798d8807b2fSmrg	r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
1799d8807b2fSmrg				AMDGPU_TIMEOUT_INFINITE,
1800d8807b2fSmrg				&expired, NULL);
1801d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1802d8807b2fSmrg
1803d8807b2fSmrg	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1804d8807b2fSmrg				     ib_result_mc_address, 4096);
1805d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1806d8807b2fSmrg
1807d8807b2fSmrg	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
1808d8807b2fSmrg				     ib_result_ce_mc_address, 4096);
1809d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1810d8807b2fSmrg
1811d8807b2fSmrg	r = amdgpu_bo_list_destroy(bo_list);
1812d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1813d8807b2fSmrg
1814d8807b2fSmrg	r = amdgpu_cs_ctx_free(context_handle);
1815d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1816d8807b2fSmrg}
1817d8807b2fSmrg
/* Run the multi-fence test twice: first with wait_all = true, then false. */
static void amdgpu_command_submission_multi_fence(void)
{
	static const bool wait_modes[] = { true, false };
	unsigned idx;

	for (idx = 0; idx < sizeof(wait_modes) / sizeof(wait_modes[0]); idx++)
		amdgpu_command_submission_multi_fence_wait_all(wait_modes[idx]);
}
1823d8807b2fSmrg
18243f012e29Smrgstatic void amdgpu_userptr_test(void)
18253f012e29Smrg{
18263f012e29Smrg	int i, r, j;
18273f012e29Smrg	uint32_t *pm4 = NULL;
18283f012e29Smrg	uint64_t bo_mc;
18293f012e29Smrg	void *ptr = NULL;
18303f012e29Smrg	int pm4_dw = 256;
18313f012e29Smrg	int sdma_write_length = 4;
18323f012e29Smrg	amdgpu_bo_handle handle;
18333f012e29Smrg	amdgpu_context_handle context_handle;
18343f012e29Smrg	struct amdgpu_cs_ib_info *ib_info;
18353f012e29Smrg	struct amdgpu_cs_request *ibs_request;
18363f012e29Smrg	amdgpu_bo_handle buf_handle;
18373f012e29Smrg	amdgpu_va_handle va_handle;
18383f012e29Smrg
18393f012e29Smrg	pm4 = calloc(pm4_dw, sizeof(*pm4));
18403f012e29Smrg	CU_ASSERT_NOT_EQUAL(pm4, NULL);
18413f012e29Smrg
18423f012e29Smrg	ib_info = calloc(1, sizeof(*ib_info));
18433f012e29Smrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
18443f012e29Smrg
18453f012e29Smrg	ibs_request = calloc(1, sizeof(*ibs_request));
18463f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
18473f012e29Smrg
18483f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
18493f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
18503f012e29Smrg
18513f012e29Smrg	posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
18523f012e29Smrg	CU_ASSERT_NOT_EQUAL(ptr, NULL);
18533f012e29Smrg	memset(ptr, 0, BUFFER_SIZE);
18543f012e29Smrg
18553f012e29Smrg	r = amdgpu_create_bo_from_user_mem(device_handle,
18563f012e29Smrg					   ptr, BUFFER_SIZE, &buf_handle);
18573f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
18583f012e29Smrg
18593f012e29Smrg	r = amdgpu_va_range_alloc(device_handle,
18603f012e29Smrg				  amdgpu_gpu_va_range_general,
18613f012e29Smrg				  BUFFER_SIZE, 1, 0, &bo_mc,
18623f012e29Smrg				  &va_handle, 0);
18633f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
18643f012e29Smrg
18653f012e29Smrg	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
18663f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
18673f012e29Smrg
18683f012e29Smrg	handle = buf_handle;
18693f012e29Smrg
18703f012e29Smrg	j = i = 0;
1871d8807b2fSmrg
1872d8807b2fSmrg	if (family_id == AMDGPU_FAMILY_SI)
1873d8807b2fSmrg		pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
1874d8807b2fSmrg				sdma_write_length);
1875d8807b2fSmrg	else
1876d8807b2fSmrg		pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
1877d8807b2fSmrg				SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
18783f012e29Smrg	pm4[i++] = 0xffffffff & bo_mc;
18793f012e29Smrg	pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1880d8807b2fSmrg	if (family_id >= AMDGPU_FAMILY_AI)
1881d8807b2fSmrg		pm4[i++] = sdma_write_length - 1;
1882d8807b2fSmrg	else if (family_id != AMDGPU_FAMILY_SI)
1883d8807b2fSmrg		pm4[i++] = sdma_write_length;
18843f012e29Smrg
18853f012e29Smrg	while (j++ < sdma_write_length)
18863f012e29Smrg		pm4[i++] = 0xdeadbeaf;
18873f012e29Smrg
188800a23bdaSmrg	if (!fork()) {
188900a23bdaSmrg		pm4[0] = 0x0;
189000a23bdaSmrg		exit(0);
189100a23bdaSmrg	}
189200a23bdaSmrg
18933f012e29Smrg	amdgpu_test_exec_cs_helper(context_handle,
18943f012e29Smrg				   AMDGPU_HW_IP_DMA, 0,
18953f012e29Smrg				   i, pm4,
18963f012e29Smrg				   1, &handle,
18973f012e29Smrg				   ib_info, ibs_request);
18983f012e29Smrg	i = 0;
18993f012e29Smrg	while (i < sdma_write_length) {
19003f012e29Smrg		CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
19013f012e29Smrg	}
19023f012e29Smrg	free(ibs_request);
19033f012e29Smrg	free(ib_info);
19043f012e29Smrg	free(pm4);
19053f012e29Smrg
19063f012e29Smrg	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
19073f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
19083f012e29Smrg	r = amdgpu_va_range_free(va_handle);
19093f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
19103f012e29Smrg	r = amdgpu_bo_free(buf_handle);
19113f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
19123f012e29Smrg	free(ptr);
19133f012e29Smrg
19143f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
19153f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
191600a23bdaSmrg
191700a23bdaSmrg	wait(NULL);
191800a23bdaSmrg}
191900a23bdaSmrg
192000a23bdaSmrgstatic void amdgpu_sync_dependency_test(void)
192100a23bdaSmrg{
192200a23bdaSmrg	amdgpu_context_handle context_handle[2];
192300a23bdaSmrg	amdgpu_bo_handle ib_result_handle;
192400a23bdaSmrg	void *ib_result_cpu;
192500a23bdaSmrg	uint64_t ib_result_mc_address;
192600a23bdaSmrg	struct amdgpu_cs_request ibs_request;
192700a23bdaSmrg	struct amdgpu_cs_ib_info ib_info;
192800a23bdaSmrg	struct amdgpu_cs_fence fence_status;
192900a23bdaSmrg	uint32_t expired;
193000a23bdaSmrg	int i, j, r;
193100a23bdaSmrg	amdgpu_bo_list_handle bo_list;
193200a23bdaSmrg	amdgpu_va_handle va_handle;
193300a23bdaSmrg	static uint32_t *ptr;
193400a23bdaSmrg	uint64_t seq_no;
193500a23bdaSmrg
193600a23bdaSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
193700a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
193800a23bdaSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
193900a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
194000a23bdaSmrg
194100a23bdaSmrg	r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
194200a23bdaSmrg			AMDGPU_GEM_DOMAIN_GTT, 0,
194300a23bdaSmrg						    &ib_result_handle, &ib_result_cpu,
194400a23bdaSmrg						    &ib_result_mc_address, &va_handle);
194500a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
194600a23bdaSmrg
194700a23bdaSmrg	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
194800a23bdaSmrg			       &bo_list);
194900a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
195000a23bdaSmrg
195100a23bdaSmrg	ptr = ib_result_cpu;
195200a23bdaSmrg	i = 0;
195300a23bdaSmrg
195400a23bdaSmrg	memcpy(ptr + CODE_OFFSET , shader_bin, sizeof(shader_bin));
195500a23bdaSmrg
195600a23bdaSmrg	/* Dispatch minimal init config and verify it's executed */
195700a23bdaSmrg	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
195800a23bdaSmrg	ptr[i++] = 0x80000000;
195900a23bdaSmrg	ptr[i++] = 0x80000000;
196000a23bdaSmrg
196100a23bdaSmrg	ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
196200a23bdaSmrg	ptr[i++] = 0x80000000;
196300a23bdaSmrg
196400a23bdaSmrg
196500a23bdaSmrg	/* Program compute regs */
196600a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
196700a23bdaSmrg	ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
196800a23bdaSmrg	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
196900a23bdaSmrg	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;
197000a23bdaSmrg
197100a23bdaSmrg
197200a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
197300a23bdaSmrg	ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
197400a23bdaSmrg	/*
197500a23bdaSmrg	 * 002c0040         COMPUTE_PGM_RSRC1 <- VGPRS = 0
197600a23bdaSmrg	                                      SGPRS = 1
197700a23bdaSmrg	                                      PRIORITY = 0
197800a23bdaSmrg	                                      FLOAT_MODE = 192 (0xc0)
197900a23bdaSmrg	                                      PRIV = 0
198000a23bdaSmrg	                                      DX10_CLAMP = 1
198100a23bdaSmrg	                                      DEBUG_MODE = 0
198200a23bdaSmrg	                                      IEEE_MODE = 0
198300a23bdaSmrg	                                      BULKY = 0
198400a23bdaSmrg	                                      CDBG_USER = 0
198500a23bdaSmrg	 *
198600a23bdaSmrg	 */
198700a23bdaSmrg	ptr[i++] = 0x002c0040;
198800a23bdaSmrg
198900a23bdaSmrg
199000a23bdaSmrg	/*
199100a23bdaSmrg	 * 00000010         COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
199200a23bdaSmrg	                                      USER_SGPR = 8
199300a23bdaSmrg	                                      TRAP_PRESENT = 0
199400a23bdaSmrg	                                      TGID_X_EN = 0
199500a23bdaSmrg	                                      TGID_Y_EN = 0
199600a23bdaSmrg	                                      TGID_Z_EN = 0
199700a23bdaSmrg	                                      TG_SIZE_EN = 0
199800a23bdaSmrg	                                      TIDIG_COMP_CNT = 0
199900a23bdaSmrg	                                      EXCP_EN_MSB = 0
200000a23bdaSmrg	                                      LDS_SIZE = 0
200100a23bdaSmrg	                                      EXCP_EN = 0
200200a23bdaSmrg	 *
200300a23bdaSmrg	 */
200400a23bdaSmrg	ptr[i++] = 0x00000010;
200500a23bdaSmrg
200600a23bdaSmrg
200700a23bdaSmrg/*
200800a23bdaSmrg * 00000100         COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
200900a23bdaSmrg                                         WAVESIZE = 0
201000a23bdaSmrg *
201100a23bdaSmrg */
201200a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
201300a23bdaSmrg	ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
201400a23bdaSmrg	ptr[i++] = 0x00000100;
201500a23bdaSmrg
201600a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
201700a23bdaSmrg	ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
201800a23bdaSmrg	ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
201900a23bdaSmrg	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
202000a23bdaSmrg
202100a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
202200a23bdaSmrg	ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
202300a23bdaSmrg	ptr[i++] = 0;
202400a23bdaSmrg
202500a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
202600a23bdaSmrg	ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
202700a23bdaSmrg	ptr[i++] = 1;
202800a23bdaSmrg	ptr[i++] = 1;
202900a23bdaSmrg	ptr[i++] = 1;
203000a23bdaSmrg
203100a23bdaSmrg
203200a23bdaSmrg	/* Dispatch */
203300a23bdaSmrg	ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
203400a23bdaSmrg	ptr[i++] = 1;
203500a23bdaSmrg	ptr[i++] = 1;
203600a23bdaSmrg	ptr[i++] = 1;
203700a23bdaSmrg	ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */
203800a23bdaSmrg
203900a23bdaSmrg
204000a23bdaSmrg	while (i & 7)
204100a23bdaSmrg		ptr[i++] =  0xffff1000; /* type3 nop packet */
204200a23bdaSmrg
204300a23bdaSmrg	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
204400a23bdaSmrg	ib_info.ib_mc_address = ib_result_mc_address;
204500a23bdaSmrg	ib_info.size = i;
204600a23bdaSmrg
204700a23bdaSmrg	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
204800a23bdaSmrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
204900a23bdaSmrg	ibs_request.ring = 0;
205000a23bdaSmrg	ibs_request.number_of_ibs = 1;
205100a23bdaSmrg	ibs_request.ibs = &ib_info;
205200a23bdaSmrg	ibs_request.resources = bo_list;
205300a23bdaSmrg	ibs_request.fence_info.handle = NULL;
205400a23bdaSmrg
205500a23bdaSmrg	r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request, 1);
205600a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
205700a23bdaSmrg	seq_no = ibs_request.seq_no;
205800a23bdaSmrg
205900a23bdaSmrg
206000a23bdaSmrg
206100a23bdaSmrg	/* Prepare second command with dependency on the first */
206200a23bdaSmrg	j = i;
206300a23bdaSmrg	ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
206400a23bdaSmrg	ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
206500a23bdaSmrg	ptr[i++] =          0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
206600a23bdaSmrg	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
206700a23bdaSmrg	ptr[i++] = 99;
206800a23bdaSmrg
206900a23bdaSmrg	while (i & 7)
207000a23bdaSmrg		ptr[i++] =  0xffff1000; /* type3 nop packet */
207100a23bdaSmrg
207200a23bdaSmrg	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
207300a23bdaSmrg	ib_info.ib_mc_address = ib_result_mc_address + j * 4;
207400a23bdaSmrg	ib_info.size = i - j;
207500a23bdaSmrg
207600a23bdaSmrg	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
207700a23bdaSmrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
207800a23bdaSmrg	ibs_request.ring = 0;
207900a23bdaSmrg	ibs_request.number_of_ibs = 1;
208000a23bdaSmrg	ibs_request.ibs = &ib_info;
208100a23bdaSmrg	ibs_request.resources = bo_list;
208200a23bdaSmrg	ibs_request.fence_info.handle = NULL;
208300a23bdaSmrg
208400a23bdaSmrg	ibs_request.number_of_dependencies = 1;
208500a23bdaSmrg
208600a23bdaSmrg	ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
208700a23bdaSmrg	ibs_request.dependencies[0].context = context_handle[1];
208800a23bdaSmrg	ibs_request.dependencies[0].ip_instance = 0;
208900a23bdaSmrg	ibs_request.dependencies[0].ring = 0;
209000a23bdaSmrg	ibs_request.dependencies[0].fence = seq_no;
209100a23bdaSmrg
209200a23bdaSmrg
209300a23bdaSmrg	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request, 1);
209400a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
209500a23bdaSmrg
209600a23bdaSmrg
209700a23bdaSmrg	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
209800a23bdaSmrg	fence_status.context = context_handle[0];
209900a23bdaSmrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
210000a23bdaSmrg	fence_status.ip_instance = 0;
210100a23bdaSmrg	fence_status.ring = 0;
210200a23bdaSmrg	fence_status.fence = ibs_request.seq_no;
210300a23bdaSmrg
210400a23bdaSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
210500a23bdaSmrg		       AMDGPU_TIMEOUT_INFINITE,0, &expired);
210600a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
210700a23bdaSmrg
210800a23bdaSmrg	/* Expect the second command to wait for shader to complete */
210900a23bdaSmrg	CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);
211000a23bdaSmrg
211100a23bdaSmrg	r = amdgpu_bo_list_destroy(bo_list);
211200a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
211300a23bdaSmrg
211400a23bdaSmrg	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
211500a23bdaSmrg				     ib_result_mc_address, 4096);
211600a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
211700a23bdaSmrg
211800a23bdaSmrg	r = amdgpu_cs_ctx_free(context_handle[0]);
211900a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
212000a23bdaSmrg	r = amdgpu_cs_ctx_free(context_handle[1]);
212100a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
212200a23bdaSmrg
212300a23bdaSmrg	free(ibs_request.dependencies);
21243f012e29Smrg}
21255324fb0dSmrg
21269bd392adSmrgstatic int amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t *ptr, int family)
21279bd392adSmrg{
21289bd392adSmrg	struct amdgpu_test_shader *shader;
21299bd392adSmrg	int i, loop = 0x10000;
21309bd392adSmrg
21319bd392adSmrg	switch (family) {
21329bd392adSmrg		case AMDGPU_FAMILY_AI:
21339bd392adSmrg			shader = &memcpy_cs_hang_slow_ai;
21349bd392adSmrg			break;
21359bd392adSmrg		case AMDGPU_FAMILY_RV:
21369bd392adSmrg			shader = &memcpy_cs_hang_slow_rv;
21379bd392adSmrg			break;
21389bd392adSmrg		default:
21399bd392adSmrg			return -1;
21409bd392adSmrg			break;
21419bd392adSmrg	}
21429bd392adSmrg
21439bd392adSmrg	memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
21449bd392adSmrg
21459bd392adSmrg	for (i = 0; i < loop; i++)
21469bd392adSmrg		memcpy(ptr + shader->header_length + shader->body_length * i,
21479bd392adSmrg			shader->shader + shader->header_length,
21489bd392adSmrg			shader->body_length * sizeof(uint32_t));
21499bd392adSmrg
21509bd392adSmrg	memcpy(ptr + shader->header_length + shader->body_length * loop,
21519bd392adSmrg		shader->shader + shader->header_length + shader->body_length,
21529bd392adSmrg		shader->foot_length * sizeof(uint32_t));
21539bd392adSmrg
21549bd392adSmrg	return 0;
21559bd392adSmrg}
21569bd392adSmrg
21575324fb0dSmrgstatic int amdgpu_dispatch_load_cs_shader(uint8_t *ptr,
21585324fb0dSmrg					   int cs_type)
21595324fb0dSmrg{
21605324fb0dSmrg	uint32_t shader_size;
21615324fb0dSmrg	const uint32_t *shader;
21625324fb0dSmrg
21635324fb0dSmrg	switch (cs_type) {
21645324fb0dSmrg		case CS_BUFFERCLEAR:
21655324fb0dSmrg			shader = bufferclear_cs_shader_gfx9;
21665324fb0dSmrg			shader_size = sizeof(bufferclear_cs_shader_gfx9);
21675324fb0dSmrg			break;
21685324fb0dSmrg		case CS_BUFFERCOPY:
21695324fb0dSmrg			shader = buffercopy_cs_shader_gfx9;
21705324fb0dSmrg			shader_size = sizeof(buffercopy_cs_shader_gfx9);
21715324fb0dSmrg			break;
21729bd392adSmrg		case CS_HANG:
21739bd392adSmrg			shader = memcpy_ps_hang;
21749bd392adSmrg			shader_size = sizeof(memcpy_ps_hang);
21759bd392adSmrg			break;
21765324fb0dSmrg		default:
21775324fb0dSmrg			return -1;
21785324fb0dSmrg			break;
21795324fb0dSmrg	}
21805324fb0dSmrg
21815324fb0dSmrg	memcpy(ptr, shader, shader_size);
21825324fb0dSmrg	return 0;
21835324fb0dSmrg}
21845324fb0dSmrg
21855324fb0dSmrgstatic int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type)
21865324fb0dSmrg{
21875324fb0dSmrg	int i = 0;
21885324fb0dSmrg
21895324fb0dSmrg	/* Write context control and load shadowing register if necessary */
21905324fb0dSmrg	if (ip_type == AMDGPU_HW_IP_GFX) {
21915324fb0dSmrg		ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
21925324fb0dSmrg		ptr[i++] = 0x80000000;
21935324fb0dSmrg		ptr[i++] = 0x80000000;
21945324fb0dSmrg	}
21955324fb0dSmrg
21965324fb0dSmrg	/* Issue commands to set default compute state. */
21975324fb0dSmrg	/* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */
21985324fb0dSmrg	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 3);
21995324fb0dSmrg	ptr[i++] = 0x204;
22005324fb0dSmrg	i += 3;
220188f8a8d2Smrg
22025324fb0dSmrg	/* clear mmCOMPUTE_TMPRING_SIZE */
22035324fb0dSmrg	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
22045324fb0dSmrg	ptr[i++] = 0x218;
22055324fb0dSmrg	ptr[i++] = 0;
22065324fb0dSmrg
22075324fb0dSmrg	return i;
22085324fb0dSmrg}
22095324fb0dSmrg
22105324fb0dSmrgstatic int amdgpu_dispatch_write_cumask(uint32_t *ptr)
22115324fb0dSmrg{
22125324fb0dSmrg	int i = 0;
22135324fb0dSmrg
22145324fb0dSmrg	/*  Issue commands to set cu mask used in current dispatch */
22155324fb0dSmrg	/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */
22165324fb0dSmrg	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
22175324fb0dSmrg	ptr[i++] = 0x216;
22185324fb0dSmrg	ptr[i++] = 0xffffffff;
22195324fb0dSmrg	ptr[i++] = 0xffffffff;
22205324fb0dSmrg	/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */
22215324fb0dSmrg	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
22225324fb0dSmrg	ptr[i++] = 0x219;
22235324fb0dSmrg	ptr[i++] = 0xffffffff;
22245324fb0dSmrg	ptr[i++] = 0xffffffff;
22255324fb0dSmrg
22265324fb0dSmrg	return i;
22275324fb0dSmrg}
22285324fb0dSmrg
22295324fb0dSmrgstatic int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr)
22305324fb0dSmrg{
22315324fb0dSmrg	int i, j;
22325324fb0dSmrg
22335324fb0dSmrg	i = 0;
22345324fb0dSmrg
22355324fb0dSmrg	/* Writes shader state to HW */
22365324fb0dSmrg	/* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */
22375324fb0dSmrg	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
22385324fb0dSmrg	ptr[i++] = 0x20c;
22395324fb0dSmrg	ptr[i++] = (shader_addr >> 8);
22405324fb0dSmrg	ptr[i++] = (shader_addr >> 40);
22415324fb0dSmrg	/* write sh regs*/
22425324fb0dSmrg	for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) {
22435324fb0dSmrg		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
22445324fb0dSmrg		/* - Gfx9ShRegBase */
22455324fb0dSmrg		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00;
22465324fb0dSmrg		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1];
22475324fb0dSmrg	}
22485324fb0dSmrg
22495324fb0dSmrg	return i;
22505324fb0dSmrg}
22515324fb0dSmrg
22525324fb0dSmrgstatic void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
22535324fb0dSmrg					 uint32_t ip_type,
22545324fb0dSmrg					 uint32_t ring)
22555324fb0dSmrg{
22565324fb0dSmrg	amdgpu_context_handle context_handle;
22575324fb0dSmrg	amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3];
22585324fb0dSmrg	volatile unsigned char *ptr_dst;
22595324fb0dSmrg	void *ptr_shader;
22605324fb0dSmrg	uint32_t *ptr_cmd;
22615324fb0dSmrg	uint64_t mc_address_dst, mc_address_shader, mc_address_cmd;
22625324fb0dSmrg	amdgpu_va_handle va_dst, va_shader, va_cmd;
22635324fb0dSmrg	int i, r;
22645324fb0dSmrg	int bo_dst_size = 16384;
22655324fb0dSmrg	int bo_shader_size = 4096;
22665324fb0dSmrg	int bo_cmd_size = 4096;
22675324fb0dSmrg	struct amdgpu_cs_request ibs_request = {0};
22685324fb0dSmrg	struct amdgpu_cs_ib_info ib_info= {0};
22695324fb0dSmrg	amdgpu_bo_list_handle bo_list;
22705324fb0dSmrg	struct amdgpu_cs_fence fence_status = {0};
22715324fb0dSmrg	uint32_t expired;
22725324fb0dSmrg
22735324fb0dSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
22745324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
22755324fb0dSmrg
22765324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
22775324fb0dSmrg					AMDGPU_GEM_DOMAIN_GTT, 0,
22785324fb0dSmrg					&bo_cmd, (void **)&ptr_cmd,
22795324fb0dSmrg					&mc_address_cmd, &va_cmd);
22805324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
22815324fb0dSmrg	memset(ptr_cmd, 0, bo_cmd_size);
22825324fb0dSmrg
22835324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
22845324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
22855324fb0dSmrg					&bo_shader, &ptr_shader,
22865324fb0dSmrg					&mc_address_shader, &va_shader);
22875324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
228888f8a8d2Smrg	memset(ptr_shader, 0, bo_shader_size);
22895324fb0dSmrg
22905324fb0dSmrg	r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR);
22915324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
22925324fb0dSmrg
22935324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
22945324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
22955324fb0dSmrg					&bo_dst, (void **)&ptr_dst,
22965324fb0dSmrg					&mc_address_dst, &va_dst);
22975324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
22985324fb0dSmrg
22995324fb0dSmrg	i = 0;
23005324fb0dSmrg	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);
23015324fb0dSmrg
23025324fb0dSmrg	/*  Issue commands to set cu mask used in current dispatch */
23035324fb0dSmrg	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);
23045324fb0dSmrg
23055324fb0dSmrg	/* Writes shader state to HW */
23065324fb0dSmrg	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);
23075324fb0dSmrg
23085324fb0dSmrg	/* Write constant data */
23095324fb0dSmrg	/* Writes the UAV constant data to the SGPRs. */
23105324fb0dSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
23115324fb0dSmrg	ptr_cmd[i++] = 0x240;
23125324fb0dSmrg	ptr_cmd[i++] = mc_address_dst;
23135324fb0dSmrg	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
23145324fb0dSmrg	ptr_cmd[i++] = 0x400;
23155324fb0dSmrg	ptr_cmd[i++] = 0x74fac;
23165324fb0dSmrg
23175324fb0dSmrg	/* Sets a range of pixel shader constants */
23185324fb0dSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
23195324fb0dSmrg	ptr_cmd[i++] = 0x244;
23205324fb0dSmrg	ptr_cmd[i++] = 0x22222222;
23215324fb0dSmrg	ptr_cmd[i++] = 0x22222222;
23225324fb0dSmrg	ptr_cmd[i++] = 0x22222222;
23235324fb0dSmrg	ptr_cmd[i++] = 0x22222222;
23245324fb0dSmrg
232588f8a8d2Smrg	/* clear mmCOMPUTE_RESOURCE_LIMITS */
232688f8a8d2Smrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
232788f8a8d2Smrg	ptr_cmd[i++] = 0x215;
232888f8a8d2Smrg	ptr_cmd[i++] = 0;
232988f8a8d2Smrg
23305324fb0dSmrg	/* dispatch direct command */
23315324fb0dSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
23325324fb0dSmrg	ptr_cmd[i++] = 0x10;
23335324fb0dSmrg	ptr_cmd[i++] = 1;
23345324fb0dSmrg	ptr_cmd[i++] = 1;
23355324fb0dSmrg	ptr_cmd[i++] = 1;
23365324fb0dSmrg
23375324fb0dSmrg	while (i & 7)
23385324fb0dSmrg		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
23395324fb0dSmrg
23405324fb0dSmrg	resources[0] = bo_dst;
23415324fb0dSmrg	resources[1] = bo_shader;
23425324fb0dSmrg	resources[2] = bo_cmd;
23435324fb0dSmrg	r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list);
23445324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
23455324fb0dSmrg
23465324fb0dSmrg	ib_info.ib_mc_address = mc_address_cmd;
23475324fb0dSmrg	ib_info.size = i;
23485324fb0dSmrg	ibs_request.ip_type = ip_type;
23495324fb0dSmrg	ibs_request.ring = ring;
23505324fb0dSmrg	ibs_request.resources = bo_list;
23515324fb0dSmrg	ibs_request.number_of_ibs = 1;
23525324fb0dSmrg	ibs_request.ibs = &ib_info;
23535324fb0dSmrg	ibs_request.fence_info.handle = NULL;
23545324fb0dSmrg
23555324fb0dSmrg	/* submit CS */
23565324fb0dSmrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
23575324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
23585324fb0dSmrg
23595324fb0dSmrg	r = amdgpu_bo_list_destroy(bo_list);
23605324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
23615324fb0dSmrg
23625324fb0dSmrg	fence_status.ip_type = ip_type;
23635324fb0dSmrg	fence_status.ip_instance = 0;
23645324fb0dSmrg	fence_status.ring = ring;
23655324fb0dSmrg	fence_status.context = context_handle;
23665324fb0dSmrg	fence_status.fence = ibs_request.seq_no;
23675324fb0dSmrg
23685324fb0dSmrg	/* wait for IB accomplished */
23695324fb0dSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
23705324fb0dSmrg					 AMDGPU_TIMEOUT_INFINITE,
23715324fb0dSmrg					 0, &expired);
23725324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
23735324fb0dSmrg	CU_ASSERT_EQUAL(expired, true);
23745324fb0dSmrg
23755324fb0dSmrg	/* verify if memset test result meets with expected */
23765324fb0dSmrg	i = 0;
23775324fb0dSmrg	while(i < bo_dst_size) {
23785324fb0dSmrg		CU_ASSERT_EQUAL(ptr_dst[i++], 0x22);
23795324fb0dSmrg	}
23805324fb0dSmrg
23815324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
23825324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
23835324fb0dSmrg
23845324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
23855324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
23865324fb0dSmrg
23875324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
23885324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
23895324fb0dSmrg
23905324fb0dSmrg	r = amdgpu_cs_ctx_free(context_handle);
23915324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
23925324fb0dSmrg}
23935324fb0dSmrg
23945324fb0dSmrgstatic void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
23955324fb0dSmrg					uint32_t ip_type,
23969bd392adSmrg					uint32_t ring,
23979bd392adSmrg					int hang)
23985324fb0dSmrg{
23995324fb0dSmrg	amdgpu_context_handle context_handle;
24005324fb0dSmrg	amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
24015324fb0dSmrg	volatile unsigned char *ptr_dst;
24025324fb0dSmrg	void *ptr_shader;
24035324fb0dSmrg	unsigned char *ptr_src;
24045324fb0dSmrg	uint32_t *ptr_cmd;
24055324fb0dSmrg	uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
24065324fb0dSmrg	amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
24075324fb0dSmrg	int i, r;
24085324fb0dSmrg	int bo_dst_size = 16384;
24095324fb0dSmrg	int bo_shader_size = 4096;
24105324fb0dSmrg	int bo_cmd_size = 4096;
24115324fb0dSmrg	struct amdgpu_cs_request ibs_request = {0};
24125324fb0dSmrg	struct amdgpu_cs_ib_info ib_info= {0};
24139bd392adSmrg	uint32_t expired, hang_state, hangs;
24149bd392adSmrg	enum cs_type cs_type;
24155324fb0dSmrg	amdgpu_bo_list_handle bo_list;
24165324fb0dSmrg	struct amdgpu_cs_fence fence_status = {0};
24175324fb0dSmrg
24185324fb0dSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
24195324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
24205324fb0dSmrg
24215324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
24225324fb0dSmrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
24235324fb0dSmrg				    &bo_cmd, (void **)&ptr_cmd,
24245324fb0dSmrg				    &mc_address_cmd, &va_cmd);
24255324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
24265324fb0dSmrg	memset(ptr_cmd, 0, bo_cmd_size);
24275324fb0dSmrg
24285324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
24295324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
24305324fb0dSmrg					&bo_shader, &ptr_shader,
24315324fb0dSmrg					&mc_address_shader, &va_shader);
24325324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
243388f8a8d2Smrg	memset(ptr_shader, 0, bo_shader_size);
24345324fb0dSmrg
24359bd392adSmrg	cs_type = hang ? CS_HANG : CS_BUFFERCOPY;
24369bd392adSmrg	r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type);
24375324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
24385324fb0dSmrg
24395324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
24405324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
24415324fb0dSmrg					&bo_src, (void **)&ptr_src,
24425324fb0dSmrg					&mc_address_src, &va_src);
24435324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
24445324fb0dSmrg
24455324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
24465324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
24475324fb0dSmrg					&bo_dst, (void **)&ptr_dst,
24485324fb0dSmrg					&mc_address_dst, &va_dst);
24495324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
24505324fb0dSmrg
24515324fb0dSmrg	memset(ptr_src, 0x55, bo_dst_size);
24525324fb0dSmrg
24535324fb0dSmrg	i = 0;
24545324fb0dSmrg	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);
24555324fb0dSmrg
24565324fb0dSmrg	/*  Issue commands to set cu mask used in current dispatch */
24575324fb0dSmrg	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);
24585324fb0dSmrg
24595324fb0dSmrg	/* Writes shader state to HW */
24605324fb0dSmrg	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);
24615324fb0dSmrg
24625324fb0dSmrg	/* Write constant data */
24635324fb0dSmrg	/* Writes the texture resource constants data to the SGPRs */
24645324fb0dSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
24655324fb0dSmrg	ptr_cmd[i++] = 0x240;
24665324fb0dSmrg	ptr_cmd[i++] = mc_address_src;
24675324fb0dSmrg	ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
24685324fb0dSmrg	ptr_cmd[i++] = 0x400;
24695324fb0dSmrg	ptr_cmd[i++] = 0x74fac;
24705324fb0dSmrg
24715324fb0dSmrg	/* Writes the UAV constant data to the SGPRs. */
24725324fb0dSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
24735324fb0dSmrg	ptr_cmd[i++] = 0x244;
24745324fb0dSmrg	ptr_cmd[i++] = mc_address_dst;
24755324fb0dSmrg	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
24765324fb0dSmrg	ptr_cmd[i++] = 0x400;
24775324fb0dSmrg	ptr_cmd[i++] = 0x74fac;
24785324fb0dSmrg
247988f8a8d2Smrg	/* clear mmCOMPUTE_RESOURCE_LIMITS */
248088f8a8d2Smrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
248188f8a8d2Smrg	ptr_cmd[i++] = 0x215;
248288f8a8d2Smrg	ptr_cmd[i++] = 0;
248388f8a8d2Smrg
24845324fb0dSmrg	/* dispatch direct command */
24855324fb0dSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
24865324fb0dSmrg	ptr_cmd[i++] = 0x10;
24875324fb0dSmrg	ptr_cmd[i++] = 1;
24885324fb0dSmrg	ptr_cmd[i++] = 1;
24895324fb0dSmrg	ptr_cmd[i++] = 1;
24905324fb0dSmrg
24915324fb0dSmrg	while (i & 7)
24925324fb0dSmrg		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
24935324fb0dSmrg
24945324fb0dSmrg	resources[0] = bo_shader;
24955324fb0dSmrg	resources[1] = bo_src;
24965324fb0dSmrg	resources[2] = bo_dst;
24975324fb0dSmrg	resources[3] = bo_cmd;
24985324fb0dSmrg	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
24995324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
25005324fb0dSmrg
25015324fb0dSmrg	ib_info.ib_mc_address = mc_address_cmd;
25025324fb0dSmrg	ib_info.size = i;
25035324fb0dSmrg	ibs_request.ip_type = ip_type;
25045324fb0dSmrg	ibs_request.ring = ring;
25055324fb0dSmrg	ibs_request.resources = bo_list;
25065324fb0dSmrg	ibs_request.number_of_ibs = 1;
25075324fb0dSmrg	ibs_request.ibs = &ib_info;
25085324fb0dSmrg	ibs_request.fence_info.handle = NULL;
25095324fb0dSmrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
25105324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
25115324fb0dSmrg
25125324fb0dSmrg	fence_status.ip_type = ip_type;
25135324fb0dSmrg	fence_status.ip_instance = 0;
25145324fb0dSmrg	fence_status.ring = ring;
25155324fb0dSmrg	fence_status.context = context_handle;
25165324fb0dSmrg	fence_status.fence = ibs_request.seq_no;
25175324fb0dSmrg
25185324fb0dSmrg	/* wait for IB accomplished */
25195324fb0dSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
25205324fb0dSmrg					 AMDGPU_TIMEOUT_INFINITE,
25215324fb0dSmrg					 0, &expired);
25225324fb0dSmrg
25239bd392adSmrg	if (!hang) {
25249bd392adSmrg		CU_ASSERT_EQUAL(r, 0);
25259bd392adSmrg		CU_ASSERT_EQUAL(expired, true);
25269bd392adSmrg
25279bd392adSmrg		/* verify if memcpy test result meets with expected */
25289bd392adSmrg		i = 0;
25299bd392adSmrg		while(i < bo_dst_size) {
25309bd392adSmrg			CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
25319bd392adSmrg			i++;
25329bd392adSmrg		}
25339bd392adSmrg	} else {
25349bd392adSmrg		r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
25359bd392adSmrg		CU_ASSERT_EQUAL(r, 0);
25369bd392adSmrg		CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
25375324fb0dSmrg	}
25385324fb0dSmrg
25395324fb0dSmrg	r = amdgpu_bo_list_destroy(bo_list);
25405324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
25415324fb0dSmrg
25425324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
25435324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
25445324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
25455324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
25465324fb0dSmrg
25475324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
25485324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
25495324fb0dSmrg
25505324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
25515324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
25525324fb0dSmrg
25535324fb0dSmrg	r = amdgpu_cs_ctx_free(context_handle);
25545324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
25555324fb0dSmrg}
255688f8a8d2Smrg
255788f8a8d2Smrgstatic void amdgpu_compute_dispatch_test(void)
25585324fb0dSmrg{
25595324fb0dSmrg	int r;
25605324fb0dSmrg	struct drm_amdgpu_info_hw_ip info;
25615324fb0dSmrg	uint32_t ring_id;
25625324fb0dSmrg
25635324fb0dSmrg	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
25645324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
256588f8a8d2Smrg	if (!info.available_rings)
256688f8a8d2Smrg		printf("SKIP ... as there's no compute ring\n");
25675324fb0dSmrg
25685324fb0dSmrg	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
25695324fb0dSmrg		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id);
25709bd392adSmrg		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, 0);
25715324fb0dSmrg	}
257288f8a8d2Smrg}
257388f8a8d2Smrg
257488f8a8d2Smrgstatic void amdgpu_gfx_dispatch_test(void)
257588f8a8d2Smrg{
257688f8a8d2Smrg	int r;
257788f8a8d2Smrg	struct drm_amdgpu_info_hw_ip info;
257888f8a8d2Smrg	uint32_t ring_id;
25795324fb0dSmrg
25805324fb0dSmrg	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
25815324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
258288f8a8d2Smrg	if (!info.available_rings)
258388f8a8d2Smrg		printf("SKIP ... as there's no graphics ring\n");
25845324fb0dSmrg
25855324fb0dSmrg	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
25865324fb0dSmrg		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id);
25879bd392adSmrg		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, 0);
25889bd392adSmrg	}
25899bd392adSmrg}
25909bd392adSmrg
25919bd392adSmrgvoid amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
25929bd392adSmrg{
25939bd392adSmrg	int r;
25949bd392adSmrg	struct drm_amdgpu_info_hw_ip info;
25959bd392adSmrg	uint32_t ring_id;
25969bd392adSmrg
25979bd392adSmrg	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
25989bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
25999bd392adSmrg	if (!info.available_rings)
26009bd392adSmrg		printf("SKIP ... as there's no ring for ip %d\n", ip_type);
26019bd392adSmrg
26029bd392adSmrg	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
26039bd392adSmrg		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
26049bd392adSmrg		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 1);
26059bd392adSmrg		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
26069bd392adSmrg	}
26079bd392adSmrg}
26089bd392adSmrg
26099bd392adSmrgstatic void amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_handle,
26109bd392adSmrg						  uint32_t ip_type, uint32_t ring)
26119bd392adSmrg{
26129bd392adSmrg	amdgpu_context_handle context_handle;
26139bd392adSmrg	amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
26149bd392adSmrg	volatile unsigned char *ptr_dst;
26159bd392adSmrg	void *ptr_shader;
26169bd392adSmrg	unsigned char *ptr_src;
26179bd392adSmrg	uint32_t *ptr_cmd;
26189bd392adSmrg	uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
26199bd392adSmrg	amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
26209bd392adSmrg	int i, r;
26219bd392adSmrg	int bo_dst_size = 0x4000000;
26229bd392adSmrg	int bo_shader_size = 0x400000;
26239bd392adSmrg	int bo_cmd_size = 4096;
26249bd392adSmrg	struct amdgpu_cs_request ibs_request = {0};
26259bd392adSmrg	struct amdgpu_cs_ib_info ib_info= {0};
26269bd392adSmrg	uint32_t hang_state, hangs, expired;
26279bd392adSmrg	struct amdgpu_gpu_info gpu_info = {0};
26289bd392adSmrg	amdgpu_bo_list_handle bo_list;
26299bd392adSmrg	struct amdgpu_cs_fence fence_status = {0};
26309bd392adSmrg
26319bd392adSmrg	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
26329bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
26339bd392adSmrg
26349bd392adSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
26359bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
26369bd392adSmrg
26379bd392adSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
26389bd392adSmrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
26399bd392adSmrg				    &bo_cmd, (void **)&ptr_cmd,
26409bd392adSmrg				    &mc_address_cmd, &va_cmd);
26419bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
26429bd392adSmrg	memset(ptr_cmd, 0, bo_cmd_size);
26439bd392adSmrg
26449bd392adSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
26459bd392adSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
26469bd392adSmrg					&bo_shader, &ptr_shader,
26479bd392adSmrg					&mc_address_shader, &va_shader);
26489bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
26499bd392adSmrg	memset(ptr_shader, 0, bo_shader_size);
26509bd392adSmrg
26519bd392adSmrg	r = amdgpu_dispatch_load_cs_shader_hang_slow(ptr_shader, gpu_info.family_id);
26529bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
26539bd392adSmrg
26549bd392adSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
26559bd392adSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
26569bd392adSmrg					&bo_src, (void **)&ptr_src,
26579bd392adSmrg					&mc_address_src, &va_src);
26589bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
26599bd392adSmrg
26609bd392adSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
26619bd392adSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
26629bd392adSmrg					&bo_dst, (void **)&ptr_dst,
26639bd392adSmrg					&mc_address_dst, &va_dst);
26649bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
26659bd392adSmrg
26669bd392adSmrg	memset(ptr_src, 0x55, bo_dst_size);
26679bd392adSmrg
26689bd392adSmrg	i = 0;
26699bd392adSmrg	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);
26709bd392adSmrg
26719bd392adSmrg	/*  Issue commands to set cu mask used in current dispatch */
26729bd392adSmrg	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);
26739bd392adSmrg
26749bd392adSmrg	/* Writes shader state to HW */
26759bd392adSmrg	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);
26769bd392adSmrg
26779bd392adSmrg	/* Write constant data */
26789bd392adSmrg	/* Writes the texture resource constants data to the SGPRs */
26799bd392adSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
26809bd392adSmrg	ptr_cmd[i++] = 0x240;
26819bd392adSmrg	ptr_cmd[i++] = mc_address_src;
26829bd392adSmrg	ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
26839bd392adSmrg	ptr_cmd[i++] = 0x400000;
26849bd392adSmrg	ptr_cmd[i++] = 0x74fac;
26859bd392adSmrg
26869bd392adSmrg	/* Writes the UAV constant data to the SGPRs. */
26879bd392adSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
26889bd392adSmrg	ptr_cmd[i++] = 0x244;
26899bd392adSmrg	ptr_cmd[i++] = mc_address_dst;
26909bd392adSmrg	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
26919bd392adSmrg	ptr_cmd[i++] = 0x400000;
26929bd392adSmrg	ptr_cmd[i++] = 0x74fac;
26939bd392adSmrg
26949bd392adSmrg	/* clear mmCOMPUTE_RESOURCE_LIMITS */
26959bd392adSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
26969bd392adSmrg	ptr_cmd[i++] = 0x215;
26979bd392adSmrg	ptr_cmd[i++] = 0;
26989bd392adSmrg
26999bd392adSmrg	/* dispatch direct command */
27009bd392adSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
27019bd392adSmrg	ptr_cmd[i++] = 0x10000;
27029bd392adSmrg	ptr_cmd[i++] = 1;
27039bd392adSmrg	ptr_cmd[i++] = 1;
27049bd392adSmrg	ptr_cmd[i++] = 1;
27059bd392adSmrg
27069bd392adSmrg	while (i & 7)
27079bd392adSmrg		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
27089bd392adSmrg
27099bd392adSmrg	resources[0] = bo_shader;
27109bd392adSmrg	resources[1] = bo_src;
27119bd392adSmrg	resources[2] = bo_dst;
27129bd392adSmrg	resources[3] = bo_cmd;
27139bd392adSmrg	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
27149bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
27159bd392adSmrg
27169bd392adSmrg	ib_info.ib_mc_address = mc_address_cmd;
27179bd392adSmrg	ib_info.size = i;
27189bd392adSmrg	ibs_request.ip_type = ip_type;
27199bd392adSmrg	ibs_request.ring = ring;
27209bd392adSmrg	ibs_request.resources = bo_list;
27219bd392adSmrg	ibs_request.number_of_ibs = 1;
27229bd392adSmrg	ibs_request.ibs = &ib_info;
27239bd392adSmrg	ibs_request.fence_info.handle = NULL;
27249bd392adSmrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
27259bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
27269bd392adSmrg
27279bd392adSmrg	fence_status.ip_type = ip_type;
27289bd392adSmrg	fence_status.ip_instance = 0;
27299bd392adSmrg	fence_status.ring = ring;
27309bd392adSmrg	fence_status.context = context_handle;
27319bd392adSmrg	fence_status.fence = ibs_request.seq_no;
27329bd392adSmrg
27339bd392adSmrg	/* wait for IB accomplished */
27349bd392adSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
27359bd392adSmrg					 AMDGPU_TIMEOUT_INFINITE,
27369bd392adSmrg					 0, &expired);
27379bd392adSmrg
27389bd392adSmrg	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
27399bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
27409bd392adSmrg	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
27419bd392adSmrg
27429bd392adSmrg	r = amdgpu_bo_list_destroy(bo_list);
27439bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
27449bd392adSmrg
27459bd392adSmrg	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
27469bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
27479bd392adSmrg	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
27489bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
27499bd392adSmrg
27509bd392adSmrg	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
27519bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
27529bd392adSmrg
27539bd392adSmrg	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
27549bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
27559bd392adSmrg
27569bd392adSmrg	r = amdgpu_cs_ctx_free(context_handle);
27579bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
27589bd392adSmrg}
27599bd392adSmrg
27609bd392adSmrgvoid amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
27619bd392adSmrg{
27629bd392adSmrg	int r;
27639bd392adSmrg	struct drm_amdgpu_info_hw_ip info;
27649bd392adSmrg	uint32_t ring_id;
27659bd392adSmrg
27669bd392adSmrg	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
27679bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
27689bd392adSmrg	if (!info.available_rings)
27699bd392adSmrg		printf("SKIP ... as there's no ring for ip %d\n", ip_type);
27709bd392adSmrg
27719bd392adSmrg	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
27729bd392adSmrg		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
27739bd392adSmrg		amdgpu_memcpy_dispatch_hang_slow_test(device_handle, ip_type, ring_id);
27749bd392adSmrg		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
27759bd392adSmrg	}
27769bd392adSmrg}
27779bd392adSmrg
27789bd392adSmrgstatic int amdgpu_draw_load_ps_shader_hang_slow(uint32_t *ptr, int family)
27799bd392adSmrg{
27809bd392adSmrg	struct amdgpu_test_shader *shader;
27819bd392adSmrg	int i, loop = 0x40000;
27829bd392adSmrg
27839bd392adSmrg	switch (family) {
27849bd392adSmrg		case AMDGPU_FAMILY_AI:
27859bd392adSmrg		case AMDGPU_FAMILY_RV:
27869bd392adSmrg			shader = &memcpy_ps_hang_slow_ai;
27879bd392adSmrg			break;
27889bd392adSmrg		default:
27899bd392adSmrg			return -1;
27909bd392adSmrg			break;
27915324fb0dSmrg	}
27929bd392adSmrg
27939bd392adSmrg	memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
27949bd392adSmrg
27959bd392adSmrg	for (i = 0; i < loop; i++)
27969bd392adSmrg		memcpy(ptr + shader->header_length + shader->body_length * i,
27979bd392adSmrg			shader->shader + shader->header_length,
27989bd392adSmrg			shader->body_length * sizeof(uint32_t));
27999bd392adSmrg
28009bd392adSmrg	memcpy(ptr + shader->header_length + shader->body_length * loop,
28019bd392adSmrg		shader->shader + shader->header_length + shader->body_length,
28029bd392adSmrg		shader->foot_length * sizeof(uint32_t));
28039bd392adSmrg
28049bd392adSmrg	return 0;
28055324fb0dSmrg}
28065324fb0dSmrg
28075324fb0dSmrgstatic int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type)
28085324fb0dSmrg{
28095324fb0dSmrg	int i;
28105324fb0dSmrg	uint32_t shader_offset= 256;
28115324fb0dSmrg	uint32_t mem_offset, patch_code_offset;
28125324fb0dSmrg	uint32_t shader_size, patchinfo_code_size;
28135324fb0dSmrg	const uint32_t *shader;
28145324fb0dSmrg	const uint32_t *patchinfo_code;
28155324fb0dSmrg	const uint32_t *patchcode_offset;
28165324fb0dSmrg
28175324fb0dSmrg	switch (ps_type) {
28185324fb0dSmrg		case PS_CONST:
28195324fb0dSmrg			shader = ps_const_shader_gfx9;
28205324fb0dSmrg			shader_size = sizeof(ps_const_shader_gfx9);
28215324fb0dSmrg			patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9;
28225324fb0dSmrg			patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9;
28235324fb0dSmrg			patchcode_offset = ps_const_shader_patchinfo_offset_gfx9;
28245324fb0dSmrg			break;
28255324fb0dSmrg		case PS_TEX:
28265324fb0dSmrg			shader = ps_tex_shader_gfx9;
28275324fb0dSmrg			shader_size = sizeof(ps_tex_shader_gfx9);
28285324fb0dSmrg			patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx9;
28295324fb0dSmrg			patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9;
28305324fb0dSmrg			patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9;
28315324fb0dSmrg			break;
28329bd392adSmrg		case PS_HANG:
28339bd392adSmrg			shader = memcpy_ps_hang;
28349bd392adSmrg			shader_size = sizeof(memcpy_ps_hang);
28359bd392adSmrg
28369bd392adSmrg			memcpy(ptr, shader, shader_size);
28379bd392adSmrg			return 0;
28385324fb0dSmrg		default:
28395324fb0dSmrg			return -1;
28405324fb0dSmrg			break;
28415324fb0dSmrg	}
28425324fb0dSmrg
28435324fb0dSmrg	/* write main shader program */
28445324fb0dSmrg	for (i = 0 ; i < 10; i++) {
28455324fb0dSmrg		mem_offset = i * shader_offset;
28465324fb0dSmrg		memcpy(ptr + mem_offset, shader, shader_size);
28475324fb0dSmrg	}
28485324fb0dSmrg
28495324fb0dSmrg	/* overwrite patch codes */
28505324fb0dSmrg	for (i = 0 ; i < 10; i++) {
28515324fb0dSmrg		mem_offset = i * shader_offset + patchcode_offset[0] * sizeof(uint32_t);
28525324fb0dSmrg		patch_code_offset = i * patchinfo_code_size;
28535324fb0dSmrg		memcpy(ptr + mem_offset,
28545324fb0dSmrg			patchinfo_code + patch_code_offset,
28555324fb0dSmrg			patchinfo_code_size * sizeof(uint32_t));
28565324fb0dSmrg	}
28575324fb0dSmrg
28585324fb0dSmrg	return 0;
28595324fb0dSmrg}
28605324fb0dSmrg
28615324fb0dSmrg/* load RectPosTexFast_VS */
28625324fb0dSmrgstatic int amdgpu_draw_load_vs_shader(uint8_t *ptr)
28635324fb0dSmrg{
28645324fb0dSmrg	const uint32_t *shader;
28655324fb0dSmrg	uint32_t shader_size;
28665324fb0dSmrg
28675324fb0dSmrg	shader = vs_RectPosTexFast_shader_gfx9;
28685324fb0dSmrg	shader_size = sizeof(vs_RectPosTexFast_shader_gfx9);
28695324fb0dSmrg
28705324fb0dSmrg	memcpy(ptr, shader, shader_size);
28715324fb0dSmrg
28725324fb0dSmrg	return 0;
28735324fb0dSmrg}
28745324fb0dSmrg
28755324fb0dSmrgstatic int amdgpu_draw_init(uint32_t *ptr)
28765324fb0dSmrg{
28775324fb0dSmrg	int i = 0;
28785324fb0dSmrg	const uint32_t *preamblecache_ptr;
28795324fb0dSmrg	uint32_t preamblecache_size;
28805324fb0dSmrg
28815324fb0dSmrg	/* Write context control and load shadowing register if necessary */
28825324fb0dSmrg	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
28835324fb0dSmrg	ptr[i++] = 0x80000000;
28845324fb0dSmrg	ptr[i++] = 0x80000000;
28855324fb0dSmrg
28865324fb0dSmrg	preamblecache_ptr = preamblecache_gfx9;
28875324fb0dSmrg	preamblecache_size = sizeof(preamblecache_gfx9);
28885324fb0dSmrg
28895324fb0dSmrg	memcpy(ptr + i, preamblecache_ptr, preamblecache_size);
28905324fb0dSmrg	return i + preamblecache_size/sizeof(uint32_t);
28915324fb0dSmrg}
28925324fb0dSmrg
28935324fb0dSmrgstatic int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr,
28949bd392adSmrg							 uint64_t dst_addr,
28959bd392adSmrg							 int hang_slow)
28965324fb0dSmrg{
28975324fb0dSmrg	int i = 0;
28985324fb0dSmrg
28995324fb0dSmrg	/* setup color buffer */
29005324fb0dSmrg	/* offset   reg
29015324fb0dSmrg	   0xA318   CB_COLOR0_BASE
29025324fb0dSmrg	   0xA319   CB_COLOR0_BASE_EXT
29035324fb0dSmrg	   0xA31A   CB_COLOR0_ATTRIB2
29045324fb0dSmrg	   0xA31B   CB_COLOR0_VIEW
29055324fb0dSmrg	   0xA31C   CB_COLOR0_INFO
29065324fb0dSmrg	   0xA31D   CB_COLOR0_ATTRIB
29075324fb0dSmrg	   0xA31E   CB_COLOR0_DCC_CONTROL
29085324fb0dSmrg	   0xA31F   CB_COLOR0_CMASK
29095324fb0dSmrg	   0xA320   CB_COLOR0_CMASK_BASE_EXT
29105324fb0dSmrg	   0xA321   CB_COLOR0_FMASK
29115324fb0dSmrg	   0xA322   CB_COLOR0_FMASK_BASE_EXT
29125324fb0dSmrg	   0xA323   CB_COLOR0_CLEAR_WORD0
29135324fb0dSmrg	   0xA324   CB_COLOR0_CLEAR_WORD1
29145324fb0dSmrg	   0xA325   CB_COLOR0_DCC_BASE
29155324fb0dSmrg	   0xA326   CB_COLOR0_DCC_BASE_EXT */
29165324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15);
29175324fb0dSmrg	ptr[i++] = 0x318;
29185324fb0dSmrg	ptr[i++] = dst_addr >> 8;
29195324fb0dSmrg	ptr[i++] = dst_addr >> 40;
29209bd392adSmrg	ptr[i++] = hang_slow ? 0x1ffc7ff : 0x7c01f;
29215324fb0dSmrg	ptr[i++] = 0;
29225324fb0dSmrg	ptr[i++] = 0x50438;
29235324fb0dSmrg	ptr[i++] = 0x10140000;
29245324fb0dSmrg	i += 9;
29255324fb0dSmrg
29265324fb0dSmrg	/* mmCB_MRT0_EPITCH */
29275324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
29285324fb0dSmrg	ptr[i++] = 0x1e8;
29299bd392adSmrg	ptr[i++] = hang_slow ? 0x7ff : 0x1f;
29305324fb0dSmrg
29315324fb0dSmrg	/* 0xA32B   CB_COLOR1_BASE */
29325324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
29335324fb0dSmrg	ptr[i++] = 0x32b;
29345324fb0dSmrg	ptr[i++] = 0;
29355324fb0dSmrg
29365324fb0dSmrg	/* 0xA33A   CB_COLOR1_BASE */
29375324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
29385324fb0dSmrg	ptr[i++] = 0x33a;
29395324fb0dSmrg	ptr[i++] = 0;
29405324fb0dSmrg
29415324fb0dSmrg	/* SPI_SHADER_COL_FORMAT */
29425324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
29435324fb0dSmrg	ptr[i++] = 0x1c5;
29445324fb0dSmrg	ptr[i++] = 9;
29455324fb0dSmrg
29465324fb0dSmrg	/* Setup depth buffer */
29475324fb0dSmrg	/* mmDB_Z_INFO */
29485324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
29495324fb0dSmrg	ptr[i++] = 0xe;
29505324fb0dSmrg	i += 2;
29515324fb0dSmrg
29525324fb0dSmrg	return i;
29535324fb0dSmrg}
29545324fb0dSmrg
29559bd392adSmrgstatic int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr, int hang_slow)
29565324fb0dSmrg{
29575324fb0dSmrg	int i = 0;
29585324fb0dSmrg	const uint32_t *cached_cmd_ptr;
29595324fb0dSmrg	uint32_t cached_cmd_size;
29605324fb0dSmrg
29615324fb0dSmrg	/* mmPA_SC_TILE_STEERING_OVERRIDE */
29625324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
29635324fb0dSmrg	ptr[i++] = 0xd7;
29645324fb0dSmrg	ptr[i++] = 0;
29655324fb0dSmrg
29665324fb0dSmrg	ptr[i++] = 0xffff1000;
29675324fb0dSmrg	ptr[i++] = 0xc0021000;
29685324fb0dSmrg
29695324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
29705324fb0dSmrg	ptr[i++] = 0xd7;
29715324fb0dSmrg	ptr[i++] = 1;
29725324fb0dSmrg
29735324fb0dSmrg	/* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
29745324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16);
29755324fb0dSmrg	ptr[i++] = 0x2fe;
29765324fb0dSmrg	i += 16;
29775324fb0dSmrg
29785324fb0dSmrg	/* mmPA_SC_CENTROID_PRIORITY_0 */
29795324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
29805324fb0dSmrg	ptr[i++] = 0x2f5;
29815324fb0dSmrg	i += 2;
29825324fb0dSmrg
29835324fb0dSmrg	cached_cmd_ptr = cached_cmd_gfx9;
29845324fb0dSmrg	cached_cmd_size = sizeof(cached_cmd_gfx9);
29855324fb0dSmrg
29865324fb0dSmrg	memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size);
29879bd392adSmrg	if (hang_slow)
29889bd392adSmrg		*(ptr + i + 12) = 0x8000800;
29895324fb0dSmrg	i += cached_cmd_size/sizeof(uint32_t);
29905324fb0dSmrg
29915324fb0dSmrg	return i;
29925324fb0dSmrg}
29935324fb0dSmrg
29945324fb0dSmrgstatic int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr,
29955324fb0dSmrg						  int ps_type,
29969bd392adSmrg						  uint64_t shader_addr,
29979bd392adSmrg						  int hang_slow)
29985324fb0dSmrg{
29995324fb0dSmrg	int i = 0;
30005324fb0dSmrg
30015324fb0dSmrg	/* mmPA_CL_VS_OUT_CNTL */
30025324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
30035324fb0dSmrg	ptr[i++] = 0x207;
30045324fb0dSmrg	ptr[i++] = 0;
30055324fb0dSmrg
30065324fb0dSmrg	/* mmSPI_SHADER_PGM_RSRC3_VS */
30075324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
30085324fb0dSmrg	ptr[i++] = 0x46;
30095324fb0dSmrg	ptr[i++] = 0xffff;
30105324fb0dSmrg
30115324fb0dSmrg	/* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */
30125324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
30135324fb0dSmrg	ptr[i++] = 0x48;
30145324fb0dSmrg	ptr[i++] = shader_addr >> 8;
30155324fb0dSmrg	ptr[i++] = shader_addr >> 40;
30165324fb0dSmrg
30175324fb0dSmrg	/* mmSPI_SHADER_PGM_RSRC1_VS */
30185324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
30195324fb0dSmrg	ptr[i++] = 0x4a;
30205324fb0dSmrg	ptr[i++] = 0xc0081;
30215324fb0dSmrg	/* mmSPI_SHADER_PGM_RSRC2_VS */
30225324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
30235324fb0dSmrg	ptr[i++] = 0x4b;
30245324fb0dSmrg	ptr[i++] = 0x18;
30255324fb0dSmrg
30265324fb0dSmrg	/* mmSPI_VS_OUT_CONFIG */
30275324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
30285324fb0dSmrg	ptr[i++] = 0x1b1;
30295324fb0dSmrg	ptr[i++] = 2;
30305324fb0dSmrg
30315324fb0dSmrg	/* mmSPI_SHADER_POS_FORMAT */
30325324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
30335324fb0dSmrg	ptr[i++] = 0x1c3;
30345324fb0dSmrg	ptr[i++] = 4;
30355324fb0dSmrg
30365324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
30375324fb0dSmrg	ptr[i++] = 0x4c;
30385324fb0dSmrg	i += 2;
30399bd392adSmrg	ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;
30409bd392adSmrg	ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;
30415324fb0dSmrg
30425324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
30435324fb0dSmrg	ptr[i++] = 0x50;
30445324fb0dSmrg	i += 2;
30455324fb0dSmrg	if (ps_type == PS_CONST) {
30465324fb0dSmrg		i += 2;
30475324fb0dSmrg	} else if (ps_type == PS_TEX) {
30485324fb0dSmrg		ptr[i++] = 0x3f800000;
30495324fb0dSmrg		ptr[i++] = 0x3f800000;
30505324fb0dSmrg	}
30515324fb0dSmrg
30525324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
30535324fb0dSmrg	ptr[i++] = 0x54;
30545324fb0dSmrg	i += 4;
30555324fb0dSmrg
30565324fb0dSmrg	return i;
30575324fb0dSmrg}
30585324fb0dSmrg
30595324fb0dSmrgstatic int amdgpu_draw_ps_write2hw(uint32_t *ptr,
30605324fb0dSmrg				   int ps_type,
30615324fb0dSmrg				   uint64_t shader_addr)
30625324fb0dSmrg{
30635324fb0dSmrg	int i, j;
30645324fb0dSmrg	const uint32_t *sh_registers;
30655324fb0dSmrg	const uint32_t *context_registers;
30665324fb0dSmrg	uint32_t num_sh_reg, num_context_reg;
30675324fb0dSmrg
30685324fb0dSmrg	if (ps_type == PS_CONST) {
30695324fb0dSmrg		sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9;
30705324fb0dSmrg		context_registers = (const uint32_t *)ps_const_context_reg_gfx9;
30715324fb0dSmrg		num_sh_reg = ps_num_sh_registers_gfx9;
30725324fb0dSmrg		num_context_reg = ps_num_context_registers_gfx9;
30735324fb0dSmrg	} else if (ps_type == PS_TEX) {
30745324fb0dSmrg		sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9;
30755324fb0dSmrg		context_registers = (const uint32_t *)ps_tex_context_reg_gfx9;
30765324fb0dSmrg		num_sh_reg = ps_num_sh_registers_gfx9;
30775324fb0dSmrg		num_context_reg = ps_num_context_registers_gfx9;
30785324fb0dSmrg	}
30795324fb0dSmrg
30805324fb0dSmrg	i = 0;
30815324fb0dSmrg
30825324fb0dSmrg	/* 0x2c07   SPI_SHADER_PGM_RSRC3_PS
30835324fb0dSmrg	   0x2c08   SPI_SHADER_PGM_LO_PS
30845324fb0dSmrg	   0x2c09   SPI_SHADER_PGM_HI_PS */
30855324fb0dSmrg	shader_addr += 256 * 9;
30865324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
30875324fb0dSmrg	ptr[i++] = 0x7;
30885324fb0dSmrg	ptr[i++] = 0xffff;
30895324fb0dSmrg	ptr[i++] = shader_addr >> 8;
30905324fb0dSmrg	ptr[i++] = shader_addr >> 40;
30915324fb0dSmrg
30925324fb0dSmrg	for (j = 0; j < num_sh_reg; j++) {
30935324fb0dSmrg		ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
30945324fb0dSmrg		ptr[i++] = sh_registers[j * 2] - 0x2c00;
30955324fb0dSmrg		ptr[i++] = sh_registers[j * 2 + 1];
30965324fb0dSmrg	}
30975324fb0dSmrg
30985324fb0dSmrg	for (j = 0; j < num_context_reg; j++) {
30995324fb0dSmrg		if (context_registers[j * 2] != 0xA1C5) {
31005324fb0dSmrg			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
31015324fb0dSmrg			ptr[i++] = context_registers[j * 2] - 0xa000;
31025324fb0dSmrg			ptr[i++] = context_registers[j * 2 + 1];
31035324fb0dSmrg		}
31045324fb0dSmrg
31055324fb0dSmrg		if (context_registers[j * 2] == 0xA1B4) {
31065324fb0dSmrg			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
31075324fb0dSmrg			ptr[i++] = 0x1b3;
31085324fb0dSmrg			ptr[i++] = 2;
31095324fb0dSmrg		}
31105324fb0dSmrg	}
31115324fb0dSmrg
31125324fb0dSmrg	return i;
31135324fb0dSmrg}
31145324fb0dSmrg
31155324fb0dSmrgstatic int amdgpu_draw_draw(uint32_t *ptr)
31165324fb0dSmrg{
31175324fb0dSmrg	int i = 0;
31185324fb0dSmrg
31195324fb0dSmrg	/* mmIA_MULTI_VGT_PARAM */
31205324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
31215324fb0dSmrg	ptr[i++] = 0x40000258;
31225324fb0dSmrg	ptr[i++] = 0xd00ff;
31235324fb0dSmrg
31245324fb0dSmrg	/* mmVGT_PRIMITIVE_TYPE */
31255324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
31265324fb0dSmrg	ptr[i++] = 0x10000242;
31275324fb0dSmrg	ptr[i++] = 0x11;
31285324fb0dSmrg
31295324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1);
31305324fb0dSmrg	ptr[i++] = 3;
31315324fb0dSmrg	ptr[i++] = 2;
31325324fb0dSmrg
31335324fb0dSmrg	return i;
31345324fb0dSmrg}
31355324fb0dSmrg
31365324fb0dSmrgvoid amdgpu_memset_draw(amdgpu_device_handle device_handle,
31375324fb0dSmrg			amdgpu_bo_handle bo_shader_ps,
31385324fb0dSmrg			amdgpu_bo_handle bo_shader_vs,
31395324fb0dSmrg			uint64_t mc_address_shader_ps,
31405324fb0dSmrg			uint64_t mc_address_shader_vs,
31415324fb0dSmrg			uint32_t ring_id)
31425324fb0dSmrg{
31435324fb0dSmrg	amdgpu_context_handle context_handle;
31445324fb0dSmrg	amdgpu_bo_handle bo_dst, bo_cmd, resources[4];
31455324fb0dSmrg	volatile unsigned char *ptr_dst;
31465324fb0dSmrg	uint32_t *ptr_cmd;
31475324fb0dSmrg	uint64_t mc_address_dst, mc_address_cmd;
31485324fb0dSmrg	amdgpu_va_handle va_dst, va_cmd;
31495324fb0dSmrg	int i, r;
31505324fb0dSmrg	int bo_dst_size = 16384;
31515324fb0dSmrg	int bo_cmd_size = 4096;
31525324fb0dSmrg	struct amdgpu_cs_request ibs_request = {0};
31535324fb0dSmrg	struct amdgpu_cs_ib_info ib_info = {0};
31545324fb0dSmrg	struct amdgpu_cs_fence fence_status = {0};
31555324fb0dSmrg	uint32_t expired;
31565324fb0dSmrg	amdgpu_bo_list_handle bo_list;
31575324fb0dSmrg
31585324fb0dSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
31595324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
31605324fb0dSmrg
31615324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
31625324fb0dSmrg					AMDGPU_GEM_DOMAIN_GTT, 0,
31635324fb0dSmrg					&bo_cmd, (void **)&ptr_cmd,
31645324fb0dSmrg					&mc_address_cmd, &va_cmd);
31655324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
31665324fb0dSmrg	memset(ptr_cmd, 0, bo_cmd_size);
31675324fb0dSmrg
31685324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
31695324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
31705324fb0dSmrg					&bo_dst, (void **)&ptr_dst,
31715324fb0dSmrg					&mc_address_dst, &va_dst);
31725324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
31735324fb0dSmrg
31745324fb0dSmrg	i = 0;
31755324fb0dSmrg	i += amdgpu_draw_init(ptr_cmd + i);
31765324fb0dSmrg
31779bd392adSmrg	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0);
31785324fb0dSmrg
31799bd392adSmrg	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0);
31805324fb0dSmrg
31819bd392adSmrg	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs, 0);
31825324fb0dSmrg
31835324fb0dSmrg	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps);
31845324fb0dSmrg
31855324fb0dSmrg	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
31865324fb0dSmrg	ptr_cmd[i++] = 0xc;
31875324fb0dSmrg	ptr_cmd[i++] = 0x33333333;
31885324fb0dSmrg	ptr_cmd[i++] = 0x33333333;
31895324fb0dSmrg	ptr_cmd[i++] = 0x33333333;
31905324fb0dSmrg	ptr_cmd[i++] = 0x33333333;
31915324fb0dSmrg
31925324fb0dSmrg	i += amdgpu_draw_draw(ptr_cmd + i);
31935324fb0dSmrg
31945324fb0dSmrg	while (i & 7)
31955324fb0dSmrg		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
31965324fb0dSmrg
31975324fb0dSmrg	resources[0] = bo_dst;
31985324fb0dSmrg	resources[1] = bo_shader_ps;
31995324fb0dSmrg	resources[2] = bo_shader_vs;
32005324fb0dSmrg	resources[3] = bo_cmd;
32019bd392adSmrg	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
32025324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
32035324fb0dSmrg
32045324fb0dSmrg	ib_info.ib_mc_address = mc_address_cmd;
32055324fb0dSmrg	ib_info.size = i;
32065324fb0dSmrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
32075324fb0dSmrg	ibs_request.ring = ring_id;
32085324fb0dSmrg	ibs_request.resources = bo_list;
32095324fb0dSmrg	ibs_request.number_of_ibs = 1;
32105324fb0dSmrg	ibs_request.ibs = &ib_info;
32115324fb0dSmrg	ibs_request.fence_info.handle = NULL;
32125324fb0dSmrg
32135324fb0dSmrg	/* submit CS */
32145324fb0dSmrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
32155324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
32165324fb0dSmrg
32175324fb0dSmrg	r = amdgpu_bo_list_destroy(bo_list);
32185324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
32195324fb0dSmrg
32205324fb0dSmrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
32215324fb0dSmrg	fence_status.ip_instance = 0;
32225324fb0dSmrg	fence_status.ring = ring_id;
32235324fb0dSmrg	fence_status.context = context_handle;
32245324fb0dSmrg	fence_status.fence = ibs_request.seq_no;
32255324fb0dSmrg
32265324fb0dSmrg	/* wait for IB accomplished */
32275324fb0dSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
32285324fb0dSmrg					 AMDGPU_TIMEOUT_INFINITE,
32295324fb0dSmrg					 0, &expired);
32305324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
32315324fb0dSmrg	CU_ASSERT_EQUAL(expired, true);
32325324fb0dSmrg
32335324fb0dSmrg	/* verify if memset test result meets with expected */
32345324fb0dSmrg	i = 0;
32355324fb0dSmrg	while(i < bo_dst_size) {
32365324fb0dSmrg		CU_ASSERT_EQUAL(ptr_dst[i++], 0x33);
32375324fb0dSmrg	}
32385324fb0dSmrg
32395324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
32405324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
32415324fb0dSmrg
32425324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
32435324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
32445324fb0dSmrg
32455324fb0dSmrg	r = amdgpu_cs_ctx_free(context_handle);
32465324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
32475324fb0dSmrg}
32485324fb0dSmrg
32495324fb0dSmrgstatic void amdgpu_memset_draw_test(amdgpu_device_handle device_handle,
32505324fb0dSmrg				    uint32_t ring)
32515324fb0dSmrg{
32525324fb0dSmrg	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
32535324fb0dSmrg	void *ptr_shader_ps;
32545324fb0dSmrg	void *ptr_shader_vs;
32555324fb0dSmrg	uint64_t mc_address_shader_ps, mc_address_shader_vs;
32565324fb0dSmrg	amdgpu_va_handle va_shader_ps, va_shader_vs;
32575324fb0dSmrg	int r;
32585324fb0dSmrg	int bo_shader_size = 4096;
32595324fb0dSmrg
32605324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
32615324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
32625324fb0dSmrg					&bo_shader_ps, &ptr_shader_ps,
32635324fb0dSmrg					&mc_address_shader_ps, &va_shader_ps);
32645324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
326588f8a8d2Smrg	memset(ptr_shader_ps, 0, bo_shader_size);
32665324fb0dSmrg
32675324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
32685324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
32695324fb0dSmrg					&bo_shader_vs, &ptr_shader_vs,
32705324fb0dSmrg					&mc_address_shader_vs, &va_shader_vs);
32715324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
327288f8a8d2Smrg	memset(ptr_shader_vs, 0, bo_shader_size);
32735324fb0dSmrg
32745324fb0dSmrg	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_CONST);
32755324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
32765324fb0dSmrg
32775324fb0dSmrg	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
32785324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
32795324fb0dSmrg
32805324fb0dSmrg	amdgpu_memset_draw(device_handle, bo_shader_ps, bo_shader_vs,
32815324fb0dSmrg			mc_address_shader_ps, mc_address_shader_vs, ring);
32825324fb0dSmrg
32835324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
32845324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
32855324fb0dSmrg
32865324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
32875324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
32885324fb0dSmrg}
32895324fb0dSmrg
32905324fb0dSmrgstatic void amdgpu_memcpy_draw(amdgpu_device_handle device_handle,
32915324fb0dSmrg			       amdgpu_bo_handle bo_shader_ps,
32925324fb0dSmrg			       amdgpu_bo_handle bo_shader_vs,
32935324fb0dSmrg			       uint64_t mc_address_shader_ps,
32945324fb0dSmrg			       uint64_t mc_address_shader_vs,
32959bd392adSmrg			       uint32_t ring, int hang)
32965324fb0dSmrg{
32975324fb0dSmrg	amdgpu_context_handle context_handle;
32985324fb0dSmrg	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
32995324fb0dSmrg	volatile unsigned char *ptr_dst;
33005324fb0dSmrg	unsigned char *ptr_src;
33015324fb0dSmrg	uint32_t *ptr_cmd;
33025324fb0dSmrg	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
33035324fb0dSmrg	amdgpu_va_handle va_dst, va_src, va_cmd;
33045324fb0dSmrg	int i, r;
33055324fb0dSmrg	int bo_size = 16384;
33065324fb0dSmrg	int bo_cmd_size = 4096;
33075324fb0dSmrg	struct amdgpu_cs_request ibs_request = {0};
33085324fb0dSmrg	struct amdgpu_cs_ib_info ib_info= {0};
33099bd392adSmrg	uint32_t hang_state, hangs;
33109bd392adSmrg	uint32_t expired;
33115324fb0dSmrg	amdgpu_bo_list_handle bo_list;
33125324fb0dSmrg	struct amdgpu_cs_fence fence_status = {0};
33135324fb0dSmrg
33145324fb0dSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
33155324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
33165324fb0dSmrg
33175324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
33185324fb0dSmrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
33195324fb0dSmrg				    &bo_cmd, (void **)&ptr_cmd,
33205324fb0dSmrg				    &mc_address_cmd, &va_cmd);
33215324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
33225324fb0dSmrg	memset(ptr_cmd, 0, bo_cmd_size);
33235324fb0dSmrg
33245324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
33255324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
33265324fb0dSmrg					&bo_src, (void **)&ptr_src,
33275324fb0dSmrg					&mc_address_src, &va_src);
33285324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
33295324fb0dSmrg
33305324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
33315324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
33325324fb0dSmrg					&bo_dst, (void **)&ptr_dst,
33335324fb0dSmrg					&mc_address_dst, &va_dst);
33345324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
33355324fb0dSmrg
33365324fb0dSmrg	memset(ptr_src, 0x55, bo_size);
33375324fb0dSmrg
33385324fb0dSmrg	i = 0;
33395324fb0dSmrg	i += amdgpu_draw_init(ptr_cmd + i);
33405324fb0dSmrg
33419bd392adSmrg	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0);
33425324fb0dSmrg
33439bd392adSmrg	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0);
33445324fb0dSmrg
33459bd392adSmrg	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs, 0);
33465324fb0dSmrg
33475324fb0dSmrg	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);
33485324fb0dSmrg
33495324fb0dSmrg	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
33505324fb0dSmrg	ptr_cmd[i++] = 0xc;
33515324fb0dSmrg	ptr_cmd[i++] = mc_address_src >> 8;
33525324fb0dSmrg	ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
33535324fb0dSmrg	ptr_cmd[i++] = 0x7c01f;
33545324fb0dSmrg	ptr_cmd[i++] = 0x90500fac;
33555324fb0dSmrg	ptr_cmd[i++] = 0x3e000;
33565324fb0dSmrg	i += 3;
33575324fb0dSmrg
33585324fb0dSmrg	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
33595324fb0dSmrg	ptr_cmd[i++] = 0x14;
33605324fb0dSmrg	ptr_cmd[i++] = 0x92;
33615324fb0dSmrg	i += 3;
33625324fb0dSmrg
336388f8a8d2Smrg	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
33645324fb0dSmrg	ptr_cmd[i++] = 0x191;
33655324fb0dSmrg	ptr_cmd[i++] = 0;
33665324fb0dSmrg
33675324fb0dSmrg	i += amdgpu_draw_draw(ptr_cmd + i);
33685324fb0dSmrg
33695324fb0dSmrg	while (i & 7)
33705324fb0dSmrg		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
33715324fb0dSmrg
33725324fb0dSmrg	resources[0] = bo_dst;
33735324fb0dSmrg	resources[1] = bo_src;
33745324fb0dSmrg	resources[2] = bo_shader_ps;
33755324fb0dSmrg	resources[3] = bo_shader_vs;
33765324fb0dSmrg	resources[4] = bo_cmd;
33775324fb0dSmrg	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
33785324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
33795324fb0dSmrg
33805324fb0dSmrg	ib_info.ib_mc_address = mc_address_cmd;
33815324fb0dSmrg	ib_info.size = i;
33825324fb0dSmrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
33835324fb0dSmrg	ibs_request.ring = ring;
33845324fb0dSmrg	ibs_request.resources = bo_list;
33855324fb0dSmrg	ibs_request.number_of_ibs = 1;
33865324fb0dSmrg	ibs_request.ibs = &ib_info;
33875324fb0dSmrg	ibs_request.fence_info.handle = NULL;
33885324fb0dSmrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
33895324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
33905324fb0dSmrg
33915324fb0dSmrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
33925324fb0dSmrg	fence_status.ip_instance = 0;
33935324fb0dSmrg	fence_status.ring = ring;
33945324fb0dSmrg	fence_status.context = context_handle;
33955324fb0dSmrg	fence_status.fence = ibs_request.seq_no;
33965324fb0dSmrg
33975324fb0dSmrg	/* wait for IB accomplished */
33985324fb0dSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
33995324fb0dSmrg					 AMDGPU_TIMEOUT_INFINITE,
34005324fb0dSmrg					 0, &expired);
34019bd392adSmrg	if (!hang) {
34029bd392adSmrg		CU_ASSERT_EQUAL(r, 0);
34039bd392adSmrg		CU_ASSERT_EQUAL(expired, true);
34045324fb0dSmrg
34059bd392adSmrg		/* verify if memcpy test result meets with expected */
34069bd392adSmrg		i = 0;
34079bd392adSmrg		while(i < bo_size) {
34089bd392adSmrg			CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
34099bd392adSmrg			i++;
34109bd392adSmrg		}
34119bd392adSmrg	} else {
34129bd392adSmrg		r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
34139bd392adSmrg		CU_ASSERT_EQUAL(r, 0);
34149bd392adSmrg		CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
34155324fb0dSmrg	}
34165324fb0dSmrg
34175324fb0dSmrg	r = amdgpu_bo_list_destroy(bo_list);
34185324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
34195324fb0dSmrg
34205324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
34215324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
34225324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
34235324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
34245324fb0dSmrg
34255324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
34265324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
34275324fb0dSmrg
34285324fb0dSmrg	r = amdgpu_cs_ctx_free(context_handle);
34295324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
34305324fb0dSmrg}
34315324fb0dSmrg
34329bd392adSmrgvoid amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring,
34339bd392adSmrg			     int hang)
34345324fb0dSmrg{
34355324fb0dSmrg	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
34365324fb0dSmrg	void *ptr_shader_ps;
34375324fb0dSmrg	void *ptr_shader_vs;
34385324fb0dSmrg	uint64_t mc_address_shader_ps, mc_address_shader_vs;
34395324fb0dSmrg	amdgpu_va_handle va_shader_ps, va_shader_vs;
34405324fb0dSmrg	int bo_shader_size = 4096;
34419bd392adSmrg	enum ps_type ps_type = hang ? PS_HANG : PS_TEX;
34425324fb0dSmrg	int r;
34435324fb0dSmrg
34445324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
34455324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
34465324fb0dSmrg					&bo_shader_ps, &ptr_shader_ps,
34475324fb0dSmrg					&mc_address_shader_ps, &va_shader_ps);
34485324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
344988f8a8d2Smrg	memset(ptr_shader_ps, 0, bo_shader_size);
34505324fb0dSmrg
34515324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
34525324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
34535324fb0dSmrg					&bo_shader_vs, &ptr_shader_vs,
34545324fb0dSmrg					&mc_address_shader_vs, &va_shader_vs);
34555324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
345688f8a8d2Smrg	memset(ptr_shader_vs, 0, bo_shader_size);
34575324fb0dSmrg
34589bd392adSmrg	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, ps_type);
34595324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
34605324fb0dSmrg
34615324fb0dSmrg	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
34625324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
34635324fb0dSmrg
34645324fb0dSmrg	amdgpu_memcpy_draw(device_handle, bo_shader_ps, bo_shader_vs,
34659bd392adSmrg			mc_address_shader_ps, mc_address_shader_vs, ring, hang);
34665324fb0dSmrg
34675324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
34685324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
34695324fb0dSmrg
34705324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
34715324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
34725324fb0dSmrg}
34735324fb0dSmrg
34745324fb0dSmrgstatic void amdgpu_draw_test(void)
34755324fb0dSmrg{
34765324fb0dSmrg	int r;
34775324fb0dSmrg	struct drm_amdgpu_info_hw_ip info;
34785324fb0dSmrg	uint32_t ring_id;
34795324fb0dSmrg
34805324fb0dSmrg	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
34815324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
348288f8a8d2Smrg	if (!info.available_rings)
348388f8a8d2Smrg		printf("SKIP ... as there's no graphics ring\n");
34845324fb0dSmrg
34855324fb0dSmrg	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
34865324fb0dSmrg		amdgpu_memset_draw_test(device_handle, ring_id);
34879bd392adSmrg		amdgpu_memcpy_draw_test(device_handle, ring_id, 0);
34885324fb0dSmrg	}
34895324fb0dSmrg}
349088f8a8d2Smrg
34919bd392adSmrgvoid amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint32_t ring)
34929bd392adSmrg{
34939bd392adSmrg	amdgpu_context_handle context_handle;
34949bd392adSmrg	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
34959bd392adSmrg	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
34969bd392adSmrg	void *ptr_shader_ps;
34979bd392adSmrg	void *ptr_shader_vs;
34989bd392adSmrg	volatile unsigned char *ptr_dst;
34999bd392adSmrg	unsigned char *ptr_src;
35009bd392adSmrg	uint32_t *ptr_cmd;
35019bd392adSmrg	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
35029bd392adSmrg	uint64_t mc_address_shader_ps, mc_address_shader_vs;
35039bd392adSmrg	amdgpu_va_handle va_shader_ps, va_shader_vs;
35049bd392adSmrg	amdgpu_va_handle va_dst, va_src, va_cmd;
35059bd392adSmrg	struct amdgpu_gpu_info gpu_info = {0};
35069bd392adSmrg	int i, r;
35079bd392adSmrg	int bo_size = 0x4000000;
35089bd392adSmrg	int bo_shader_ps_size = 0x400000;
35099bd392adSmrg	int bo_shader_vs_size = 4096;
35109bd392adSmrg	int bo_cmd_size = 4096;
35119bd392adSmrg	struct amdgpu_cs_request ibs_request = {0};
35129bd392adSmrg	struct amdgpu_cs_ib_info ib_info= {0};
35139bd392adSmrg	uint32_t hang_state, hangs, expired;
35149bd392adSmrg	amdgpu_bo_list_handle bo_list;
35159bd392adSmrg	struct amdgpu_cs_fence fence_status = {0};
35169bd392adSmrg
35179bd392adSmrg	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
35189bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
35199bd392adSmrg
35209bd392adSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
35219bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
35229bd392adSmrg
35239bd392adSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
35249bd392adSmrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
35259bd392adSmrg				    &bo_cmd, (void **)&ptr_cmd,
35269bd392adSmrg				    &mc_address_cmd, &va_cmd);
35279bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
35289bd392adSmrg	memset(ptr_cmd, 0, bo_cmd_size);
35299bd392adSmrg
35309bd392adSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_ps_size, 4096,
35319bd392adSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
35329bd392adSmrg					&bo_shader_ps, &ptr_shader_ps,
35339bd392adSmrg					&mc_address_shader_ps, &va_shader_ps);
35349bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
35359bd392adSmrg	memset(ptr_shader_ps, 0, bo_shader_ps_size);
35369bd392adSmrg
35379bd392adSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_vs_size, 4096,
35389bd392adSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
35399bd392adSmrg					&bo_shader_vs, &ptr_shader_vs,
35409bd392adSmrg					&mc_address_shader_vs, &va_shader_vs);
35419bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
35429bd392adSmrg	memset(ptr_shader_vs, 0, bo_shader_vs_size);
35439bd392adSmrg
35449bd392adSmrg	r = amdgpu_draw_load_ps_shader_hang_slow(ptr_shader_ps, gpu_info.family_id);
35459bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
35469bd392adSmrg
35479bd392adSmrg	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
35489bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
35499bd392adSmrg
35509bd392adSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
35519bd392adSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
35529bd392adSmrg					&bo_src, (void **)&ptr_src,
35539bd392adSmrg					&mc_address_src, &va_src);
35549bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
35559bd392adSmrg
35569bd392adSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
35579bd392adSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
35589bd392adSmrg					&bo_dst, (void **)&ptr_dst,
35599bd392adSmrg					&mc_address_dst, &va_dst);
35609bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
35619bd392adSmrg
35629bd392adSmrg	memset(ptr_src, 0x55, bo_size);
35639bd392adSmrg
35649bd392adSmrg	i = 0;
35659bd392adSmrg	i += amdgpu_draw_init(ptr_cmd + i);
35669bd392adSmrg
35679bd392adSmrg	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 1);
35689bd392adSmrg
35699bd392adSmrg	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 1);
35709bd392adSmrg
35719bd392adSmrg	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX,
35729bd392adSmrg							mc_address_shader_vs, 1);
35739bd392adSmrg
35749bd392adSmrg	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);
35759bd392adSmrg
35769bd392adSmrg	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
35779bd392adSmrg	ptr_cmd[i++] = 0xc;
35789bd392adSmrg	ptr_cmd[i++] = mc_address_src >> 8;
35799bd392adSmrg	ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
35809bd392adSmrg	ptr_cmd[i++] = 0x1ffc7ff;
35819bd392adSmrg	ptr_cmd[i++] = 0x90500fac;
35829bd392adSmrg	ptr_cmd[i++] = 0xffe000;
35839bd392adSmrg	i += 3;
35849bd392adSmrg
35859bd392adSmrg	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
35869bd392adSmrg	ptr_cmd[i++] = 0x14;
35879bd392adSmrg	ptr_cmd[i++] = 0x92;
35889bd392adSmrg	i += 3;
35899bd392adSmrg
35909bd392adSmrg	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
35919bd392adSmrg	ptr_cmd[i++] = 0x191;
35929bd392adSmrg	ptr_cmd[i++] = 0;
35939bd392adSmrg
35949bd392adSmrg	i += amdgpu_draw_draw(ptr_cmd + i);
35959bd392adSmrg
35969bd392adSmrg	while (i & 7)
35979bd392adSmrg		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
35989bd392adSmrg
35999bd392adSmrg	resources[0] = bo_dst;
36009bd392adSmrg	resources[1] = bo_src;
36019bd392adSmrg	resources[2] = bo_shader_ps;
36029bd392adSmrg	resources[3] = bo_shader_vs;
36039bd392adSmrg	resources[4] = bo_cmd;
36049bd392adSmrg	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
36059bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
36069bd392adSmrg
36079bd392adSmrg	ib_info.ib_mc_address = mc_address_cmd;
36089bd392adSmrg	ib_info.size = i;
36099bd392adSmrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
36109bd392adSmrg	ibs_request.ring = ring;
36119bd392adSmrg	ibs_request.resources = bo_list;
36129bd392adSmrg	ibs_request.number_of_ibs = 1;
36139bd392adSmrg	ibs_request.ibs = &ib_info;
36149bd392adSmrg	ibs_request.fence_info.handle = NULL;
36159bd392adSmrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
36169bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
36179bd392adSmrg
36189bd392adSmrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
36199bd392adSmrg	fence_status.ip_instance = 0;
36209bd392adSmrg	fence_status.ring = ring;
36219bd392adSmrg	fence_status.context = context_handle;
36229bd392adSmrg	fence_status.fence = ibs_request.seq_no;
36239bd392adSmrg
36249bd392adSmrg	/* wait for IB accomplished */
36259bd392adSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
36269bd392adSmrg					 AMDGPU_TIMEOUT_INFINITE,
36279bd392adSmrg					 0, &expired);
36289bd392adSmrg
36299bd392adSmrg	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
36309bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
36319bd392adSmrg	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
36329bd392adSmrg
36339bd392adSmrg	r = amdgpu_bo_list_destroy(bo_list);
36349bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
36359bd392adSmrg
36369bd392adSmrg	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
36379bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
36389bd392adSmrg	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
36399bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
36409bd392adSmrg
36419bd392adSmrg	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
36429bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
36439bd392adSmrg
36449bd392adSmrg	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_ps_size);
36459bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
36469bd392adSmrg	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_vs_size);
36479bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
36489bd392adSmrg
36499bd392adSmrg	r = amdgpu_cs_ctx_free(context_handle);
36509bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
36519bd392adSmrg}
36529bd392adSmrg
365388f8a8d2Smrgstatic void amdgpu_gpu_reset_test(void)
365488f8a8d2Smrg{
365588f8a8d2Smrg	int r;
365688f8a8d2Smrg	char debugfs_path[256], tmp[10];
365788f8a8d2Smrg	int fd;
365888f8a8d2Smrg	struct stat sbuf;
365988f8a8d2Smrg	amdgpu_context_handle context_handle;
366088f8a8d2Smrg	uint32_t hang_state, hangs;
366188f8a8d2Smrg
366288f8a8d2Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
366388f8a8d2Smrg	CU_ASSERT_EQUAL(r, 0);
366488f8a8d2Smrg
366588f8a8d2Smrg	r = fstat(drm_amdgpu[0], &sbuf);
366688f8a8d2Smrg	CU_ASSERT_EQUAL(r, 0);
366788f8a8d2Smrg
366888f8a8d2Smrg	sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/amdgpu_gpu_recover", minor(sbuf.st_rdev));
366988f8a8d2Smrg	fd = open(debugfs_path, O_RDONLY);
367088f8a8d2Smrg	CU_ASSERT(fd >= 0);
367188f8a8d2Smrg
367288f8a8d2Smrg	r = read(fd, tmp, sizeof(tmp)/sizeof(char));
367388f8a8d2Smrg	CU_ASSERT(r > 0);
367488f8a8d2Smrg
367588f8a8d2Smrg	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
367688f8a8d2Smrg	CU_ASSERT_EQUAL(r, 0);
367788f8a8d2Smrg	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
367888f8a8d2Smrg
367988f8a8d2Smrg	close(fd);
368088f8a8d2Smrg	r = amdgpu_cs_ctx_free(context_handle);
368188f8a8d2Smrg	CU_ASSERT_EQUAL(r, 0);
368288f8a8d2Smrg
368388f8a8d2Smrg	amdgpu_compute_dispatch_test();
368488f8a8d2Smrg	amdgpu_gfx_dispatch_test();
368588f8a8d2Smrg}
3686