basic_tests.c revision 41687f09
13f012e29Smrg/*
23f012e29Smrg * Copyright 2014 Advanced Micro Devices, Inc.
33f012e29Smrg *
43f012e29Smrg * Permission is hereby granted, free of charge, to any person obtaining a
53f012e29Smrg * copy of this software and associated documentation files (the "Software"),
63f012e29Smrg * to deal in the Software without restriction, including without limitation
73f012e29Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
83f012e29Smrg * and/or sell copies of the Software, and to permit persons to whom the
93f012e29Smrg * Software is furnished to do so, subject to the following conditions:
103f012e29Smrg *
113f012e29Smrg * The above copyright notice and this permission notice shall be included in
123f012e29Smrg * all copies or substantial portions of the Software.
133f012e29Smrg *
143f012e29Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
153f012e29Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
163f012e29Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
173f012e29Smrg * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
183f012e29Smrg * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
193f012e29Smrg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
203f012e29Smrg * OTHER DEALINGS IN THE SOFTWARE.
213f012e29Smrg *
223f012e29Smrg*/
233f012e29Smrg
243f012e29Smrg#include <stdio.h>
253f012e29Smrg#include <stdlib.h>
263f012e29Smrg#include <unistd.h>
2788f8a8d2Smrg#include <sys/types.h>
2888f8a8d2Smrg#ifdef MAJOR_IN_SYSMACROS
2988f8a8d2Smrg#include <sys/sysmacros.h>
3088f8a8d2Smrg#endif
3188f8a8d2Smrg#include <sys/stat.h>
3288f8a8d2Smrg#include <fcntl.h>
339bd392adSmrg#if HAVE_ALLOCA_H
343f012e29Smrg# include <alloca.h>
353f012e29Smrg#endif
3600a23bdaSmrg#include <sys/wait.h>
373f012e29Smrg
383f012e29Smrg#include "CUnit/Basic.h"
393f012e29Smrg
403f012e29Smrg#include "amdgpu_test.h"
413f012e29Smrg#include "amdgpu_drm.h"
4241687f09Smrg#include "amdgpu_internal.h"
437cdc0497Smrg#include "util_math.h"
443f012e29Smrg
453f012e29Smrgstatic  amdgpu_device_handle device_handle;
463f012e29Smrgstatic  uint32_t  major_version;
473f012e29Smrgstatic  uint32_t  minor_version;
48d8807b2fSmrgstatic  uint32_t  family_id;
493f012e29Smrg
503f012e29Smrgstatic void amdgpu_query_info_test(void);
513f012e29Smrgstatic void amdgpu_command_submission_gfx(void);
523f012e29Smrgstatic void amdgpu_command_submission_compute(void);
53d8807b2fSmrgstatic void amdgpu_command_submission_multi_fence(void);
543f012e29Smrgstatic void amdgpu_command_submission_sdma(void);
553f012e29Smrgstatic void amdgpu_userptr_test(void);
563f012e29Smrgstatic void amdgpu_semaphore_test(void);
5700a23bdaSmrgstatic void amdgpu_sync_dependency_test(void);
5800a23bdaSmrgstatic void amdgpu_bo_eviction_test(void);
5988f8a8d2Smrgstatic void amdgpu_compute_dispatch_test(void);
6088f8a8d2Smrgstatic void amdgpu_gfx_dispatch_test(void);
615324fb0dSmrgstatic void amdgpu_draw_test(void);
6288f8a8d2Smrgstatic void amdgpu_gpu_reset_test(void);
633f012e29Smrg
643f012e29Smrgstatic void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
653f012e29Smrgstatic void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
663f012e29Smrgstatic void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
6700a23bdaSmrgstatic void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
6800a23bdaSmrg				       unsigned ip_type,
6900a23bdaSmrg				       int instance, int pm4_dw, uint32_t *pm4_src,
7000a23bdaSmrg				       int res_cnt, amdgpu_bo_handle *resources,
7100a23bdaSmrg				       struct amdgpu_cs_ib_info *ib_info,
7200a23bdaSmrg				       struct amdgpu_cs_request *ibs_request);
7341687f09Smrg
743f012e29SmrgCU_TestInfo basic_tests[] = {
753f012e29Smrg	{ "Query Info Test",  amdgpu_query_info_test },
763f012e29Smrg	{ "Userptr Test",  amdgpu_userptr_test },
7700a23bdaSmrg	{ "bo eviction Test",  amdgpu_bo_eviction_test },
783f012e29Smrg	{ "Command submission Test (GFX)",  amdgpu_command_submission_gfx },
793f012e29Smrg	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
80d8807b2fSmrg	{ "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
813f012e29Smrg	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
823f012e29Smrg	{ "SW semaphore Test",  amdgpu_semaphore_test },
8300a23bdaSmrg	{ "Sync dependency Test",  amdgpu_sync_dependency_test },
8488f8a8d2Smrg	{ "Dispatch Test (Compute)",  amdgpu_compute_dispatch_test },
8588f8a8d2Smrg	{ "Dispatch Test (GFX)",  amdgpu_gfx_dispatch_test },
865324fb0dSmrg	{ "Draw Test",  amdgpu_draw_test },
8788f8a8d2Smrg	{ "GPU reset Test", amdgpu_gpu_reset_test },
883f012e29Smrg	CU_TEST_INFO_NULL,
893f012e29Smrg};
909bd392adSmrg#define BUFFER_SIZE (MAX2(8 * 1024, getpagesize()))
913f012e29Smrg#define SDMA_PKT_HEADER_op_offset 0
923f012e29Smrg#define SDMA_PKT_HEADER_op_mask   0x000000FF
933f012e29Smrg#define SDMA_PKT_HEADER_op_shift  0
943f012e29Smrg#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
953f012e29Smrg#define SDMA_OPCODE_CONSTANT_FILL  11
963f012e29Smrg#       define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)           ((x) << 14)
973f012e29Smrg	/* 0 = byte fill
983f012e29Smrg	 * 2 = DW fill
993f012e29Smrg	 */
1003f012e29Smrg#define SDMA_PACKET(op, sub_op, e)	((((e) & 0xFFFF) << 16) |	\
1013f012e29Smrg					(((sub_op) & 0xFF) << 8) |	\
1023f012e29Smrg					(((op) & 0xFF) << 0))
1033f012e29Smrg#define	SDMA_OPCODE_WRITE				  2
1043f012e29Smrg#       define SDMA_WRITE_SUB_OPCODE_LINEAR               0
1053f012e29Smrg#       define SDMA_WRTIE_SUB_OPCODE_TILED                1
1063f012e29Smrg
1073f012e29Smrg#define	SDMA_OPCODE_COPY				  1
1083f012e29Smrg#       define SDMA_COPY_SUB_OPCODE_LINEAR                0
1093f012e29Smrg
11041687f09Smrg#define	SDMA_OPCODE_ATOMIC				  10
11141687f09Smrg#		define SDMA_ATOMIC_LOOP(x)               ((x) << 0)
11241687f09Smrg        /* 0 - single_pass_atomic.
11341687f09Smrg         * 1 - loop_until_compare_satisfied.
11441687f09Smrg         */
11541687f09Smrg#		define SDMA_ATOMIC_TMZ(x)                ((x) << 2)
11641687f09Smrg		/* 0 - non-TMZ.
11741687f09Smrg		 * 1 - TMZ.
11841687f09Smrg	     */
11941687f09Smrg#		define SDMA_ATOMIC_OPCODE(x)             ((x) << 9)
12041687f09Smrg		/* TC_OP_ATOMIC_CMPSWAP_RTN_32 0x00000008
12141687f09Smrg		 * same as Packet 3
12241687f09Smrg		 */
12341687f09Smrg
1243f012e29Smrg#define GFX_COMPUTE_NOP  0xffff1000
1253f012e29Smrg#define SDMA_NOP  0x0
1263f012e29Smrg
1273f012e29Smrg/* PM4 */
/*
 * PM4 packet header helpers.
 *
 * As encoded by the macros below, a header word carries the packet type in
 * bits 31:30 and a count field in bits 29:16.  Type-0 headers carry a
 * register offset in bits 15:0; type-3 headers carry an opcode in bits 15:8.
 *
 * The packet type is converted to unsigned before being shifted so that
 * PACKET_TYPE3 << 30 does not overflow a signed int.  Every builder macro is
 * fully parenthesised so it can be combined with other operators safely.
 */
#define	PACKET_TYPE0	0
#define	PACKET_TYPE1	1
#define	PACKET_TYPE2	2
#define	PACKET_TYPE3	3

/* Field extractors for a header word h. */
#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)

/* Build a type-0 header for register offset reg with count field n. */
#define PACKET0(reg, n)	(((unsigned)PACKET_TYPE0 << 30) |		\
			 ((reg) & 0xFFFF) |				\
			 (((n) & 0x3FFF) << 16))
#define CP_PACKET2			0x80000000
#define		PACKET2_PAD_SHIFT		0
#define		PACKET2_PAD_MASK		(0x3fffffff << 0)

/* NOTE(review): REG_SET is not defined in the visible part of this file. */
#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))

/* Build a type-3 header for opcode op with count field n. */
#define PACKET3(op, n)	(((unsigned)PACKET_TYPE3 << 30) |		\
			 (((op) & 0xFF) << 8) |				\
			 (((n) & 0x3FFF) << 16))
/*
 * Same as PACKET3() with bit 1 of the header set.
 * NOTE(review): bit 1 presumably selects the compute shader type - confirm
 * against the PM4 specification.
 */
#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | (1 << 1))
1503f012e29Smrg
1513f012e29Smrg/* Packet 3 types */
1523f012e29Smrg#define	PACKET3_NOP					0x10
1533f012e29Smrg
1543f012e29Smrg#define	PACKET3_WRITE_DATA				0x37
1553f012e29Smrg#define		WRITE_DATA_DST_SEL(x)                   ((x) << 8)
1563f012e29Smrg		/* 0 - register
1573f012e29Smrg		 * 1 - memory (sync - via GRBM)
1583f012e29Smrg		 * 2 - gl2
1593f012e29Smrg		 * 3 - gds
1603f012e29Smrg		 * 4 - reserved
1613f012e29Smrg		 * 5 - memory (async - direct)
1623f012e29Smrg		 */
1633f012e29Smrg#define		WR_ONE_ADDR                             (1 << 16)
1643f012e29Smrg#define		WR_CONFIRM                              (1 << 20)
1653f012e29Smrg#define		WRITE_DATA_CACHE_POLICY(x)              ((x) << 25)
1663f012e29Smrg		/* 0 - LRU
1673f012e29Smrg		 * 1 - Stream
1683f012e29Smrg		 */
1693f012e29Smrg#define		WRITE_DATA_ENGINE_SEL(x)                ((x) << 30)
1703f012e29Smrg		/* 0 - me
1713f012e29Smrg		 * 1 - pfp
1723f012e29Smrg		 * 2 - ce
1733f012e29Smrg		 */
1743f012e29Smrg
17541687f09Smrg#define	PACKET3_ATOMIC_MEM				0x1E
17641687f09Smrg#define     TC_OP_ATOMIC_CMPSWAP_RTN_32          0x00000008
17741687f09Smrg#define     ATOMIC_MEM_COMMAND(x)               ((x) << 8)
17841687f09Smrg            /* 0 - single_pass_atomic.
17941687f09Smrg             * 1 - loop_until_compare_satisfied.
18041687f09Smrg             */
18141687f09Smrg#define     ATOMIC_MEM_CACHEPOLICAY(x)          ((x) << 25)
18241687f09Smrg            /* 0 - lru.
18341687f09Smrg             * 1 - stream.
18441687f09Smrg             */
18541687f09Smrg#define     ATOMIC_MEM_ENGINESEL(x)             ((x) << 30)
18641687f09Smrg            /* 0 - micro_engine.
18741687f09Smrg			 */
18841687f09Smrg
1893f012e29Smrg#define	PACKET3_DMA_DATA				0x50
1903f012e29Smrg/* 1. header
1913f012e29Smrg * 2. CONTROL
1923f012e29Smrg * 3. SRC_ADDR_LO or DATA [31:0]
1933f012e29Smrg * 4. SRC_ADDR_HI [31:0]
1943f012e29Smrg * 5. DST_ADDR_LO [31:0]
1953f012e29Smrg * 6. DST_ADDR_HI [7:0]
1963f012e29Smrg * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
1973f012e29Smrg */
1983f012e29Smrg/* CONTROL */
1993f012e29Smrg#              define PACKET3_DMA_DATA_ENGINE(x)     ((x) << 0)
2003f012e29Smrg		/* 0 - ME
2013f012e29Smrg		 * 1 - PFP
2023f012e29Smrg		 */
2033f012e29Smrg#              define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
2043f012e29Smrg		/* 0 - LRU
2053f012e29Smrg		 * 1 - Stream
2063f012e29Smrg		 * 2 - Bypass
2073f012e29Smrg		 */
2083f012e29Smrg#              define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15)
2093f012e29Smrg#              define PACKET3_DMA_DATA_DST_SEL(x)  ((x) << 20)
2103f012e29Smrg		/* 0 - DST_ADDR using DAS
2113f012e29Smrg		 * 1 - GDS
2123f012e29Smrg		 * 3 - DST_ADDR using L2
2133f012e29Smrg		 */
2143f012e29Smrg#              define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
2153f012e29Smrg		/* 0 - LRU
2163f012e29Smrg		 * 1 - Stream
2173f012e29Smrg		 * 2 - Bypass
2183f012e29Smrg		 */
2193f012e29Smrg#              define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27)
2203f012e29Smrg#              define PACKET3_DMA_DATA_SRC_SEL(x)  ((x) << 29)
2213f012e29Smrg		/* 0 - SRC_ADDR using SAS
2223f012e29Smrg		 * 1 - GDS
2233f012e29Smrg		 * 2 - DATA
2243f012e29Smrg		 * 3 - SRC_ADDR using L2
2253f012e29Smrg		 */
/* Bit 31 of the CONTROL dword; unsigned literal avoids shifting into the sign bit. */
#              define PACKET3_DMA_DATA_CP_SYNC     (1u << 31)
2273f012e29Smrg/* COMMAND */
2283f012e29Smrg#              define PACKET3_DMA_DATA_DIS_WC      (1 << 21)
2293f012e29Smrg#              define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22)
2303f012e29Smrg		/* 0 - none
2313f012e29Smrg		 * 1 - 8 in 16
2323f012e29Smrg		 * 2 - 8 in 32
2333f012e29Smrg		 * 3 - 8 in 64
2343f012e29Smrg		 */
2353f012e29Smrg#              define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24)
2363f012e29Smrg		/* 0 - none
2373f012e29Smrg		 * 1 - 8 in 16
2383f012e29Smrg		 * 2 - 8 in 32
2393f012e29Smrg		 * 3 - 8 in 64
2403f012e29Smrg		 */
2413f012e29Smrg#              define PACKET3_DMA_DATA_CMD_SAS     (1 << 26)
2423f012e29Smrg		/* 0 - memory
2433f012e29Smrg		 * 1 - register
2443f012e29Smrg		 */
2453f012e29Smrg#              define PACKET3_DMA_DATA_CMD_DAS     (1 << 27)
2463f012e29Smrg		/* 0 - memory
2473f012e29Smrg		 * 1 - register
2483f012e29Smrg		 */
2493f012e29Smrg#              define PACKET3_DMA_DATA_CMD_SAIC    (1 << 28)
2503f012e29Smrg#              define PACKET3_DMA_DATA_CMD_DAIC    (1 << 29)
2513f012e29Smrg#              define PACKET3_DMA_DATA_CMD_RAW_WAIT  (1 << 30)
2523f012e29Smrg
253d8807b2fSmrg#define SDMA_PACKET_SI(op, b, t, s, cnt)	((((op) & 0xF) << 28) |	\
254d8807b2fSmrg						(((b) & 0x1) << 26) |		\
255d8807b2fSmrg						(((t) & 0x1) << 23) |		\
256d8807b2fSmrg						(((s) & 0x1) << 22) |		\
257d8807b2fSmrg						(((cnt) & 0xFFFFF) << 0))
258d8807b2fSmrg#define	SDMA_OPCODE_COPY_SI	3
259d8807b2fSmrg#define SDMA_OPCODE_CONSTANT_FILL_SI	13
260d8807b2fSmrg#define SDMA_NOP_SI  0xf
261d8807b2fSmrg#define GFX_COMPUTE_NOP_SI 0x80000000
262d8807b2fSmrg#define	PACKET3_DMA_DATA_SI	0x41
263d8807b2fSmrg#              define PACKET3_DMA_DATA_SI_ENGINE(x)     ((x) << 27)
264d8807b2fSmrg		/* 0 - ME
265d8807b2fSmrg		 * 1 - PFP
266d8807b2fSmrg		 */
267d8807b2fSmrg#              define PACKET3_DMA_DATA_SI_DST_SEL(x)  ((x) << 20)
268d8807b2fSmrg		/* 0 - DST_ADDR using DAS
269d8807b2fSmrg		 * 1 - GDS
270d8807b2fSmrg		 * 3 - DST_ADDR using L2
271d8807b2fSmrg		 */
272d8807b2fSmrg#              define PACKET3_DMA_DATA_SI_SRC_SEL(x)  ((x) << 29)
273d8807b2fSmrg		/* 0 - SRC_ADDR using SAS
274d8807b2fSmrg		 * 1 - GDS
275d8807b2fSmrg		 * 2 - DATA
276d8807b2fSmrg		 * 3 - SRC_ADDR using L2
277d8807b2fSmrg		 */
/* Bit 31 of the SI DMA_DATA control dword; unsigned literal avoids shifting into the sign bit. */
#              define PACKET3_DMA_DATA_SI_CP_SYNC     (1u << 31)
279d8807b2fSmrg
28000a23bdaSmrg
28100a23bdaSmrg#define PKT3_CONTEXT_CONTROL                   0x28
28200a23bdaSmrg#define     CONTEXT_CONTROL_LOAD_ENABLE(x)     (((unsigned)(x) & 0x1) << 31)
28300a23bdaSmrg#define     CONTEXT_CONTROL_LOAD_CE_RAM(x)     (((unsigned)(x) & 0x1) << 28)
28400a23bdaSmrg#define     CONTEXT_CONTROL_SHADOW_ENABLE(x)   (((unsigned)(x) & 0x1) << 31)
28500a23bdaSmrg
28600a23bdaSmrg#define PKT3_CLEAR_STATE                       0x12
28700a23bdaSmrg
28800a23bdaSmrg#define PKT3_SET_SH_REG                        0x76
28900a23bdaSmrg#define		PACKET3_SET_SH_REG_START			0x00002c00
29000a23bdaSmrg
29100a23bdaSmrg#define	PACKET3_DISPATCH_DIRECT				0x15
2925324fb0dSmrg#define PACKET3_EVENT_WRITE				0x46
2935324fb0dSmrg#define PACKET3_ACQUIRE_MEM				0x58
2945324fb0dSmrg#define PACKET3_SET_CONTEXT_REG				0x69
2955324fb0dSmrg#define PACKET3_SET_UCONFIG_REG				0x79
2965324fb0dSmrg#define PACKET3_DRAW_INDEX_AUTO				0x2D
29700a23bdaSmrg/* gfx 8 */
29800a23bdaSmrg#define mmCOMPUTE_PGM_LO                                                        0x2e0c
29900a23bdaSmrg#define mmCOMPUTE_PGM_RSRC1                                                     0x2e12
30000a23bdaSmrg#define mmCOMPUTE_TMPRING_SIZE                                                  0x2e18
30100a23bdaSmrg#define mmCOMPUTE_USER_DATA_0                                                   0x2e40
30200a23bdaSmrg#define mmCOMPUTE_USER_DATA_1                                                   0x2e41
30300a23bdaSmrg#define mmCOMPUTE_RESOURCE_LIMITS                                               0x2e15
30400a23bdaSmrg#define mmCOMPUTE_NUM_THREAD_X                                                  0x2e07
30500a23bdaSmrg
30600a23bdaSmrg
30700a23bdaSmrg
/*
 * Reverse the byte order of a 32-bit value.
 *
 * The argument is parenthesised and converted to uint32_t before masking, so
 * expression arguments are handled correctly and the << 24 term cannot
 * overflow a signed int.  The result is still an integer constant expression
 * when num is one, so the macro remains usable in static initialisers.
 *
 * Note: num is evaluated four times; do not pass expressions with side
 * effects.
 */
#define SWAP_32(num) ((((uint32_t)(num) & 0xff000000u) >> 24) | \
		      (((uint32_t)(num) & 0x0000ff00u) << 8) | \
		      (((uint32_t)(num) & 0x00ff0000u) >> 8) | \
		      (((uint32_t)(num) & 0x000000ffu) << 24))
31200a23bdaSmrg
31300a23bdaSmrg
/* Shader code
 * void main()
 * {
 *	float x = some_input;
 *	for (unsigned i = 0; i < 1000000; i++)
 *		x = sin(x);
 *
 *	u[0] = 42u;
 * }
 */
32500a23bdaSmrg
32600a23bdaSmrgstatic  uint32_t shader_bin[] = {
32700a23bdaSmrg	SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
32800a23bdaSmrg	SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
32900a23bdaSmrg	SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
33000a23bdaSmrg	SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
33100a23bdaSmrg};
33200a23bdaSmrg
33300a23bdaSmrg#define CODE_OFFSET 512
33400a23bdaSmrg#define DATA_OFFSET 1024
33500a23bdaSmrg
3365324fb0dSmrgenum cs_type {
3375324fb0dSmrg	CS_BUFFERCLEAR,
3389bd392adSmrg	CS_BUFFERCOPY,
3399bd392adSmrg	CS_HANG,
3409bd392adSmrg	CS_HANG_SLOW
3415324fb0dSmrg};
3425324fb0dSmrg
3435324fb0dSmrgstatic const uint32_t bufferclear_cs_shader_gfx9[] = {
3445324fb0dSmrg    0xD1FD0000, 0x04010C08, 0x7E020204, 0x7E040205,
3455324fb0dSmrg    0x7E060206, 0x7E080207, 0xE01C2000, 0x80000100,
3465324fb0dSmrg    0xBF810000
3475324fb0dSmrg};
3485324fb0dSmrg
3495324fb0dSmrgstatic const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = {
3505324fb0dSmrg	{0x2e12, 0x000C0041},	//{ mmCOMPUTE_PGM_RSRC1,	  0x000C0041 },
3515324fb0dSmrg	{0x2e13, 0x00000090},	//{ mmCOMPUTE_PGM_RSRC2,	  0x00000090 },
3525324fb0dSmrg	{0x2e07, 0x00000040},	//{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 },
3535324fb0dSmrg	{0x2e08, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 },
3545324fb0dSmrg	{0x2e09, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
3555324fb0dSmrg};
3565324fb0dSmrg
3575324fb0dSmrgstatic const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5;
3585324fb0dSmrg
3595324fb0dSmrgstatic const uint32_t buffercopy_cs_shader_gfx9[] = {
3605324fb0dSmrg    0xD1FD0000, 0x04010C08, 0xE00C2000, 0x80000100,
3615324fb0dSmrg    0xBF8C0F70, 0xE01C2000, 0x80010100, 0xBF810000
3625324fb0dSmrg};
3635324fb0dSmrg
3645324fb0dSmrgstatic const uint32_t preamblecache_gfx9[] = {
3655324fb0dSmrg	0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
3665324fb0dSmrg	0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
3675324fb0dSmrg	0xc0026900, 0xb4,  0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
3685324fb0dSmrg	0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
3695324fb0dSmrg	0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
3705324fb0dSmrg	0xc0016900, 0x2d5, 0x10000, 0xc0016900,  0x2dc, 0x0,
3715324fb0dSmrg	0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
3725324fb0dSmrg	0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
37388f8a8d2Smrg	0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20,
3745324fb0dSmrg	0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
3755324fb0dSmrg	0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0,
3765324fb0dSmrg	0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
3775324fb0dSmrg	0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
3785324fb0dSmrg	0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
3795324fb0dSmrg	0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
38088f8a8d2Smrg	0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0,
38188f8a8d2Smrg	0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1,
3825324fb0dSmrg	0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0,
3835324fb0dSmrg	0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0,
3845324fb0dSmrg	0xc0017900, 0x24b, 0x0
3855324fb0dSmrg};
3865324fb0dSmrg
3875324fb0dSmrgenum ps_type {
3885324fb0dSmrg	PS_CONST,
3899bd392adSmrg	PS_TEX,
3909bd392adSmrg	PS_HANG,
3919bd392adSmrg	PS_HANG_SLOW
3925324fb0dSmrg};
3935324fb0dSmrg
3945324fb0dSmrgstatic const uint32_t ps_const_shader_gfx9[] = {
3955324fb0dSmrg    0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
3965324fb0dSmrg    0xD2960000, 0x00020300, 0xD2960001, 0x00020702,
3975324fb0dSmrg    0xC4001C0F, 0x00000100, 0xBF810000
3985324fb0dSmrg};
3995324fb0dSmrg
4005324fb0dSmrgstatic const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6;
4015324fb0dSmrg
4025324fb0dSmrgstatic const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = {
4035324fb0dSmrg    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
4045324fb0dSmrg     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 },
4055324fb0dSmrg     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 },
4065324fb0dSmrg     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 },
4075324fb0dSmrg     { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 },
4085324fb0dSmrg     { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 },
4095324fb0dSmrg     { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 },
4105324fb0dSmrg     { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 },
4115324fb0dSmrg     { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 },
4125324fb0dSmrg     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 }
4135324fb0dSmrg    }
4145324fb0dSmrg};
4155324fb0dSmrg
4165324fb0dSmrgstatic const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = {
4175324fb0dSmrg    0x00000004
4185324fb0dSmrg};
4195324fb0dSmrg
4205324fb0dSmrgstatic const uint32_t ps_num_sh_registers_gfx9 = 2;
4215324fb0dSmrg
4225324fb0dSmrgstatic const uint32_t ps_const_sh_registers_gfx9[][2] = {
4235324fb0dSmrg    {0x2C0A, 0x000C0040},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 },
4245324fb0dSmrg    {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 }
4255324fb0dSmrg};
4265324fb0dSmrg
4275324fb0dSmrgstatic const uint32_t ps_num_context_registers_gfx9 = 7;
4285324fb0dSmrg
4295324fb0dSmrgstatic const uint32_t ps_const_context_reg_gfx9[][2] = {
4305324fb0dSmrg    {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
4315324fb0dSmrg    {0xA1B6, 0x00000000}, //{ mmSPI_PS_IN_CONTROL,       0x00000000 },
4325324fb0dSmrg    {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
4335324fb0dSmrg    {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
4345324fb0dSmrg    {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
4355324fb0dSmrg    {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
4365324fb0dSmrg    {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004 }
4375324fb0dSmrg};
4385324fb0dSmrg
4395324fb0dSmrgstatic const uint32_t ps_tex_shader_gfx9[] = {
4405324fb0dSmrg    0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000,
4415324fb0dSmrg    0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00,
4425324fb0dSmrg    0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000,
4435324fb0dSmrg    0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F,
4445324fb0dSmrg    0x00000100, 0xBF810000
4455324fb0dSmrg};
4465324fb0dSmrg
4475324fb0dSmrgstatic const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = {
4485324fb0dSmrg    0x0000000B
4495324fb0dSmrg};
4505324fb0dSmrg
4515324fb0dSmrgstatic const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6;
4525324fb0dSmrg
4535324fb0dSmrgstatic const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = {
4545324fb0dSmrg    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
4555324fb0dSmrg     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 },
4565324fb0dSmrg     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 },
4575324fb0dSmrg     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 },
4585324fb0dSmrg     { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 },
4595324fb0dSmrg     { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 },
4605324fb0dSmrg     { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 },
4615324fb0dSmrg     { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 },
4625324fb0dSmrg     { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 },
4635324fb0dSmrg     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 }
4645324fb0dSmrg    }
4655324fb0dSmrg};
4665324fb0dSmrg
4675324fb0dSmrgstatic const uint32_t ps_tex_sh_registers_gfx9[][2] = {
4685324fb0dSmrg    {0x2C0A, 0x000C0081},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 },
4695324fb0dSmrg    {0x2C0B, 0x00000018}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 }
4705324fb0dSmrg};
4715324fb0dSmrg
4725324fb0dSmrgstatic const uint32_t ps_tex_context_reg_gfx9[][2] = {
4735324fb0dSmrg    {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
4745324fb0dSmrg    {0xA1B6, 0x00000001}, //{ mmSPI_PS_IN_CONTROL,       0x00000001 },
4755324fb0dSmrg    {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
4765324fb0dSmrg    {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
4775324fb0dSmrg    {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
4785324fb0dSmrg    {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
4795324fb0dSmrg    {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004  }
4805324fb0dSmrg};
4815324fb0dSmrg
4825324fb0dSmrgstatic const uint32_t vs_RectPosTexFast_shader_gfx9[] = {
4835324fb0dSmrg    0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100,
4845324fb0dSmrg    0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206,
4855324fb0dSmrg    0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080,
4865324fb0dSmrg    0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003,
4875324fb0dSmrg    0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101,
4885324fb0dSmrg    0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903,
4895324fb0dSmrg    0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100,
4905324fb0dSmrg    0xC400020F, 0x05060403, 0xBF810000
4915324fb0dSmrg};
4925324fb0dSmrg
4935324fb0dSmrgstatic const uint32_t cached_cmd_gfx9[] = {
4945324fb0dSmrg	0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0,
4955324fb0dSmrg	0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
4965324fb0dSmrg	0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf,
4979bd392adSmrg	0xc0056900, 0x105, 0x0, 0x0,  0x0, 0x0, 0x12,
4985324fb0dSmrg	0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0,
4995324fb0dSmrg	0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
5005324fb0dSmrg	0xc0026900, 0x292, 0x20, 0x60201b8,
5015324fb0dSmrg	0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
5025324fb0dSmrg};
50300a23bdaSmrg
5049bd392adSmrgunsigned int memcpy_ps_hang[] = {
5059bd392adSmrg        0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100,
5069bd392adSmrg        0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001,
5079bd392adSmrg        0xC80D0101, 0xBF8C007F, 0xF0800F00, 0x00010002,
5089bd392adSmrg        0xBEFE040C, 0xBF8C0F70, 0xBF800000, 0xBF800000,
5099bd392adSmrg        0xF800180F, 0x03020100, 0xBF810000
5109bd392adSmrg};
5119bd392adSmrg
5129bd392adSmrgstruct amdgpu_test_shader {
5139bd392adSmrg	uint32_t *shader;
5149bd392adSmrg	uint32_t header_length;
5159bd392adSmrg	uint32_t body_length;
5169bd392adSmrg	uint32_t foot_length;
5179bd392adSmrg};
5189bd392adSmrg
5199bd392adSmrgunsigned int memcpy_cs_hang_slow_ai_codes[] = {
5209bd392adSmrg    0xd1fd0000, 0x04010c08, 0xe00c2000, 0x80000100,
5219bd392adSmrg    0xbf8c0f70, 0xe01c2000, 0x80010100, 0xbf810000
5229bd392adSmrg};
5239bd392adSmrg
5249bd392adSmrgstruct amdgpu_test_shader memcpy_cs_hang_slow_ai = {
5259bd392adSmrg        memcpy_cs_hang_slow_ai_codes,
5269bd392adSmrg        4,
5279bd392adSmrg        3,
5289bd392adSmrg        1
5299bd392adSmrg};
5309bd392adSmrg
5319bd392adSmrgunsigned int memcpy_cs_hang_slow_rv_codes[] = {
5329bd392adSmrg    0x8e00860c, 0x32000000, 0xe00c2000, 0x80010100,
5339bd392adSmrg    0xbf8c0f70, 0xe01c2000, 0x80020100, 0xbf810000
5349bd392adSmrg};
5359bd392adSmrg
5369bd392adSmrgstruct amdgpu_test_shader memcpy_cs_hang_slow_rv = {
5379bd392adSmrg        memcpy_cs_hang_slow_rv_codes,
5389bd392adSmrg        4,
5399bd392adSmrg        3,
5409bd392adSmrg        1
5419bd392adSmrg};
5429bd392adSmrg
5439bd392adSmrgunsigned int memcpy_ps_hang_slow_ai_codes[] = {
5449bd392adSmrg        0xbefc000c, 0xbe8e017e, 0xbefe077e, 0xd4080000,
5459bd392adSmrg        0xd4090001, 0xd40c0100, 0xd40d0101, 0xf0800f00,
5469bd392adSmrg        0x00400002, 0xbefe010e, 0xbf8c0f70, 0xbf800000,
5479bd392adSmrg        0xbf800000, 0xbf800000, 0xbf800000, 0xc400180f,
5489bd392adSmrg        0x03020100, 0xbf810000
5499bd392adSmrg};
5509bd392adSmrg
5519bd392adSmrgstruct amdgpu_test_shader memcpy_ps_hang_slow_ai = {
5529bd392adSmrg        memcpy_ps_hang_slow_ai_codes,
5539bd392adSmrg        7,
5549bd392adSmrg        2,
5559bd392adSmrg        9
5569bd392adSmrg};
5579bd392adSmrg
5587cdc0497Smrgint amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size,
5597cdc0497Smrg			unsigned alignment, unsigned heap, uint64_t alloc_flags,
5607cdc0497Smrg			uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu,
5617cdc0497Smrg			uint64_t *mc_address,
5627cdc0497Smrg			amdgpu_va_handle *va_handle)
5637cdc0497Smrg{
5647cdc0497Smrg	struct amdgpu_bo_alloc_request request = {};
5657cdc0497Smrg	amdgpu_bo_handle buf_handle;
5667cdc0497Smrg	amdgpu_va_handle handle;
5677cdc0497Smrg	uint64_t vmc_addr;
5687cdc0497Smrg	int r;
5697cdc0497Smrg
5707cdc0497Smrg	request.alloc_size = size;
5717cdc0497Smrg	request.phys_alignment = alignment;
5727cdc0497Smrg	request.preferred_heap = heap;
5737cdc0497Smrg	request.flags = alloc_flags;
5747cdc0497Smrg
5757cdc0497Smrg	r = amdgpu_bo_alloc(dev, &request, &buf_handle);
5767cdc0497Smrg	if (r)
5777cdc0497Smrg		return r;
5787cdc0497Smrg
5797cdc0497Smrg	r = amdgpu_va_range_alloc(dev,
5807cdc0497Smrg				  amdgpu_gpu_va_range_general,
5817cdc0497Smrg				  size, alignment, 0, &vmc_addr,
5827cdc0497Smrg				  &handle, 0);
5837cdc0497Smrg	if (r)
5847cdc0497Smrg		goto error_va_alloc;
5857cdc0497Smrg
5867cdc0497Smrg	r = amdgpu_bo_va_op_raw(dev, buf_handle, 0,  ALIGN(size, getpagesize()), vmc_addr,
5877cdc0497Smrg				   AMDGPU_VM_PAGE_READABLE |
5887cdc0497Smrg				   AMDGPU_VM_PAGE_WRITEABLE |
5897cdc0497Smrg				   AMDGPU_VM_PAGE_EXECUTABLE |
5907cdc0497Smrg				   mapping_flags,
5917cdc0497Smrg				   AMDGPU_VA_OP_MAP);
5927cdc0497Smrg	if (r)
5937cdc0497Smrg		goto error_va_map;
5947cdc0497Smrg
5957cdc0497Smrg	r = amdgpu_bo_cpu_map(buf_handle, cpu);
5967cdc0497Smrg	if (r)
5977cdc0497Smrg		goto error_cpu_map;
5987cdc0497Smrg
5997cdc0497Smrg	*bo = buf_handle;
6007cdc0497Smrg	*mc_address = vmc_addr;
6017cdc0497Smrg	*va_handle = handle;
6027cdc0497Smrg
6037cdc0497Smrg	return 0;
6047cdc0497Smrg
6057cdc0497Smrg error_cpu_map:
6067cdc0497Smrg	amdgpu_bo_cpu_unmap(buf_handle);
6077cdc0497Smrg
6087cdc0497Smrg error_va_map:
6097cdc0497Smrg	amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);
6107cdc0497Smrg
6117cdc0497Smrg error_va_alloc:
6127cdc0497Smrg	amdgpu_bo_free(buf_handle);
6137cdc0497Smrg	return r;
6147cdc0497Smrg}
6157cdc0497Smrg
6167cdc0497Smrg
6177cdc0497Smrg
61841687f09SmrgCU_BOOL suite_basic_tests_enable(void)
61941687f09Smrg{
62041687f09Smrg	uint32_t asic_id;
62141687f09Smrg
62241687f09Smrg	if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
62341687f09Smrg					     &minor_version, &device_handle))
62441687f09Smrg		return CU_FALSE;
62541687f09Smrg
62641687f09Smrg	asic_id = device_handle->info.asic_id;
62741687f09Smrg
62841687f09Smrg	if (amdgpu_device_deinitialize(device_handle))
62941687f09Smrg		return CU_FALSE;
63041687f09Smrg
63141687f09Smrg	/* disable gfx engine basic test cases for Arturus due to no CPG */
63241687f09Smrg	if (asic_is_arcturus(asic_id)) {
63341687f09Smrg		if (amdgpu_set_test_active("Basic Tests",
63441687f09Smrg					"Command submission Test (GFX)",
63541687f09Smrg					CU_FALSE))
63641687f09Smrg			fprintf(stderr, "test deactivation failed - %s\n",
63741687f09Smrg				CU_get_error_msg());
63841687f09Smrg
63941687f09Smrg		if (amdgpu_set_test_active("Basic Tests",
64041687f09Smrg					"Command submission Test (Multi-Fence)",
64141687f09Smrg					CU_FALSE))
64241687f09Smrg			fprintf(stderr, "test deactivation failed - %s\n",
64341687f09Smrg				CU_get_error_msg());
64441687f09Smrg
64541687f09Smrg		if (amdgpu_set_test_active("Basic Tests",
64641687f09Smrg					"Sync dependency Test",
64741687f09Smrg					CU_FALSE))
64841687f09Smrg			fprintf(stderr, "test deactivation failed - %s\n",
64941687f09Smrg				CU_get_error_msg());
65041687f09Smrg	}
65141687f09Smrg
65241687f09Smrg	return CU_TRUE;
65341687f09Smrg}
65441687f09Smrg
6553f012e29Smrgint suite_basic_tests_init(void)
6563f012e29Smrg{
657d8807b2fSmrg	struct amdgpu_gpu_info gpu_info = {0};
6583f012e29Smrg	int r;
6593f012e29Smrg
6603f012e29Smrg	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
6613f012e29Smrg				   &minor_version, &device_handle);
6623f012e29Smrg
663d8807b2fSmrg	if (r) {
664037b3c26Smrg		if ((r == -EACCES) && (errno == EACCES))
665037b3c26Smrg			printf("\n\nError:%s. "
666037b3c26Smrg				"Hint:Try to run this test program as root.",
667037b3c26Smrg				strerror(errno));
6683f012e29Smrg		return CUE_SINIT_FAILED;
669037b3c26Smrg	}
670d8807b2fSmrg
671d8807b2fSmrg	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
672d8807b2fSmrg	if (r)
673d8807b2fSmrg		return CUE_SINIT_FAILED;
674d8807b2fSmrg
675d8807b2fSmrg	family_id = gpu_info.family_id;
676d8807b2fSmrg
677d8807b2fSmrg	return CUE_SUCCESS;
6783f012e29Smrg}
6793f012e29Smrg
6803f012e29Smrgint suite_basic_tests_clean(void)
6813f012e29Smrg{
6823f012e29Smrg	int r = amdgpu_device_deinitialize(device_handle);
6833f012e29Smrg
6843f012e29Smrg	if (r == 0)
6853f012e29Smrg		return CUE_SUCCESS;
6863f012e29Smrg	else
6873f012e29Smrg		return CUE_SCLEAN_FAILED;
6883f012e29Smrg}
6893f012e29Smrg
6903f012e29Smrgstatic void amdgpu_query_info_test(void)
6913f012e29Smrg{
6923f012e29Smrg	struct amdgpu_gpu_info gpu_info = {0};
6933f012e29Smrg	uint32_t version, feature;
6943f012e29Smrg	int r;
6953f012e29Smrg
6963f012e29Smrg	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
6973f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
6983f012e29Smrg
6993f012e29Smrg	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
7003f012e29Smrg					  0, &version, &feature);
7013f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7023f012e29Smrg}
7033f012e29Smrg
7043f012e29Smrgstatic void amdgpu_command_submission_gfx_separate_ibs(void)
7053f012e29Smrg{
7063f012e29Smrg	amdgpu_context_handle context_handle;
7073f012e29Smrg	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
7083f012e29Smrg	void *ib_result_cpu, *ib_result_ce_cpu;
7093f012e29Smrg	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
7103f012e29Smrg	struct amdgpu_cs_request ibs_request = {0};
7113f012e29Smrg	struct amdgpu_cs_ib_info ib_info[2];
7123f012e29Smrg	struct amdgpu_cs_fence fence_status = {0};
7133f012e29Smrg	uint32_t *ptr;
7143f012e29Smrg	uint32_t expired;
7153f012e29Smrg	amdgpu_bo_list_handle bo_list;
7163f012e29Smrg	amdgpu_va_handle va_handle, va_handle_ce;
717d8807b2fSmrg	int r, i = 0;
7183f012e29Smrg
7193f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
7203f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7213f012e29Smrg
7223f012e29Smrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
7233f012e29Smrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
7243f012e29Smrg				    &ib_result_handle, &ib_result_cpu,
7253f012e29Smrg				    &ib_result_mc_address, &va_handle);
7263f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7273f012e29Smrg
7283f012e29Smrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
7293f012e29Smrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
7303f012e29Smrg				    &ib_result_ce_handle, &ib_result_ce_cpu,
7313f012e29Smrg				    &ib_result_ce_mc_address, &va_handle_ce);
7323f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7333f012e29Smrg
7343f012e29Smrg	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
7353f012e29Smrg			       ib_result_ce_handle, &bo_list);
7363f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7373f012e29Smrg
7383f012e29Smrg	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
7393f012e29Smrg
7403f012e29Smrg	/* IT_SET_CE_DE_COUNTERS */
7413f012e29Smrg	ptr = ib_result_ce_cpu;
742d8807b2fSmrg	if (family_id != AMDGPU_FAMILY_SI) {
743d8807b2fSmrg		ptr[i++] = 0xc0008900;
744d8807b2fSmrg		ptr[i++] = 0;
745d8807b2fSmrg	}
746d8807b2fSmrg	ptr[i++] = 0xc0008400;
747d8807b2fSmrg	ptr[i++] = 1;
7483f012e29Smrg	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
749d8807b2fSmrg	ib_info[0].size = i;
7503f012e29Smrg	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
7513f012e29Smrg
7523f012e29Smrg	/* IT_WAIT_ON_CE_COUNTER */
7533f012e29Smrg	ptr = ib_result_cpu;
7543f012e29Smrg	ptr[0] = 0xc0008600;
7553f012e29Smrg	ptr[1] = 0x00000001;
7563f012e29Smrg	ib_info[1].ib_mc_address = ib_result_mc_address;
7573f012e29Smrg	ib_info[1].size = 2;
7583f012e29Smrg
7593f012e29Smrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
7603f012e29Smrg	ibs_request.number_of_ibs = 2;
7613f012e29Smrg	ibs_request.ibs = ib_info;
7623f012e29Smrg	ibs_request.resources = bo_list;
7633f012e29Smrg	ibs_request.fence_info.handle = NULL;
7643f012e29Smrg
7653f012e29Smrg	r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
7663f012e29Smrg
7673f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7683f012e29Smrg
7693f012e29Smrg	fence_status.context = context_handle;
7703f012e29Smrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
7713f012e29Smrg	fence_status.ip_instance = 0;
7723f012e29Smrg	fence_status.fence = ibs_request.seq_no;
7733f012e29Smrg
7743f012e29Smrg	r = amdgpu_cs_query_fence_status(&fence_status,
7753f012e29Smrg					 AMDGPU_TIMEOUT_INFINITE,
7763f012e29Smrg					 0, &expired);
7773f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7783f012e29Smrg
7793f012e29Smrg	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
7803f012e29Smrg				     ib_result_mc_address, 4096);
7813f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7823f012e29Smrg
7833f012e29Smrg	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
7843f012e29Smrg				     ib_result_ce_mc_address, 4096);
7853f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7863f012e29Smrg
7873f012e29Smrg	r = amdgpu_bo_list_destroy(bo_list);
7883f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7893f012e29Smrg
7903f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
7913f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7923f012e29Smrg
7933f012e29Smrg}
7943f012e29Smrg
7953f012e29Smrgstatic void amdgpu_command_submission_gfx_shared_ib(void)
7963f012e29Smrg{
7973f012e29Smrg	amdgpu_context_handle context_handle;
7983f012e29Smrg	amdgpu_bo_handle ib_result_handle;
7993f012e29Smrg	void *ib_result_cpu;
8003f012e29Smrg	uint64_t ib_result_mc_address;
8013f012e29Smrg	struct amdgpu_cs_request ibs_request = {0};
8023f012e29Smrg	struct amdgpu_cs_ib_info ib_info[2];
8033f012e29Smrg	struct amdgpu_cs_fence fence_status = {0};
8043f012e29Smrg	uint32_t *ptr;
8053f012e29Smrg	uint32_t expired;
8063f012e29Smrg	amdgpu_bo_list_handle bo_list;
8073f012e29Smrg	amdgpu_va_handle va_handle;
808d8807b2fSmrg	int r, i = 0;
8093f012e29Smrg
8103f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
8113f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
8123f012e29Smrg
8133f012e29Smrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
8143f012e29Smrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
8153f012e29Smrg				    &ib_result_handle, &ib_result_cpu,
8163f012e29Smrg				    &ib_result_mc_address, &va_handle);
8173f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
8183f012e29Smrg
8193f012e29Smrg	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
8203f012e29Smrg			       &bo_list);
8213f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
8223f012e29Smrg
8233f012e29Smrg	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
8243f012e29Smrg
8253f012e29Smrg	/* IT_SET_CE_DE_COUNTERS */
8263f012e29Smrg	ptr = ib_result_cpu;
827d8807b2fSmrg	if (family_id != AMDGPU_FAMILY_SI) {
828d8807b2fSmrg		ptr[i++] = 0xc0008900;
829d8807b2fSmrg		ptr[i++] = 0;
830d8807b2fSmrg	}
831d8807b2fSmrg	ptr[i++] = 0xc0008400;
832d8807b2fSmrg	ptr[i++] = 1;
8333f012e29Smrg	ib_info[0].ib_mc_address = ib_result_mc_address;
834d8807b2fSmrg	ib_info[0].size = i;
8353f012e29Smrg	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
8363f012e29Smrg
8373f012e29Smrg	ptr = (uint32_t *)ib_result_cpu + 4;
8383f012e29Smrg	ptr[0] = 0xc0008600;
8393f012e29Smrg	ptr[1] = 0x00000001;
8403f012e29Smrg	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
8413f012e29Smrg	ib_info[1].size = 2;
8423f012e29Smrg
8433f012e29Smrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
8443f012e29Smrg	ibs_request.number_of_ibs = 2;
8453f012e29Smrg	ibs_request.ibs = ib_info;
8463f012e29Smrg	ibs_request.resources = bo_list;
8473f012e29Smrg	ibs_request.fence_info.handle = NULL;
8483f012e29Smrg
8493f012e29Smrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
8503f012e29Smrg
8513f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
8523f012e29Smrg
8533f012e29Smrg	fence_status.context = context_handle;
8543f012e29Smrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
8553f012e29Smrg	fence_status.ip_instance = 0;
8563f012e29Smrg	fence_status.fence = ibs_request.seq_no;
8573f012e29Smrg
8583f012e29Smrg	r = amdgpu_cs_query_fence_status(&fence_status,
8593f012e29Smrg					 AMDGPU_TIMEOUT_INFINITE,
8603f012e29Smrg					 0, &expired);
8613f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
8623f012e29Smrg
8633f012e29Smrg	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
8643f012e29Smrg				     ib_result_mc_address, 4096);
8653f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
8663f012e29Smrg
8673f012e29Smrg	r = amdgpu_bo_list_destroy(bo_list);
8683f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
8693f012e29Smrg
8703f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
8713f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
8723f012e29Smrg}
8733f012e29Smrg
8743f012e29Smrgstatic void amdgpu_command_submission_gfx_cp_write_data(void)
8753f012e29Smrg{
8763f012e29Smrg	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
8773f012e29Smrg}
8783f012e29Smrg
8793f012e29Smrgstatic void amdgpu_command_submission_gfx_cp_const_fill(void)
8803f012e29Smrg{
8813f012e29Smrg	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
8823f012e29Smrg}
8833f012e29Smrg
8843f012e29Smrgstatic void amdgpu_command_submission_gfx_cp_copy_data(void)
8853f012e29Smrg{
8863f012e29Smrg	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
8873f012e29Smrg}
8883f012e29Smrg
88900a23bdaSmrgstatic void amdgpu_bo_eviction_test(void)
89000a23bdaSmrg{
89100a23bdaSmrg	const int sdma_write_length = 1024;
89200a23bdaSmrg	const int pm4_dw = 256;
89300a23bdaSmrg	amdgpu_context_handle context_handle;
89400a23bdaSmrg	amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
89500a23bdaSmrg	amdgpu_bo_handle *resources;
89600a23bdaSmrg	uint32_t *pm4;
89700a23bdaSmrg	struct amdgpu_cs_ib_info *ib_info;
89800a23bdaSmrg	struct amdgpu_cs_request *ibs_request;
89900a23bdaSmrg	uint64_t bo1_mc, bo2_mc;
90000a23bdaSmrg	volatile unsigned char *bo1_cpu, *bo2_cpu;
90100a23bdaSmrg	int i, j, r, loop1, loop2;
90200a23bdaSmrg	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
90300a23bdaSmrg	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
90400a23bdaSmrg	struct amdgpu_heap_info vram_info, gtt_info;
90500a23bdaSmrg
90600a23bdaSmrg	pm4 = calloc(pm4_dw, sizeof(*pm4));
90700a23bdaSmrg	CU_ASSERT_NOT_EQUAL(pm4, NULL);
90800a23bdaSmrg
90900a23bdaSmrg	ib_info = calloc(1, sizeof(*ib_info));
91000a23bdaSmrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
91100a23bdaSmrg
91200a23bdaSmrg	ibs_request = calloc(1, sizeof(*ibs_request));
91300a23bdaSmrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
91400a23bdaSmrg
91500a23bdaSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
91600a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
91700a23bdaSmrg
91800a23bdaSmrg	/* prepare resource */
91900a23bdaSmrg	resources = calloc(4, sizeof(amdgpu_bo_handle));
92000a23bdaSmrg	CU_ASSERT_NOT_EQUAL(resources, NULL);
92100a23bdaSmrg
92200a23bdaSmrg	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
92300a23bdaSmrg				   0, &vram_info);
92400a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
92500a23bdaSmrg
92641687f09Smrg	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
92741687f09Smrg				   0, &gtt_info);
92841687f09Smrg	CU_ASSERT_EQUAL(r, 0);
92941687f09Smrg
93041687f09Smrg	if (vram_info.max_allocation > gtt_info.heap_size/3) {
93141687f09Smrg		vram_info.max_allocation = gtt_info.heap_size/3;
93241687f09Smrg		gtt_info.max_allocation = vram_info.max_allocation;
93341687f09Smrg	}
93441687f09Smrg
93500a23bdaSmrg	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
93600a23bdaSmrg				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
93700a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
93800a23bdaSmrg	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
93900a23bdaSmrg				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
94000a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
94100a23bdaSmrg
94200a23bdaSmrg	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
94300a23bdaSmrg				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
94400a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
94500a23bdaSmrg	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
94600a23bdaSmrg				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
94700a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
94800a23bdaSmrg
94900a23bdaSmrg
95000a23bdaSmrg
95100a23bdaSmrg	loop1 = loop2 = 0;
95200a23bdaSmrg	/* run 9 circle to test all mapping combination */
95300a23bdaSmrg	while(loop1 < 2) {
95400a23bdaSmrg		while(loop2 < 2) {
95500a23bdaSmrg			/* allocate UC bo1for sDMA use */
95600a23bdaSmrg			r = amdgpu_bo_alloc_and_map(device_handle,
95700a23bdaSmrg						    sdma_write_length, 4096,
95800a23bdaSmrg						    AMDGPU_GEM_DOMAIN_GTT,
95900a23bdaSmrg						    gtt_flags[loop1], &bo1,
96000a23bdaSmrg						    (void**)&bo1_cpu, &bo1_mc,
96100a23bdaSmrg						    &bo1_va_handle);
96200a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
96300a23bdaSmrg
96400a23bdaSmrg			/* set bo1 */
96500a23bdaSmrg			memset((void*)bo1_cpu, 0xaa, sdma_write_length);
96600a23bdaSmrg
96700a23bdaSmrg			/* allocate UC bo2 for sDMA use */
96800a23bdaSmrg			r = amdgpu_bo_alloc_and_map(device_handle,
96900a23bdaSmrg						    sdma_write_length, 4096,
97000a23bdaSmrg						    AMDGPU_GEM_DOMAIN_GTT,
97100a23bdaSmrg						    gtt_flags[loop2], &bo2,
97200a23bdaSmrg						    (void**)&bo2_cpu, &bo2_mc,
97300a23bdaSmrg						    &bo2_va_handle);
97400a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
97500a23bdaSmrg
97600a23bdaSmrg			/* clear bo2 */
97700a23bdaSmrg			memset((void*)bo2_cpu, 0, sdma_write_length);
97800a23bdaSmrg
97900a23bdaSmrg			resources[0] = bo1;
98000a23bdaSmrg			resources[1] = bo2;
98100a23bdaSmrg			resources[2] = vram_max[loop2];
98200a23bdaSmrg			resources[3] = gtt_max[loop2];
98300a23bdaSmrg
98400a23bdaSmrg			/* fulfill PM4: test DMA copy linear */
98500a23bdaSmrg			i = j = 0;
98600a23bdaSmrg			if (family_id == AMDGPU_FAMILY_SI) {
98700a23bdaSmrg				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
98800a23bdaSmrg							  sdma_write_length);
98900a23bdaSmrg				pm4[i++] = 0xffffffff & bo2_mc;
99000a23bdaSmrg				pm4[i++] = 0xffffffff & bo1_mc;
99100a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
99200a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
99300a23bdaSmrg			} else {
99400a23bdaSmrg				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
99500a23bdaSmrg				if (family_id >= AMDGPU_FAMILY_AI)
99600a23bdaSmrg					pm4[i++] = sdma_write_length - 1;
99700a23bdaSmrg				else
99800a23bdaSmrg					pm4[i++] = sdma_write_length;
99900a23bdaSmrg				pm4[i++] = 0;
100000a23bdaSmrg				pm4[i++] = 0xffffffff & bo1_mc;
100100a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
100200a23bdaSmrg				pm4[i++] = 0xffffffff & bo2_mc;
100300a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
100400a23bdaSmrg			}
100500a23bdaSmrg
100600a23bdaSmrg			amdgpu_test_exec_cs_helper(context_handle,
100700a23bdaSmrg						   AMDGPU_HW_IP_DMA, 0,
100800a23bdaSmrg						   i, pm4,
100900a23bdaSmrg						   4, resources,
101000a23bdaSmrg						   ib_info, ibs_request);
101100a23bdaSmrg
101200a23bdaSmrg			/* verify if SDMA test result meets with expected */
101300a23bdaSmrg			i = 0;
101400a23bdaSmrg			while(i < sdma_write_length) {
101500a23bdaSmrg				CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
101600a23bdaSmrg			}
101700a23bdaSmrg			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
101800a23bdaSmrg						     sdma_write_length);
101900a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
102000a23bdaSmrg			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
102100a23bdaSmrg						     sdma_write_length);
102200a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
102300a23bdaSmrg			loop2++;
102400a23bdaSmrg		}
102500a23bdaSmrg		loop2 = 0;
102600a23bdaSmrg		loop1++;
102700a23bdaSmrg	}
102800a23bdaSmrg	amdgpu_bo_free(vram_max[0]);
102900a23bdaSmrg	amdgpu_bo_free(vram_max[1]);
103000a23bdaSmrg	amdgpu_bo_free(gtt_max[0]);
103100a23bdaSmrg	amdgpu_bo_free(gtt_max[1]);
103200a23bdaSmrg	/* clean resources */
103300a23bdaSmrg	free(resources);
103400a23bdaSmrg	free(ibs_request);
103500a23bdaSmrg	free(ib_info);
103600a23bdaSmrg	free(pm4);
103700a23bdaSmrg
103800a23bdaSmrg	/* end of test */
103900a23bdaSmrg	r = amdgpu_cs_ctx_free(context_handle);
104000a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
104100a23bdaSmrg}
104200a23bdaSmrg
104300a23bdaSmrg
/*
 * "Command submission Test (GFX)" entry point: runs the individual GFX
 * sub-tests back to back, in a fixed order.
 */
static void amdgpu_command_submission_gfx(void)
{
	/* CP-based transfers: write data, const fill, copy data */
	amdgpu_command_submission_gfx_cp_write_data();
	amdgpu_command_submission_gfx_cp_const_fill();
	amdgpu_command_submission_gfx_cp_copy_data();

	/* multi-IB submission: separate IB buffers first, then a shared one */
	amdgpu_command_submission_gfx_separate_ibs();
	amdgpu_command_submission_gfx_shared_ib();
}
10573f012e29Smrg
10583f012e29Smrgstatic void amdgpu_semaphore_test(void)
10593f012e29Smrg{
10603f012e29Smrg	amdgpu_context_handle context_handle[2];
10613f012e29Smrg	amdgpu_semaphore_handle sem;
10623f012e29Smrg	amdgpu_bo_handle ib_result_handle[2];
10633f012e29Smrg	void *ib_result_cpu[2];
10643f012e29Smrg	uint64_t ib_result_mc_address[2];
10653f012e29Smrg	struct amdgpu_cs_request ibs_request[2] = {0};
10663f012e29Smrg	struct amdgpu_cs_ib_info ib_info[2] = {0};
10673f012e29Smrg	struct amdgpu_cs_fence fence_status = {0};
10683f012e29Smrg	uint32_t *ptr;
10693f012e29Smrg	uint32_t expired;
1070d8807b2fSmrg	uint32_t sdma_nop, gfx_nop;
10713f012e29Smrg	amdgpu_bo_list_handle bo_list[2];
10723f012e29Smrg	amdgpu_va_handle va_handle[2];
10733f012e29Smrg	int r, i;
10743f012e29Smrg
1075d8807b2fSmrg	if (family_id == AMDGPU_FAMILY_SI) {
1076d8807b2fSmrg		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
1077d8807b2fSmrg		gfx_nop = GFX_COMPUTE_NOP_SI;
1078d8807b2fSmrg	} else {
1079d8807b2fSmrg		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
1080d8807b2fSmrg		gfx_nop = GFX_COMPUTE_NOP;
1081d8807b2fSmrg	}
1082d8807b2fSmrg
10833f012e29Smrg	r = amdgpu_cs_create_semaphore(&sem);
10843f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
10853f012e29Smrg	for (i = 0; i < 2; i++) {
10863f012e29Smrg		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
10873f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
10883f012e29Smrg
10893f012e29Smrg		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
10903f012e29Smrg					    AMDGPU_GEM_DOMAIN_GTT, 0,
10913f012e29Smrg					    &ib_result_handle[i], &ib_result_cpu[i],
10923f012e29Smrg					    &ib_result_mc_address[i], &va_handle[i]);
10933f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
10943f012e29Smrg
10953f012e29Smrg		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
10963f012e29Smrg				       NULL, &bo_list[i]);
10973f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
10983f012e29Smrg	}
10993f012e29Smrg
11003f012e29Smrg	/* 1. same context different engine */
11013f012e29Smrg	ptr = ib_result_cpu[0];
1102d8807b2fSmrg	ptr[0] = sdma_nop;
11033f012e29Smrg	ib_info[0].ib_mc_address = ib_result_mc_address[0];
11043f012e29Smrg	ib_info[0].size = 1;
11053f012e29Smrg
11063f012e29Smrg	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
11073f012e29Smrg	ibs_request[0].number_of_ibs = 1;
11083f012e29Smrg	ibs_request[0].ibs = &ib_info[0];
11093f012e29Smrg	ibs_request[0].resources = bo_list[0];
11103f012e29Smrg	ibs_request[0].fence_info.handle = NULL;
11113f012e29Smrg	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
11123f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
11133f012e29Smrg	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
11143f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
11153f012e29Smrg
11163f012e29Smrg	r = amdgpu_cs_wait_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
11173f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
11183f012e29Smrg	ptr = ib_result_cpu[1];
1119d8807b2fSmrg	ptr[0] = gfx_nop;
11203f012e29Smrg	ib_info[1].ib_mc_address = ib_result_mc_address[1];
11213f012e29Smrg	ib_info[1].size = 1;
11223f012e29Smrg
11233f012e29Smrg	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
11243f012e29Smrg	ibs_request[1].number_of_ibs = 1;
11253f012e29Smrg	ibs_request[1].ibs = &ib_info[1];
11263f012e29Smrg	ibs_request[1].resources = bo_list[1];
11273f012e29Smrg	ibs_request[1].fence_info.handle = NULL;
11283f012e29Smrg
11293f012e29Smrg	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[1], 1);
11303f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
11313f012e29Smrg
11323f012e29Smrg	fence_status.context = context_handle[0];
11333f012e29Smrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
11343f012e29Smrg	fence_status.ip_instance = 0;
11353f012e29Smrg	fence_status.fence = ibs_request[1].seq_no;
11363f012e29Smrg	r = amdgpu_cs_query_fence_status(&fence_status,
11373f012e29Smrg					 500000000, 0, &expired);
11383f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
11393f012e29Smrg	CU_ASSERT_EQUAL(expired, true);
11403f012e29Smrg
11413f012e29Smrg	/* 2. same engine different context */
11423f012e29Smrg	ptr = ib_result_cpu[0];
1143d8807b2fSmrg	ptr[0] = gfx_nop;
11443f012e29Smrg	ib_info[0].ib_mc_address = ib_result_mc_address[0];
11453f012e29Smrg	ib_info[0].size = 1;
11463f012e29Smrg
11473f012e29Smrg	ibs_request[0].ip_type = AMDGPU_HW_IP_GFX;
11483f012e29Smrg	ibs_request[0].number_of_ibs = 1;
11493f012e29Smrg	ibs_request[0].ibs = &ib_info[0];
11503f012e29Smrg	ibs_request[0].resources = bo_list[0];
11513f012e29Smrg	ibs_request[0].fence_info.handle = NULL;
11523f012e29Smrg	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
11533f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
11543f012e29Smrg	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
11553f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
11563f012e29Smrg
11573f012e29Smrg	r = amdgpu_cs_wait_semaphore(context_handle[1], AMDGPU_HW_IP_GFX, 0, 0, sem);
11583f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
11593f012e29Smrg	ptr = ib_result_cpu[1];
1160d8807b2fSmrg	ptr[0] = gfx_nop;
11613f012e29Smrg	ib_info[1].ib_mc_address = ib_result_mc_address[1];
11623f012e29Smrg	ib_info[1].size = 1;
11633f012e29Smrg
11643f012e29Smrg	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
11653f012e29Smrg	ibs_request[1].number_of_ibs = 1;
11663f012e29Smrg	ibs_request[1].ibs = &ib_info[1];
11673f012e29Smrg	ibs_request[1].resources = bo_list[1];
11683f012e29Smrg	ibs_request[1].fence_info.handle = NULL;
11693f012e29Smrg	r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request[1], 1);
11703f012e29Smrg
11713f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
11723f012e29Smrg
11733f012e29Smrg	fence_status.context = context_handle[1];
11743f012e29Smrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
11753f012e29Smrg	fence_status.ip_instance = 0;
11763f012e29Smrg	fence_status.fence = ibs_request[1].seq_no;
11773f012e29Smrg	r = amdgpu_cs_query_fence_status(&fence_status,
11783f012e29Smrg					 500000000, 0, &expired);
11793f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
11803f012e29Smrg	CU_ASSERT_EQUAL(expired, true);
1181d8807b2fSmrg
11823f012e29Smrg	for (i = 0; i < 2; i++) {
11833f012e29Smrg		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
11843f012e29Smrg					     ib_result_mc_address[i], 4096);
11853f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
11863f012e29Smrg
11873f012e29Smrg		r = amdgpu_bo_list_destroy(bo_list[i]);
11883f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
11893f012e29Smrg
11903f012e29Smrg		r = amdgpu_cs_ctx_free(context_handle[i]);
11913f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
11923f012e29Smrg	}
11933f012e29Smrg
11943f012e29Smrg	r = amdgpu_cs_destroy_semaphore(sem);
11953f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
11963f012e29Smrg}
11973f012e29Smrg
11983f012e29Smrgstatic void amdgpu_command_submission_compute_nop(void)
11993f012e29Smrg{
12003f012e29Smrg	amdgpu_context_handle context_handle;
12013f012e29Smrg	amdgpu_bo_handle ib_result_handle;
12023f012e29Smrg	void *ib_result_cpu;
12033f012e29Smrg	uint64_t ib_result_mc_address;
12043f012e29Smrg	struct amdgpu_cs_request ibs_request;
12053f012e29Smrg	struct amdgpu_cs_ib_info ib_info;
12063f012e29Smrg	struct amdgpu_cs_fence fence_status;
12073f012e29Smrg	uint32_t *ptr;
12083f012e29Smrg	uint32_t expired;
120900a23bdaSmrg	int r, instance;
12103f012e29Smrg	amdgpu_bo_list_handle bo_list;
12113f012e29Smrg	amdgpu_va_handle va_handle;
1212d8807b2fSmrg	struct drm_amdgpu_info_hw_ip info;
1213d8807b2fSmrg
1214d8807b2fSmrg	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
1215d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
12163f012e29Smrg
12173f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
12183f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
12193f012e29Smrg
1220d8807b2fSmrg	for (instance = 0; (1 << instance) & info.available_rings; instance++) {
12213f012e29Smrg		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
12223f012e29Smrg					    AMDGPU_GEM_DOMAIN_GTT, 0,
12233f012e29Smrg					    &ib_result_handle, &ib_result_cpu,
12243f012e29Smrg					    &ib_result_mc_address, &va_handle);
12253f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
12263f012e29Smrg
12273f012e29Smrg		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
12283f012e29Smrg				       &bo_list);
12293f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
12303f012e29Smrg
12313f012e29Smrg		ptr = ib_result_cpu;
1232d8807b2fSmrg		memset(ptr, 0, 16);
1233d8807b2fSmrg		ptr[0]=PACKET3(PACKET3_NOP, 14);
12343f012e29Smrg
12353f012e29Smrg		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
12363f012e29Smrg		ib_info.ib_mc_address = ib_result_mc_address;
12373f012e29Smrg		ib_info.size = 16;
12383f012e29Smrg
12393f012e29Smrg		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
12403f012e29Smrg		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
12413f012e29Smrg		ibs_request.ring = instance;
12423f012e29Smrg		ibs_request.number_of_ibs = 1;
12433f012e29Smrg		ibs_request.ibs = &ib_info;
12443f012e29Smrg		ibs_request.resources = bo_list;
12453f012e29Smrg		ibs_request.fence_info.handle = NULL;
12463f012e29Smrg
12473f012e29Smrg		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
12483f012e29Smrg		r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
12493f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
12503f012e29Smrg
12513f012e29Smrg		fence_status.context = context_handle;
12523f012e29Smrg		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
12533f012e29Smrg		fence_status.ip_instance = 0;
12543f012e29Smrg		fence_status.ring = instance;
12553f012e29Smrg		fence_status.fence = ibs_request.seq_no;
12563f012e29Smrg
12573f012e29Smrg		r = amdgpu_cs_query_fence_status(&fence_status,
12583f012e29Smrg						 AMDGPU_TIMEOUT_INFINITE,
12593f012e29Smrg						 0, &expired);
12603f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
12613f012e29Smrg
12623f012e29Smrg		r = amdgpu_bo_list_destroy(bo_list);
12633f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
12643f012e29Smrg
12653f012e29Smrg		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
12663f012e29Smrg					     ib_result_mc_address, 4096);
12673f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
12683f012e29Smrg	}
12693f012e29Smrg
12703f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
12713f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
12723f012e29Smrg}
12733f012e29Smrg
12743f012e29Smrgstatic void amdgpu_command_submission_compute_cp_write_data(void)
12753f012e29Smrg{
12763f012e29Smrg	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
12773f012e29Smrg}
12783f012e29Smrg
12793f012e29Smrgstatic void amdgpu_command_submission_compute_cp_const_fill(void)
12803f012e29Smrg{
12813f012e29Smrg	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
12823f012e29Smrg}
12833f012e29Smrg
12843f012e29Smrgstatic void amdgpu_command_submission_compute_cp_copy_data(void)
12853f012e29Smrg{
12863f012e29Smrg	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
12873f012e29Smrg}
12883f012e29Smrg
/*
 * Compute command submission entry point: runs the individual compute
 * sub-tests back to back, in a fixed order.
 */
static void amdgpu_command_submission_compute(void)
{
	/* CP-based transfers: write data, const fill, copy data */
	amdgpu_command_submission_compute_cp_write_data();
	amdgpu_command_submission_compute_cp_const_fill();
	amdgpu_command_submission_compute_cp_copy_data();

	/* NOP submission on the compute rings */
	amdgpu_command_submission_compute_nop();
}
13003f012e29Smrg
13013f012e29Smrg/*
13023f012e29Smrg * caller need create/release:
13033f012e29Smrg * pm4_src, resources, ib_info, and ibs_request
13043f012e29Smrg * submit command stream described in ibs_request and wait for this IB accomplished
13053f012e29Smrg */
130641687f09Smrgvoid
130741687f09Smrgamdgpu_test_exec_cs_helper_raw(amdgpu_device_handle device_handle,
130841687f09Smrg			       amdgpu_context_handle context_handle,
130941687f09Smrg			       unsigned ip_type, int instance, int pm4_dw,
131041687f09Smrg			       uint32_t *pm4_src, int res_cnt,
131141687f09Smrg			       amdgpu_bo_handle *resources,
131241687f09Smrg			       struct amdgpu_cs_ib_info *ib_info,
131341687f09Smrg			       struct amdgpu_cs_request *ibs_request,
131441687f09Smrg			       bool secure)
13153f012e29Smrg{
13163f012e29Smrg	int r;
13173f012e29Smrg	uint32_t expired;
13183f012e29Smrg	uint32_t *ring_ptr;
13193f012e29Smrg	amdgpu_bo_handle ib_result_handle;
13203f012e29Smrg	void *ib_result_cpu;
13213f012e29Smrg	uint64_t ib_result_mc_address;
13223f012e29Smrg	struct amdgpu_cs_fence fence_status = {0};
13233f012e29Smrg	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
13243f012e29Smrg	amdgpu_va_handle va_handle;
13253f012e29Smrg
13263f012e29Smrg	/* prepare CS */
13273f012e29Smrg	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
13283f012e29Smrg	CU_ASSERT_NOT_EQUAL(resources, NULL);
13293f012e29Smrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
13303f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
13313f012e29Smrg	CU_ASSERT_TRUE(pm4_dw <= 1024);
13323f012e29Smrg
13333f012e29Smrg	/* allocate IB */
13343f012e29Smrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
13353f012e29Smrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
13363f012e29Smrg				    &ib_result_handle, &ib_result_cpu,
13373f012e29Smrg				    &ib_result_mc_address, &va_handle);
13383f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
13393f012e29Smrg
13403f012e29Smrg	/* copy PM4 packet to ring from caller */
13413f012e29Smrg	ring_ptr = ib_result_cpu;
13423f012e29Smrg	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));
13433f012e29Smrg
13443f012e29Smrg	ib_info->ib_mc_address = ib_result_mc_address;
13453f012e29Smrg	ib_info->size = pm4_dw;
134641687f09Smrg	if (secure)
134741687f09Smrg		ib_info->flags |= AMDGPU_IB_FLAGS_SECURE;
13483f012e29Smrg
13493f012e29Smrg	ibs_request->ip_type = ip_type;
13503f012e29Smrg	ibs_request->ring = instance;
13513f012e29Smrg	ibs_request->number_of_ibs = 1;
13523f012e29Smrg	ibs_request->ibs = ib_info;
13533f012e29Smrg	ibs_request->fence_info.handle = NULL;
13543f012e29Smrg
13553f012e29Smrg	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
13563f012e29Smrg	all_res[res_cnt] = ib_result_handle;
13573f012e29Smrg
13583f012e29Smrg	r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res,
13593f012e29Smrg				  NULL, &ibs_request->resources);
13603f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
13613f012e29Smrg
13623f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
13633f012e29Smrg
13643f012e29Smrg	/* submit CS */
13653f012e29Smrg	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
13663f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
13673f012e29Smrg
13683f012e29Smrg	r = amdgpu_bo_list_destroy(ibs_request->resources);
13693f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
13703f012e29Smrg
13713f012e29Smrg	fence_status.ip_type = ip_type;
13723f012e29Smrg	fence_status.ip_instance = 0;
13733f012e29Smrg	fence_status.ring = ibs_request->ring;
13743f012e29Smrg	fence_status.context = context_handle;
13753f012e29Smrg	fence_status.fence = ibs_request->seq_no;
13763f012e29Smrg
13773f012e29Smrg	/* wait for IB accomplished */
13783f012e29Smrg	r = amdgpu_cs_query_fence_status(&fence_status,
13793f012e29Smrg					 AMDGPU_TIMEOUT_INFINITE,
13803f012e29Smrg					 0, &expired);
13813f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
13823f012e29Smrg	CU_ASSERT_EQUAL(expired, true);
13833f012e29Smrg
13843f012e29Smrg	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
13853f012e29Smrg				     ib_result_mc_address, 4096);
13863f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
13873f012e29Smrg}
13883f012e29Smrg
138941687f09Smrgstatic void
139041687f09Smrgamdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
139141687f09Smrg			   unsigned ip_type, int instance, int pm4_dw,
139241687f09Smrg			   uint32_t *pm4_src, int res_cnt,
139341687f09Smrg			   amdgpu_bo_handle *resources,
139441687f09Smrg			   struct amdgpu_cs_ib_info *ib_info,
139541687f09Smrg			   struct amdgpu_cs_request *ibs_request)
139641687f09Smrg{
139741687f09Smrg	amdgpu_test_exec_cs_helper_raw(device_handle, context_handle,
139841687f09Smrg				       ip_type, instance, pm4_dw, pm4_src,
139941687f09Smrg				       res_cnt, resources, ib_info,
140041687f09Smrg				       ibs_request, false);
140141687f09Smrg}
140241687f09Smrg
140341687f09Smrgvoid
140441687f09Smrgamdgpu_command_submission_write_linear_helper_with_secure(amdgpu_device_handle
140541687f09Smrg							  device, unsigned
140641687f09Smrg							  ip_type, bool secure)
14073f012e29Smrg{
14083f012e29Smrg	const int sdma_write_length = 128;
14093f012e29Smrg	const int pm4_dw = 256;
14103f012e29Smrg	amdgpu_context_handle context_handle;
14113f012e29Smrg	amdgpu_bo_handle bo;
14123f012e29Smrg	amdgpu_bo_handle *resources;
14133f012e29Smrg	uint32_t *pm4;
14143f012e29Smrg	struct amdgpu_cs_ib_info *ib_info;
14153f012e29Smrg	struct amdgpu_cs_request *ibs_request;
14163f012e29Smrg	uint64_t bo_mc;
14173f012e29Smrg	volatile uint32_t *bo_cpu;
141841687f09Smrg	uint32_t bo_cpu_origin;
141900a23bdaSmrg	int i, j, r, loop, ring_id;
14203f012e29Smrg	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
14213f012e29Smrg	amdgpu_va_handle va_handle;
142200a23bdaSmrg	struct drm_amdgpu_info_hw_ip hw_ip_info;
14233f012e29Smrg
14243f012e29Smrg	pm4 = calloc(pm4_dw, sizeof(*pm4));
14253f012e29Smrg	CU_ASSERT_NOT_EQUAL(pm4, NULL);
14263f012e29Smrg
14273f012e29Smrg	ib_info = calloc(1, sizeof(*ib_info));
14283f012e29Smrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
14293f012e29Smrg
14303f012e29Smrg	ibs_request = calloc(1, sizeof(*ibs_request));
14313f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
14323f012e29Smrg
143341687f09Smrg	r = amdgpu_query_hw_ip_info(device, ip_type, 0, &hw_ip_info);
143400a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
143500a23bdaSmrg
143641687f09Smrg	for (i = 0; secure && (i < 2); i++)
143741687f09Smrg		gtt_flags[i] |= AMDGPU_GEM_CREATE_ENCRYPTED;
143841687f09Smrg
143941687f09Smrg	r = amdgpu_cs_ctx_create(device, &context_handle);
144041687f09Smrg
14413f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
14423f012e29Smrg
14433f012e29Smrg	/* prepare resource */
14443f012e29Smrg	resources = calloc(1, sizeof(amdgpu_bo_handle));
14453f012e29Smrg	CU_ASSERT_NOT_EQUAL(resources, NULL);
14463f012e29Smrg
144700a23bdaSmrg	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
144800a23bdaSmrg		loop = 0;
144900a23bdaSmrg		while(loop < 2) {
145000a23bdaSmrg			/* allocate UC bo for sDMA use */
145141687f09Smrg			r = amdgpu_bo_alloc_and_map(device,
145200a23bdaSmrg						    sdma_write_length * sizeof(uint32_t),
145300a23bdaSmrg						    4096, AMDGPU_GEM_DOMAIN_GTT,
145400a23bdaSmrg						    gtt_flags[loop], &bo, (void**)&bo_cpu,
145500a23bdaSmrg						    &bo_mc, &va_handle);
145600a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
14573f012e29Smrg
145800a23bdaSmrg			/* clear bo */
145900a23bdaSmrg			memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));
14603f012e29Smrg
146100a23bdaSmrg			resources[0] = bo;
14623f012e29Smrg
146300a23bdaSmrg			/* fulfill PM4: test DMA write-linear */
146400a23bdaSmrg			i = j = 0;
146500a23bdaSmrg			if (ip_type == AMDGPU_HW_IP_DMA) {
146600a23bdaSmrg				if (family_id == AMDGPU_FAMILY_SI)
146700a23bdaSmrg					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
146800a23bdaSmrg								  sdma_write_length);
146900a23bdaSmrg				else
147000a23bdaSmrg					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
147141687f09Smrg							       SDMA_WRITE_SUB_OPCODE_LINEAR,
147241687f09Smrg							       secure ? SDMA_ATOMIC_TMZ(1) : 0);
147341687f09Smrg				pm4[i++] = 0xfffffffc & bo_mc;
147400a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
147500a23bdaSmrg				if (family_id >= AMDGPU_FAMILY_AI)
147600a23bdaSmrg					pm4[i++] = sdma_write_length - 1;
147700a23bdaSmrg				else if (family_id != AMDGPU_FAMILY_SI)
147800a23bdaSmrg					pm4[i++] = sdma_write_length;
147900a23bdaSmrg				while(j++ < sdma_write_length)
148000a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
148100a23bdaSmrg			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
148200a23bdaSmrg				    (ip_type == AMDGPU_HW_IP_COMPUTE)) {
148300a23bdaSmrg				pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
148400a23bdaSmrg				pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
148500a23bdaSmrg				pm4[i++] = 0xfffffffc & bo_mc;
148600a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
148700a23bdaSmrg				while(j++ < sdma_write_length)
148800a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
148900a23bdaSmrg			}
14903f012e29Smrg
149141687f09Smrg			amdgpu_test_exec_cs_helper_raw(device, context_handle,
149241687f09Smrg						       ip_type, ring_id, i, pm4,
149341687f09Smrg						       1, resources, ib_info,
149441687f09Smrg						       ibs_request, secure);
14953f012e29Smrg
149600a23bdaSmrg			/* verify if SDMA test result meets with expected */
149700a23bdaSmrg			i = 0;
149841687f09Smrg			if (!secure) {
149941687f09Smrg				while(i < sdma_write_length) {
150041687f09Smrg					CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
150141687f09Smrg				}
150241687f09Smrg			} else if (ip_type == AMDGPU_HW_IP_GFX) {
150341687f09Smrg				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
150441687f09Smrg				pm4[i++] = PACKET3(PACKET3_ATOMIC_MEM, 7);
150541687f09Smrg				/* atomic opcode for 32b w/ RTN and ATOMIC_SWAPCMP_RTN
150641687f09Smrg				 * command, 1-loop_until_compare_satisfied.
150741687f09Smrg				 * single_pass_atomic, 0-lru
150841687f09Smrg				 * engine_sel, 0-micro_engine
150941687f09Smrg				 */
151041687f09Smrg				pm4[i++] = (TC_OP_ATOMIC_CMPSWAP_RTN_32 |
151141687f09Smrg							ATOMIC_MEM_COMMAND(1) |
151241687f09Smrg							ATOMIC_MEM_CACHEPOLICAY(0) |
151341687f09Smrg							ATOMIC_MEM_ENGINESEL(0));
151441687f09Smrg				pm4[i++] = 0xfffffffc & bo_mc;
151541687f09Smrg				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
151641687f09Smrg				pm4[i++] = 0x12345678;
151741687f09Smrg				pm4[i++] = 0x0;
151841687f09Smrg				pm4[i++] = 0xdeadbeaf;
151941687f09Smrg				pm4[i++] = 0x0;
152041687f09Smrg				pm4[i++] = 0x100;
152141687f09Smrg				amdgpu_test_exec_cs_helper_raw(device, context_handle,
152241687f09Smrg							ip_type, ring_id, i, pm4,
152341687f09Smrg							1, resources, ib_info,
152441687f09Smrg							ibs_request, true);
152541687f09Smrg			} else if (ip_type == AMDGPU_HW_IP_DMA) {
152641687f09Smrg				/* restore the bo_cpu to compare */
152741687f09Smrg				bo_cpu_origin = bo_cpu[0];
152841687f09Smrg				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
152941687f09Smrg				/* atomic opcode for 32b w/ RTN and ATOMIC_SWAPCMP_RTN
153041687f09Smrg				 * loop, 1-loop_until_compare_satisfied.
153141687f09Smrg				 * single_pass_atomic, 0-lru
153241687f09Smrg				 */
153341687f09Smrg				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
153441687f09Smrg							       0,
153541687f09Smrg							       SDMA_ATOMIC_LOOP(1) |
153641687f09Smrg							       SDMA_ATOMIC_TMZ(1) |
153741687f09Smrg							       SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
153841687f09Smrg				pm4[i++] = 0xfffffffc & bo_mc;
153941687f09Smrg				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
154041687f09Smrg				pm4[i++] = 0x12345678;
154141687f09Smrg				pm4[i++] = 0x0;
154241687f09Smrg				pm4[i++] = 0xdeadbeaf;
154341687f09Smrg				pm4[i++] = 0x0;
154441687f09Smrg				pm4[i++] = 0x100;
154541687f09Smrg				amdgpu_test_exec_cs_helper_raw(device, context_handle,
154641687f09Smrg							ip_type, ring_id, i, pm4,
154741687f09Smrg							1, resources, ib_info,
154841687f09Smrg							ibs_request, true);
154941687f09Smrg				/* DMA's atomic behavir is unlike GFX
155041687f09Smrg				 * If the comparing data is not equal to destination data,
155141687f09Smrg				 * For GFX, loop again till gfx timeout(system hang).
155241687f09Smrg				 * For DMA, loop again till timer expired and then send interrupt.
155341687f09Smrg				 * So testcase can't use interrupt mechanism.
155441687f09Smrg				 * We take another way to verify. When the comparing data is not
155541687f09Smrg				 * equal to destination data, overwrite the source data to the destination
155641687f09Smrg				 * buffer. Otherwise, original destination data unchanged.
155741687f09Smrg				 * So if the bo_cpu data is overwritten, the result is passed.
155841687f09Smrg				 */
155941687f09Smrg				CU_ASSERT_NOT_EQUAL(bo_cpu[0], bo_cpu_origin);
156041687f09Smrg
156141687f09Smrg				/* compare again for the case of dest_data != cmp_data */
156241687f09Smrg				i = 0;
156341687f09Smrg				/* restore again, here dest_data should be */
156441687f09Smrg				bo_cpu_origin = bo_cpu[0];
156541687f09Smrg				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
156641687f09Smrg				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
156741687f09Smrg							       0,
156841687f09Smrg							       SDMA_ATOMIC_LOOP(1) |
156941687f09Smrg							       SDMA_ATOMIC_TMZ(1) |
157041687f09Smrg							       SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
157141687f09Smrg				pm4[i++] = 0xfffffffc & bo_mc;
157241687f09Smrg				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
157341687f09Smrg				pm4[i++] = 0x87654321;
157441687f09Smrg				pm4[i++] = 0x0;
157541687f09Smrg				pm4[i++] = 0xdeadbeaf;
157641687f09Smrg				pm4[i++] = 0x0;
157741687f09Smrg				pm4[i++] = 0x100;
157841687f09Smrg				amdgpu_test_exec_cs_helper_raw(device, context_handle,
157941687f09Smrg							ip_type, ring_id, i, pm4,
158041687f09Smrg							1, resources, ib_info,
158141687f09Smrg							ibs_request, true);
158241687f09Smrg				/* here bo_cpu[0] should be unchanged, still is 0x12345678, otherwise failed*/
158341687f09Smrg				CU_ASSERT_EQUAL(bo_cpu[0], bo_cpu_origin);
158400a23bdaSmrg			}
15853f012e29Smrg
158600a23bdaSmrg			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
158700a23bdaSmrg						     sdma_write_length * sizeof(uint32_t));
158800a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
158900a23bdaSmrg			loop++;
15903f012e29Smrg		}
15913f012e29Smrg	}
15923f012e29Smrg	/* clean resources */
15933f012e29Smrg	free(resources);
15943f012e29Smrg	free(ibs_request);
15953f012e29Smrg	free(ib_info);
15963f012e29Smrg	free(pm4);
15973f012e29Smrg
15983f012e29Smrg	/* end of test */
15993f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
16003f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
16013f012e29Smrg}
16023f012e29Smrg
160341687f09Smrgstatic void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
160441687f09Smrg{
160541687f09Smrg	amdgpu_command_submission_write_linear_helper_with_secure(device_handle,
160641687f09Smrg								  ip_type,
160741687f09Smrg								  false);
160841687f09Smrg}
160941687f09Smrg
16103f012e29Smrgstatic void amdgpu_command_submission_sdma_write_linear(void)
16113f012e29Smrg{
16123f012e29Smrg	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
16133f012e29Smrg}
16143f012e29Smrg
16153f012e29Smrgstatic void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
16163f012e29Smrg{
16173f012e29Smrg	const int sdma_write_length = 1024 * 1024;
16183f012e29Smrg	const int pm4_dw = 256;
16193f012e29Smrg	amdgpu_context_handle context_handle;
16203f012e29Smrg	amdgpu_bo_handle bo;
16213f012e29Smrg	amdgpu_bo_handle *resources;
16223f012e29Smrg	uint32_t *pm4;
16233f012e29Smrg	struct amdgpu_cs_ib_info *ib_info;
16243f012e29Smrg	struct amdgpu_cs_request *ibs_request;
16253f012e29Smrg	uint64_t bo_mc;
16263f012e29Smrg	volatile uint32_t *bo_cpu;
162700a23bdaSmrg	int i, j, r, loop, ring_id;
16283f012e29Smrg	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
16293f012e29Smrg	amdgpu_va_handle va_handle;
163000a23bdaSmrg	struct drm_amdgpu_info_hw_ip hw_ip_info;
16313f012e29Smrg
16323f012e29Smrg	pm4 = calloc(pm4_dw, sizeof(*pm4));
16333f012e29Smrg	CU_ASSERT_NOT_EQUAL(pm4, NULL);
16343f012e29Smrg
16353f012e29Smrg	ib_info = calloc(1, sizeof(*ib_info));
16363f012e29Smrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
16373f012e29Smrg
16383f012e29Smrg	ibs_request = calloc(1, sizeof(*ibs_request));
16393f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
16403f012e29Smrg
164100a23bdaSmrg	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
164200a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
164300a23bdaSmrg
16443f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
16453f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
16463f012e29Smrg
16473f012e29Smrg	/* prepare resource */
16483f012e29Smrg	resources = calloc(1, sizeof(amdgpu_bo_handle));
16493f012e29Smrg	CU_ASSERT_NOT_EQUAL(resources, NULL);
16503f012e29Smrg
165100a23bdaSmrg	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
165200a23bdaSmrg		loop = 0;
165300a23bdaSmrg		while(loop < 2) {
165400a23bdaSmrg			/* allocate UC bo for sDMA use */
165500a23bdaSmrg			r = amdgpu_bo_alloc_and_map(device_handle,
165600a23bdaSmrg						    sdma_write_length, 4096,
165700a23bdaSmrg						    AMDGPU_GEM_DOMAIN_GTT,
165800a23bdaSmrg						    gtt_flags[loop], &bo, (void**)&bo_cpu,
165900a23bdaSmrg						    &bo_mc, &va_handle);
166000a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
16613f012e29Smrg
166200a23bdaSmrg			/* clear bo */
166300a23bdaSmrg			memset((void*)bo_cpu, 0, sdma_write_length);
16643f012e29Smrg
166500a23bdaSmrg			resources[0] = bo;
16663f012e29Smrg
166700a23bdaSmrg			/* fulfill PM4: test DMA const fill */
166800a23bdaSmrg			i = j = 0;
166900a23bdaSmrg			if (ip_type == AMDGPU_HW_IP_DMA) {
167000a23bdaSmrg				if (family_id == AMDGPU_FAMILY_SI) {
167100a23bdaSmrg					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
167200a23bdaSmrg								  0, 0, 0,
167300a23bdaSmrg								  sdma_write_length / 4);
167400a23bdaSmrg					pm4[i++] = 0xfffffffc & bo_mc;
167500a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
167600a23bdaSmrg					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
167700a23bdaSmrg				} else {
167800a23bdaSmrg					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
167900a23bdaSmrg							       SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
168000a23bdaSmrg					pm4[i++] = 0xffffffff & bo_mc;
168100a23bdaSmrg					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
168200a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
168300a23bdaSmrg					if (family_id >= AMDGPU_FAMILY_AI)
168400a23bdaSmrg						pm4[i++] = sdma_write_length - 1;
168500a23bdaSmrg					else
168600a23bdaSmrg						pm4[i++] = sdma_write_length;
168700a23bdaSmrg				}
168800a23bdaSmrg			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
168900a23bdaSmrg				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
169000a23bdaSmrg				if (family_id == AMDGPU_FAMILY_SI) {
169100a23bdaSmrg					pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
169200a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
169300a23bdaSmrg					pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
169400a23bdaSmrg						   PACKET3_DMA_DATA_SI_DST_SEL(0) |
169500a23bdaSmrg						   PACKET3_DMA_DATA_SI_SRC_SEL(2) |
169600a23bdaSmrg						   PACKET3_DMA_DATA_SI_CP_SYNC;
169700a23bdaSmrg					pm4[i++] = 0xffffffff & bo_mc;
169800a23bdaSmrg					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1699d8807b2fSmrg					pm4[i++] = sdma_write_length;
170000a23bdaSmrg				} else {
170100a23bdaSmrg					pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
170200a23bdaSmrg					pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
170300a23bdaSmrg						   PACKET3_DMA_DATA_DST_SEL(0) |
170400a23bdaSmrg						   PACKET3_DMA_DATA_SRC_SEL(2) |
170500a23bdaSmrg						   PACKET3_DMA_DATA_CP_SYNC;
170600a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
170700a23bdaSmrg					pm4[i++] = 0;
170800a23bdaSmrg					pm4[i++] = 0xfffffffc & bo_mc;
170900a23bdaSmrg					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
171000a23bdaSmrg					pm4[i++] = sdma_write_length;
171100a23bdaSmrg				}
1712d8807b2fSmrg			}
17133f012e29Smrg
171400a23bdaSmrg			amdgpu_test_exec_cs_helper(context_handle,
171500a23bdaSmrg						   ip_type, ring_id,
171600a23bdaSmrg						   i, pm4,
171700a23bdaSmrg						   1, resources,
171800a23bdaSmrg						   ib_info, ibs_request);
17193f012e29Smrg
172000a23bdaSmrg			/* verify if SDMA test result meets with expected */
172100a23bdaSmrg			i = 0;
172200a23bdaSmrg			while(i < (sdma_write_length / 4)) {
172300a23bdaSmrg				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
172400a23bdaSmrg			}
17253f012e29Smrg
172600a23bdaSmrg			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
172700a23bdaSmrg						     sdma_write_length);
172800a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
172900a23bdaSmrg			loop++;
173000a23bdaSmrg		}
17313f012e29Smrg	}
17323f012e29Smrg	/* clean resources */
17333f012e29Smrg	free(resources);
17343f012e29Smrg	free(ibs_request);
17353f012e29Smrg	free(ib_info);
17363f012e29Smrg	free(pm4);
17373f012e29Smrg
17383f012e29Smrg	/* end of test */
17393f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
17403f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
17413f012e29Smrg}
17423f012e29Smrg
17433f012e29Smrgstatic void amdgpu_command_submission_sdma_const_fill(void)
17443f012e29Smrg{
17453f012e29Smrg	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
17463f012e29Smrg}
17473f012e29Smrg
17483f012e29Smrgstatic void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
17493f012e29Smrg{
17503f012e29Smrg	const int sdma_write_length = 1024;
17513f012e29Smrg	const int pm4_dw = 256;
17523f012e29Smrg	amdgpu_context_handle context_handle;
17533f012e29Smrg	amdgpu_bo_handle bo1, bo2;
17543f012e29Smrg	amdgpu_bo_handle *resources;
17553f012e29Smrg	uint32_t *pm4;
17563f012e29Smrg	struct amdgpu_cs_ib_info *ib_info;
17573f012e29Smrg	struct amdgpu_cs_request *ibs_request;
17583f012e29Smrg	uint64_t bo1_mc, bo2_mc;
17593f012e29Smrg	volatile unsigned char *bo1_cpu, *bo2_cpu;
176000a23bdaSmrg	int i, j, r, loop1, loop2, ring_id;
17613f012e29Smrg	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
17623f012e29Smrg	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
176300a23bdaSmrg	struct drm_amdgpu_info_hw_ip hw_ip_info;
17643f012e29Smrg
17653f012e29Smrg	pm4 = calloc(pm4_dw, sizeof(*pm4));
17663f012e29Smrg	CU_ASSERT_NOT_EQUAL(pm4, NULL);
17673f012e29Smrg
17683f012e29Smrg	ib_info = calloc(1, sizeof(*ib_info));
17693f012e29Smrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
17703f012e29Smrg
17713f012e29Smrg	ibs_request = calloc(1, sizeof(*ibs_request));
17723f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
17733f012e29Smrg
177400a23bdaSmrg	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
177500a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
177600a23bdaSmrg
17773f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
17783f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
17793f012e29Smrg
17803f012e29Smrg	/* prepare resource */
17813f012e29Smrg	resources = calloc(2, sizeof(amdgpu_bo_handle));
17823f012e29Smrg	CU_ASSERT_NOT_EQUAL(resources, NULL);
17833f012e29Smrg
178400a23bdaSmrg	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
178500a23bdaSmrg		loop1 = loop2 = 0;
178600a23bdaSmrg		/* run 9 circle to test all mapping combination */
178700a23bdaSmrg		while(loop1 < 2) {
178800a23bdaSmrg			while(loop2 < 2) {
178900a23bdaSmrg				/* allocate UC bo1for sDMA use */
179000a23bdaSmrg				r = amdgpu_bo_alloc_and_map(device_handle,
179100a23bdaSmrg							    sdma_write_length, 4096,
179200a23bdaSmrg							    AMDGPU_GEM_DOMAIN_GTT,
179300a23bdaSmrg							    gtt_flags[loop1], &bo1,
179400a23bdaSmrg							    (void**)&bo1_cpu, &bo1_mc,
179500a23bdaSmrg							    &bo1_va_handle);
179600a23bdaSmrg				CU_ASSERT_EQUAL(r, 0);
179700a23bdaSmrg
179800a23bdaSmrg				/* set bo1 */
179900a23bdaSmrg				memset((void*)bo1_cpu, 0xaa, sdma_write_length);
180000a23bdaSmrg
180100a23bdaSmrg				/* allocate UC bo2 for sDMA use */
180200a23bdaSmrg				r = amdgpu_bo_alloc_and_map(device_handle,
180300a23bdaSmrg							    sdma_write_length, 4096,
180400a23bdaSmrg							    AMDGPU_GEM_DOMAIN_GTT,
180500a23bdaSmrg							    gtt_flags[loop2], &bo2,
180600a23bdaSmrg							    (void**)&bo2_cpu, &bo2_mc,
180700a23bdaSmrg							    &bo2_va_handle);
180800a23bdaSmrg				CU_ASSERT_EQUAL(r, 0);
180900a23bdaSmrg
181000a23bdaSmrg				/* clear bo2 */
181100a23bdaSmrg				memset((void*)bo2_cpu, 0, sdma_write_length);
181200a23bdaSmrg
181300a23bdaSmrg				resources[0] = bo1;
181400a23bdaSmrg				resources[1] = bo2;
181500a23bdaSmrg
181600a23bdaSmrg				/* fulfill PM4: test DMA copy linear */
181700a23bdaSmrg				i = j = 0;
181800a23bdaSmrg				if (ip_type == AMDGPU_HW_IP_DMA) {
181900a23bdaSmrg					if (family_id == AMDGPU_FAMILY_SI) {
182000a23bdaSmrg						pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
182100a23bdaSmrg									  0, 0, 0,
182200a23bdaSmrg									  sdma_write_length);
182300a23bdaSmrg						pm4[i++] = 0xffffffff & bo2_mc;
182400a23bdaSmrg						pm4[i++] = 0xffffffff & bo1_mc;
182500a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
182600a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
182700a23bdaSmrg					} else {
182800a23bdaSmrg						pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
182900a23bdaSmrg								       SDMA_COPY_SUB_OPCODE_LINEAR,
183000a23bdaSmrg								       0);
183100a23bdaSmrg						if (family_id >= AMDGPU_FAMILY_AI)
183200a23bdaSmrg							pm4[i++] = sdma_write_length - 1;
183300a23bdaSmrg						else
183400a23bdaSmrg							pm4[i++] = sdma_write_length;
183500a23bdaSmrg						pm4[i++] = 0;
183600a23bdaSmrg						pm4[i++] = 0xffffffff & bo1_mc;
183700a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
183800a23bdaSmrg						pm4[i++] = 0xffffffff & bo2_mc;
183900a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
184000a23bdaSmrg					}
184100a23bdaSmrg				} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
184200a23bdaSmrg					   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
184300a23bdaSmrg					if (family_id == AMDGPU_FAMILY_SI) {
184400a23bdaSmrg						pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
184500a23bdaSmrg						pm4[i++] = 0xfffffffc & bo1_mc;
184600a23bdaSmrg						pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
184700a23bdaSmrg							   PACKET3_DMA_DATA_SI_DST_SEL(0) |
184800a23bdaSmrg							   PACKET3_DMA_DATA_SI_SRC_SEL(0) |
184900a23bdaSmrg							   PACKET3_DMA_DATA_SI_CP_SYNC |
185000a23bdaSmrg							   (0xffff00000000 & bo1_mc) >> 32;
185100a23bdaSmrg						pm4[i++] = 0xfffffffc & bo2_mc;
185200a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1853d8807b2fSmrg						pm4[i++] = sdma_write_length;
185400a23bdaSmrg					} else {
185500a23bdaSmrg						pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
185600a23bdaSmrg						pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
185700a23bdaSmrg							   PACKET3_DMA_DATA_DST_SEL(0) |
185800a23bdaSmrg							   PACKET3_DMA_DATA_SRC_SEL(0) |
185900a23bdaSmrg							   PACKET3_DMA_DATA_CP_SYNC;
186000a23bdaSmrg						pm4[i++] = 0xfffffffc & bo1_mc;
186100a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
186200a23bdaSmrg						pm4[i++] = 0xfffffffc & bo2_mc;
186300a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
186400a23bdaSmrg						pm4[i++] = sdma_write_length;
186500a23bdaSmrg					}
1866d8807b2fSmrg				}
18673f012e29Smrg
186800a23bdaSmrg				amdgpu_test_exec_cs_helper(context_handle,
186900a23bdaSmrg							   ip_type, ring_id,
187000a23bdaSmrg							   i, pm4,
187100a23bdaSmrg							   2, resources,
187200a23bdaSmrg							   ib_info, ibs_request);
18733f012e29Smrg
187400a23bdaSmrg				/* verify if SDMA test result meets with expected */
187500a23bdaSmrg				i = 0;
187600a23bdaSmrg				while(i < sdma_write_length) {
187700a23bdaSmrg					CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
187800a23bdaSmrg				}
187900a23bdaSmrg				r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
188000a23bdaSmrg							     sdma_write_length);
188100a23bdaSmrg				CU_ASSERT_EQUAL(r, 0);
188200a23bdaSmrg				r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
188300a23bdaSmrg							     sdma_write_length);
188400a23bdaSmrg				CU_ASSERT_EQUAL(r, 0);
188500a23bdaSmrg				loop2++;
18863f012e29Smrg			}
188700a23bdaSmrg			loop1++;
18883f012e29Smrg		}
18893f012e29Smrg	}
18903f012e29Smrg	/* clean resources */
18913f012e29Smrg	free(resources);
18923f012e29Smrg	free(ibs_request);
18933f012e29Smrg	free(ib_info);
18943f012e29Smrg	free(pm4);
18953f012e29Smrg
18963f012e29Smrg	/* end of test */
18973f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
18983f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
18993f012e29Smrg}
19003f012e29Smrg
19013f012e29Smrgstatic void amdgpu_command_submission_sdma_copy_linear(void)
19023f012e29Smrg{
19033f012e29Smrg	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
19043f012e29Smrg}
19053f012e29Smrg
/*
 * SDMA command submission test entry: runs the write-linear, constant-fill
 * and copy-linear SDMA sub-tests, in that order.
 */
static void
amdgpu_command_submission_sdma(void)
{
	/* 1: write linear */
	amdgpu_command_submission_sdma_write_linear();

	/* 2: constant fill */
	amdgpu_command_submission_sdma_const_fill();

	/* 3: copy linear */
	amdgpu_command_submission_sdma_copy_linear();
}
19123f012e29Smrg
1913d8807b2fSmrgstatic void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
1914d8807b2fSmrg{
1915d8807b2fSmrg	amdgpu_context_handle context_handle;
1916d8807b2fSmrg	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
1917d8807b2fSmrg	void *ib_result_cpu, *ib_result_ce_cpu;
1918d8807b2fSmrg	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
1919d8807b2fSmrg	struct amdgpu_cs_request ibs_request[2] = {0};
1920d8807b2fSmrg	struct amdgpu_cs_ib_info ib_info[2];
1921d8807b2fSmrg	struct amdgpu_cs_fence fence_status[2] = {0};
1922d8807b2fSmrg	uint32_t *ptr;
1923d8807b2fSmrg	uint32_t expired;
1924d8807b2fSmrg	amdgpu_bo_list_handle bo_list;
1925d8807b2fSmrg	amdgpu_va_handle va_handle, va_handle_ce;
1926d8807b2fSmrg	int r;
1927d8807b2fSmrg	int i = 0, ib_cs_num = 2;
1928d8807b2fSmrg
1929d8807b2fSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1930d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1931d8807b2fSmrg
1932d8807b2fSmrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1933d8807b2fSmrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
1934d8807b2fSmrg				    &ib_result_handle, &ib_result_cpu,
1935d8807b2fSmrg				    &ib_result_mc_address, &va_handle);
1936d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1937d8807b2fSmrg
1938d8807b2fSmrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1939d8807b2fSmrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
1940d8807b2fSmrg				    &ib_result_ce_handle, &ib_result_ce_cpu,
1941d8807b2fSmrg				    &ib_result_ce_mc_address, &va_handle_ce);
1942d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1943d8807b2fSmrg
1944d8807b2fSmrg	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
1945d8807b2fSmrg			       ib_result_ce_handle, &bo_list);
1946d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1947d8807b2fSmrg
1948d8807b2fSmrg	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
1949d8807b2fSmrg
1950d8807b2fSmrg	/* IT_SET_CE_DE_COUNTERS */
1951d8807b2fSmrg	ptr = ib_result_ce_cpu;
1952d8807b2fSmrg	if (family_id != AMDGPU_FAMILY_SI) {
1953d8807b2fSmrg		ptr[i++] = 0xc0008900;
1954d8807b2fSmrg		ptr[i++] = 0;
1955d8807b2fSmrg	}
1956d8807b2fSmrg	ptr[i++] = 0xc0008400;
1957d8807b2fSmrg	ptr[i++] = 1;
1958d8807b2fSmrg	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
1959d8807b2fSmrg	ib_info[0].size = i;
1960d8807b2fSmrg	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
1961d8807b2fSmrg
1962d8807b2fSmrg	/* IT_WAIT_ON_CE_COUNTER */
1963d8807b2fSmrg	ptr = ib_result_cpu;
1964d8807b2fSmrg	ptr[0] = 0xc0008600;
1965d8807b2fSmrg	ptr[1] = 0x00000001;
1966d8807b2fSmrg	ib_info[1].ib_mc_address = ib_result_mc_address;
1967d8807b2fSmrg	ib_info[1].size = 2;
1968d8807b2fSmrg
1969d8807b2fSmrg	for (i = 0; i < ib_cs_num; i++) {
1970d8807b2fSmrg		ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
1971d8807b2fSmrg		ibs_request[i].number_of_ibs = 2;
1972d8807b2fSmrg		ibs_request[i].ibs = ib_info;
1973d8807b2fSmrg		ibs_request[i].resources = bo_list;
1974d8807b2fSmrg		ibs_request[i].fence_info.handle = NULL;
1975d8807b2fSmrg	}
1976d8807b2fSmrg
1977d8807b2fSmrg	r = amdgpu_cs_submit(context_handle, 0,ibs_request, ib_cs_num);
1978d8807b2fSmrg
1979d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1980d8807b2fSmrg
1981d8807b2fSmrg	for (i = 0; i < ib_cs_num; i++) {
1982d8807b2fSmrg		fence_status[i].context = context_handle;
1983d8807b2fSmrg		fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
1984d8807b2fSmrg		fence_status[i].fence = ibs_request[i].seq_no;
1985d8807b2fSmrg	}
1986d8807b2fSmrg
1987d8807b2fSmrg	r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
1988d8807b2fSmrg				AMDGPU_TIMEOUT_INFINITE,
1989d8807b2fSmrg				&expired, NULL);
1990d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1991d8807b2fSmrg
1992d8807b2fSmrg	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1993d8807b2fSmrg				     ib_result_mc_address, 4096);
1994d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1995d8807b2fSmrg
1996d8807b2fSmrg	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
1997d8807b2fSmrg				     ib_result_ce_mc_address, 4096);
1998d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1999d8807b2fSmrg
2000d8807b2fSmrg	r = amdgpu_bo_list_destroy(bo_list);
2001d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
2002d8807b2fSmrg
2003d8807b2fSmrg	r = amdgpu_cs_ctx_free(context_handle);
2004d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
2005d8807b2fSmrg}
2006d8807b2fSmrg
/*
 * Multi-fence test entry: runs the multi-fence submission once with
 * wait_all = true and then once with wait_all = false.
 */
static void
amdgpu_command_submission_multi_fence(void)
{
	const bool wait_all = true;

	amdgpu_command_submission_multi_fence_wait_all(wait_all);
	amdgpu_command_submission_multi_fence_wait_all(!wait_all);
}
2012d8807b2fSmrg
20133f012e29Smrgstatic void amdgpu_userptr_test(void)
20143f012e29Smrg{
20153f012e29Smrg	int i, r, j;
20163f012e29Smrg	uint32_t *pm4 = NULL;
20173f012e29Smrg	uint64_t bo_mc;
20183f012e29Smrg	void *ptr = NULL;
20193f012e29Smrg	int pm4_dw = 256;
20203f012e29Smrg	int sdma_write_length = 4;
20213f012e29Smrg	amdgpu_bo_handle handle;
20223f012e29Smrg	amdgpu_context_handle context_handle;
20233f012e29Smrg	struct amdgpu_cs_ib_info *ib_info;
20243f012e29Smrg	struct amdgpu_cs_request *ibs_request;
20253f012e29Smrg	amdgpu_bo_handle buf_handle;
20263f012e29Smrg	amdgpu_va_handle va_handle;
20273f012e29Smrg
20283f012e29Smrg	pm4 = calloc(pm4_dw, sizeof(*pm4));
20293f012e29Smrg	CU_ASSERT_NOT_EQUAL(pm4, NULL);
20303f012e29Smrg
20313f012e29Smrg	ib_info = calloc(1, sizeof(*ib_info));
20323f012e29Smrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
20333f012e29Smrg
20343f012e29Smrg	ibs_request = calloc(1, sizeof(*ibs_request));
20353f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
20363f012e29Smrg
20373f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
20383f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
20393f012e29Smrg
20403f012e29Smrg	posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
20413f012e29Smrg	CU_ASSERT_NOT_EQUAL(ptr, NULL);
20423f012e29Smrg	memset(ptr, 0, BUFFER_SIZE);
20433f012e29Smrg
20443f012e29Smrg	r = amdgpu_create_bo_from_user_mem(device_handle,
20453f012e29Smrg					   ptr, BUFFER_SIZE, &buf_handle);
20463f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
20473f012e29Smrg
20483f012e29Smrg	r = amdgpu_va_range_alloc(device_handle,
20493f012e29Smrg				  amdgpu_gpu_va_range_general,
20503f012e29Smrg				  BUFFER_SIZE, 1, 0, &bo_mc,
20513f012e29Smrg				  &va_handle, 0);
20523f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
20533f012e29Smrg
20543f012e29Smrg	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
20553f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
20563f012e29Smrg
20573f012e29Smrg	handle = buf_handle;
20583f012e29Smrg
20593f012e29Smrg	j = i = 0;
2060d8807b2fSmrg
2061d8807b2fSmrg	if (family_id == AMDGPU_FAMILY_SI)
2062d8807b2fSmrg		pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
2063d8807b2fSmrg				sdma_write_length);
2064d8807b2fSmrg	else
2065d8807b2fSmrg		pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
2066d8807b2fSmrg				SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
20673f012e29Smrg	pm4[i++] = 0xffffffff & bo_mc;
20683f012e29Smrg	pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
2069d8807b2fSmrg	if (family_id >= AMDGPU_FAMILY_AI)
2070d8807b2fSmrg		pm4[i++] = sdma_write_length - 1;
2071d8807b2fSmrg	else if (family_id != AMDGPU_FAMILY_SI)
2072d8807b2fSmrg		pm4[i++] = sdma_write_length;
20733f012e29Smrg
20743f012e29Smrg	while (j++ < sdma_write_length)
20753f012e29Smrg		pm4[i++] = 0xdeadbeaf;
20763f012e29Smrg
207700a23bdaSmrg	if (!fork()) {
207800a23bdaSmrg		pm4[0] = 0x0;
207900a23bdaSmrg		exit(0);
208000a23bdaSmrg	}
208100a23bdaSmrg
20823f012e29Smrg	amdgpu_test_exec_cs_helper(context_handle,
20833f012e29Smrg				   AMDGPU_HW_IP_DMA, 0,
20843f012e29Smrg				   i, pm4,
20853f012e29Smrg				   1, &handle,
20863f012e29Smrg				   ib_info, ibs_request);
20873f012e29Smrg	i = 0;
20883f012e29Smrg	while (i < sdma_write_length) {
20893f012e29Smrg		CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
20903f012e29Smrg	}
20913f012e29Smrg	free(ibs_request);
20923f012e29Smrg	free(ib_info);
20933f012e29Smrg	free(pm4);
20943f012e29Smrg
20953f012e29Smrg	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
20963f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
20973f012e29Smrg	r = amdgpu_va_range_free(va_handle);
20983f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
20993f012e29Smrg	r = amdgpu_bo_free(buf_handle);
21003f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
21013f012e29Smrg	free(ptr);
21023f012e29Smrg
21033f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
21043f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
210500a23bdaSmrg
210600a23bdaSmrg	wait(NULL);
210700a23bdaSmrg}
210800a23bdaSmrg
210900a23bdaSmrgstatic void amdgpu_sync_dependency_test(void)
211000a23bdaSmrg{
211100a23bdaSmrg	amdgpu_context_handle context_handle[2];
211200a23bdaSmrg	amdgpu_bo_handle ib_result_handle;
211300a23bdaSmrg	void *ib_result_cpu;
211400a23bdaSmrg	uint64_t ib_result_mc_address;
211500a23bdaSmrg	struct amdgpu_cs_request ibs_request;
211600a23bdaSmrg	struct amdgpu_cs_ib_info ib_info;
211700a23bdaSmrg	struct amdgpu_cs_fence fence_status;
211800a23bdaSmrg	uint32_t expired;
211900a23bdaSmrg	int i, j, r;
212000a23bdaSmrg	amdgpu_bo_list_handle bo_list;
212100a23bdaSmrg	amdgpu_va_handle va_handle;
212200a23bdaSmrg	static uint32_t *ptr;
212300a23bdaSmrg	uint64_t seq_no;
212400a23bdaSmrg
212500a23bdaSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
212600a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
212700a23bdaSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
212800a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
212900a23bdaSmrg
213000a23bdaSmrg	r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
213100a23bdaSmrg			AMDGPU_GEM_DOMAIN_GTT, 0,
213200a23bdaSmrg						    &ib_result_handle, &ib_result_cpu,
213300a23bdaSmrg						    &ib_result_mc_address, &va_handle);
213400a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
213500a23bdaSmrg
213600a23bdaSmrg	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
213700a23bdaSmrg			       &bo_list);
213800a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
213900a23bdaSmrg
214000a23bdaSmrg	ptr = ib_result_cpu;
214100a23bdaSmrg	i = 0;
214200a23bdaSmrg
214300a23bdaSmrg	memcpy(ptr + CODE_OFFSET , shader_bin, sizeof(shader_bin));
214400a23bdaSmrg
214500a23bdaSmrg	/* Dispatch minimal init config and verify it's executed */
214600a23bdaSmrg	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
214700a23bdaSmrg	ptr[i++] = 0x80000000;
214800a23bdaSmrg	ptr[i++] = 0x80000000;
214900a23bdaSmrg
215000a23bdaSmrg	ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
215100a23bdaSmrg	ptr[i++] = 0x80000000;
215200a23bdaSmrg
215300a23bdaSmrg
215400a23bdaSmrg	/* Program compute regs */
215500a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
215600a23bdaSmrg	ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
215700a23bdaSmrg	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
215800a23bdaSmrg	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;
215900a23bdaSmrg
216000a23bdaSmrg
216100a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
216200a23bdaSmrg	ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
216300a23bdaSmrg	/*
216400a23bdaSmrg	 * 002c0040         COMPUTE_PGM_RSRC1 <- VGPRS = 0
216500a23bdaSmrg	                                      SGPRS = 1
216600a23bdaSmrg	                                      PRIORITY = 0
216700a23bdaSmrg	                                      FLOAT_MODE = 192 (0xc0)
216800a23bdaSmrg	                                      PRIV = 0
216900a23bdaSmrg	                                      DX10_CLAMP = 1
217000a23bdaSmrg	                                      DEBUG_MODE = 0
217100a23bdaSmrg	                                      IEEE_MODE = 0
217200a23bdaSmrg	                                      BULKY = 0
217300a23bdaSmrg	                                      CDBG_USER = 0
217400a23bdaSmrg	 *
217500a23bdaSmrg	 */
217600a23bdaSmrg	ptr[i++] = 0x002c0040;
217700a23bdaSmrg
217800a23bdaSmrg
217900a23bdaSmrg	/*
218000a23bdaSmrg	 * 00000010         COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
218100a23bdaSmrg	                                      USER_SGPR = 8
218200a23bdaSmrg	                                      TRAP_PRESENT = 0
218300a23bdaSmrg	                                      TGID_X_EN = 0
218400a23bdaSmrg	                                      TGID_Y_EN = 0
218500a23bdaSmrg	                                      TGID_Z_EN = 0
218600a23bdaSmrg	                                      TG_SIZE_EN = 0
218700a23bdaSmrg	                                      TIDIG_COMP_CNT = 0
218800a23bdaSmrg	                                      EXCP_EN_MSB = 0
218900a23bdaSmrg	                                      LDS_SIZE = 0
219000a23bdaSmrg	                                      EXCP_EN = 0
219100a23bdaSmrg	 *
219200a23bdaSmrg	 */
219300a23bdaSmrg	ptr[i++] = 0x00000010;
219400a23bdaSmrg
219500a23bdaSmrg
219600a23bdaSmrg/*
219700a23bdaSmrg * 00000100         COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
219800a23bdaSmrg                                         WAVESIZE = 0
219900a23bdaSmrg *
220000a23bdaSmrg */
220100a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
220200a23bdaSmrg	ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
220300a23bdaSmrg	ptr[i++] = 0x00000100;
220400a23bdaSmrg
220500a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
220600a23bdaSmrg	ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
220700a23bdaSmrg	ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
220800a23bdaSmrg	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
220900a23bdaSmrg
221000a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
221100a23bdaSmrg	ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
221200a23bdaSmrg	ptr[i++] = 0;
221300a23bdaSmrg
221400a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
221500a23bdaSmrg	ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
221600a23bdaSmrg	ptr[i++] = 1;
221700a23bdaSmrg	ptr[i++] = 1;
221800a23bdaSmrg	ptr[i++] = 1;
221900a23bdaSmrg
222000a23bdaSmrg
222100a23bdaSmrg	/* Dispatch */
222200a23bdaSmrg	ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
222300a23bdaSmrg	ptr[i++] = 1;
222400a23bdaSmrg	ptr[i++] = 1;
222500a23bdaSmrg	ptr[i++] = 1;
222600a23bdaSmrg	ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */
222700a23bdaSmrg
222800a23bdaSmrg
222900a23bdaSmrg	while (i & 7)
223000a23bdaSmrg		ptr[i++] =  0xffff1000; /* type3 nop packet */
223100a23bdaSmrg
223200a23bdaSmrg	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
223300a23bdaSmrg	ib_info.ib_mc_address = ib_result_mc_address;
223400a23bdaSmrg	ib_info.size = i;
223500a23bdaSmrg
223600a23bdaSmrg	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
223700a23bdaSmrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
223800a23bdaSmrg	ibs_request.ring = 0;
223900a23bdaSmrg	ibs_request.number_of_ibs = 1;
224000a23bdaSmrg	ibs_request.ibs = &ib_info;
224100a23bdaSmrg	ibs_request.resources = bo_list;
224200a23bdaSmrg	ibs_request.fence_info.handle = NULL;
224300a23bdaSmrg
224400a23bdaSmrg	r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request, 1);
224500a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
224600a23bdaSmrg	seq_no = ibs_request.seq_no;
224700a23bdaSmrg
224800a23bdaSmrg
224900a23bdaSmrg
225000a23bdaSmrg	/* Prepare second command with dependency on the first */
225100a23bdaSmrg	j = i;
225200a23bdaSmrg	ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
225300a23bdaSmrg	ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
225400a23bdaSmrg	ptr[i++] =          0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
225500a23bdaSmrg	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
225600a23bdaSmrg	ptr[i++] = 99;
225700a23bdaSmrg
225800a23bdaSmrg	while (i & 7)
225900a23bdaSmrg		ptr[i++] =  0xffff1000; /* type3 nop packet */
226000a23bdaSmrg
226100a23bdaSmrg	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
226200a23bdaSmrg	ib_info.ib_mc_address = ib_result_mc_address + j * 4;
226300a23bdaSmrg	ib_info.size = i - j;
226400a23bdaSmrg
226500a23bdaSmrg	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
226600a23bdaSmrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
226700a23bdaSmrg	ibs_request.ring = 0;
226800a23bdaSmrg	ibs_request.number_of_ibs = 1;
226900a23bdaSmrg	ibs_request.ibs = &ib_info;
227000a23bdaSmrg	ibs_request.resources = bo_list;
227100a23bdaSmrg	ibs_request.fence_info.handle = NULL;
227200a23bdaSmrg
227300a23bdaSmrg	ibs_request.number_of_dependencies = 1;
227400a23bdaSmrg
227500a23bdaSmrg	ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
227600a23bdaSmrg	ibs_request.dependencies[0].context = context_handle[1];
227700a23bdaSmrg	ibs_request.dependencies[0].ip_instance = 0;
227800a23bdaSmrg	ibs_request.dependencies[0].ring = 0;
227900a23bdaSmrg	ibs_request.dependencies[0].fence = seq_no;
228000a23bdaSmrg
228100a23bdaSmrg
228200a23bdaSmrg	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request, 1);
228300a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
228400a23bdaSmrg
228500a23bdaSmrg
228600a23bdaSmrg	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
228700a23bdaSmrg	fence_status.context = context_handle[0];
228800a23bdaSmrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
228900a23bdaSmrg	fence_status.ip_instance = 0;
229000a23bdaSmrg	fence_status.ring = 0;
229100a23bdaSmrg	fence_status.fence = ibs_request.seq_no;
229200a23bdaSmrg
229300a23bdaSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
229400a23bdaSmrg		       AMDGPU_TIMEOUT_INFINITE,0, &expired);
229500a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
229600a23bdaSmrg
229700a23bdaSmrg	/* Expect the second command to wait for shader to complete */
229800a23bdaSmrg	CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);
229900a23bdaSmrg
230000a23bdaSmrg	r = amdgpu_bo_list_destroy(bo_list);
230100a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
230200a23bdaSmrg
230300a23bdaSmrg	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
230400a23bdaSmrg				     ib_result_mc_address, 4096);
230500a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
230600a23bdaSmrg
230700a23bdaSmrg	r = amdgpu_cs_ctx_free(context_handle[0]);
230800a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
230900a23bdaSmrg	r = amdgpu_cs_ctx_free(context_handle[1]);
231000a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
231100a23bdaSmrg
231200a23bdaSmrg	free(ibs_request.dependencies);
23133f012e29Smrg}
23145324fb0dSmrg
23159bd392adSmrgstatic int amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t *ptr, int family)
23169bd392adSmrg{
23179bd392adSmrg	struct amdgpu_test_shader *shader;
23189bd392adSmrg	int i, loop = 0x10000;
23199bd392adSmrg
23209bd392adSmrg	switch (family) {
23219bd392adSmrg		case AMDGPU_FAMILY_AI:
23229bd392adSmrg			shader = &memcpy_cs_hang_slow_ai;
23239bd392adSmrg			break;
23249bd392adSmrg		case AMDGPU_FAMILY_RV:
23259bd392adSmrg			shader = &memcpy_cs_hang_slow_rv;
23269bd392adSmrg			break;
23279bd392adSmrg		default:
23289bd392adSmrg			return -1;
23299bd392adSmrg			break;
23309bd392adSmrg	}
23319bd392adSmrg
23329bd392adSmrg	memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
23339bd392adSmrg
23349bd392adSmrg	for (i = 0; i < loop; i++)
23359bd392adSmrg		memcpy(ptr + shader->header_length + shader->body_length * i,
23369bd392adSmrg			shader->shader + shader->header_length,
23379bd392adSmrg			shader->body_length * sizeof(uint32_t));
23389bd392adSmrg
23399bd392adSmrg	memcpy(ptr + shader->header_length + shader->body_length * loop,
23409bd392adSmrg		shader->shader + shader->header_length + shader->body_length,
23419bd392adSmrg		shader->foot_length * sizeof(uint32_t));
23429bd392adSmrg
23439bd392adSmrg	return 0;
23449bd392adSmrg}
23459bd392adSmrg
23465324fb0dSmrgstatic int amdgpu_dispatch_load_cs_shader(uint8_t *ptr,
23475324fb0dSmrg					   int cs_type)
23485324fb0dSmrg{
23495324fb0dSmrg	uint32_t shader_size;
23505324fb0dSmrg	const uint32_t *shader;
23515324fb0dSmrg
23525324fb0dSmrg	switch (cs_type) {
23535324fb0dSmrg		case CS_BUFFERCLEAR:
23545324fb0dSmrg			shader = bufferclear_cs_shader_gfx9;
23555324fb0dSmrg			shader_size = sizeof(bufferclear_cs_shader_gfx9);
23565324fb0dSmrg			break;
23575324fb0dSmrg		case CS_BUFFERCOPY:
23585324fb0dSmrg			shader = buffercopy_cs_shader_gfx9;
23595324fb0dSmrg			shader_size = sizeof(buffercopy_cs_shader_gfx9);
23605324fb0dSmrg			break;
23619bd392adSmrg		case CS_HANG:
23629bd392adSmrg			shader = memcpy_ps_hang;
23639bd392adSmrg			shader_size = sizeof(memcpy_ps_hang);
23649bd392adSmrg			break;
23655324fb0dSmrg		default:
23665324fb0dSmrg			return -1;
23675324fb0dSmrg			break;
23685324fb0dSmrg	}
23695324fb0dSmrg
23705324fb0dSmrg	memcpy(ptr, shader, shader_size);
23715324fb0dSmrg	return 0;
23725324fb0dSmrg}
23735324fb0dSmrg
23745324fb0dSmrgstatic int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type)
23755324fb0dSmrg{
23765324fb0dSmrg	int i = 0;
23775324fb0dSmrg
23785324fb0dSmrg	/* Write context control and load shadowing register if necessary */
23795324fb0dSmrg	if (ip_type == AMDGPU_HW_IP_GFX) {
23805324fb0dSmrg		ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
23815324fb0dSmrg		ptr[i++] = 0x80000000;
23825324fb0dSmrg		ptr[i++] = 0x80000000;
23835324fb0dSmrg	}
23845324fb0dSmrg
23855324fb0dSmrg	/* Issue commands to set default compute state. */
23865324fb0dSmrg	/* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */
23875324fb0dSmrg	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 3);
23885324fb0dSmrg	ptr[i++] = 0x204;
23895324fb0dSmrg	i += 3;
239088f8a8d2Smrg
23915324fb0dSmrg	/* clear mmCOMPUTE_TMPRING_SIZE */
23925324fb0dSmrg	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
23935324fb0dSmrg	ptr[i++] = 0x218;
23945324fb0dSmrg	ptr[i++] = 0;
23955324fb0dSmrg
23965324fb0dSmrg	return i;
23975324fb0dSmrg}
23985324fb0dSmrg
23995324fb0dSmrgstatic int amdgpu_dispatch_write_cumask(uint32_t *ptr)
24005324fb0dSmrg{
24015324fb0dSmrg	int i = 0;
24025324fb0dSmrg
24035324fb0dSmrg	/*  Issue commands to set cu mask used in current dispatch */
24045324fb0dSmrg	/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */
24055324fb0dSmrg	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
24065324fb0dSmrg	ptr[i++] = 0x216;
24075324fb0dSmrg	ptr[i++] = 0xffffffff;
24085324fb0dSmrg	ptr[i++] = 0xffffffff;
24095324fb0dSmrg	/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */
24105324fb0dSmrg	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
24115324fb0dSmrg	ptr[i++] = 0x219;
24125324fb0dSmrg	ptr[i++] = 0xffffffff;
24135324fb0dSmrg	ptr[i++] = 0xffffffff;
24145324fb0dSmrg
24155324fb0dSmrg	return i;
24165324fb0dSmrg}
24175324fb0dSmrg
24185324fb0dSmrgstatic int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr)
24195324fb0dSmrg{
24205324fb0dSmrg	int i, j;
24215324fb0dSmrg
24225324fb0dSmrg	i = 0;
24235324fb0dSmrg
24245324fb0dSmrg	/* Writes shader state to HW */
24255324fb0dSmrg	/* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */
24265324fb0dSmrg	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
24275324fb0dSmrg	ptr[i++] = 0x20c;
24285324fb0dSmrg	ptr[i++] = (shader_addr >> 8);
24295324fb0dSmrg	ptr[i++] = (shader_addr >> 40);
24305324fb0dSmrg	/* write sh regs*/
24315324fb0dSmrg	for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) {
24325324fb0dSmrg		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
24335324fb0dSmrg		/* - Gfx9ShRegBase */
24345324fb0dSmrg		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00;
24355324fb0dSmrg		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1];
24365324fb0dSmrg	}
24375324fb0dSmrg
24385324fb0dSmrg	return i;
24395324fb0dSmrg}
24405324fb0dSmrg
24415324fb0dSmrgstatic void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
24425324fb0dSmrg					 uint32_t ip_type,
24435324fb0dSmrg					 uint32_t ring)
24445324fb0dSmrg{
24455324fb0dSmrg	amdgpu_context_handle context_handle;
24465324fb0dSmrg	amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3];
24475324fb0dSmrg	volatile unsigned char *ptr_dst;
24485324fb0dSmrg	void *ptr_shader;
24495324fb0dSmrg	uint32_t *ptr_cmd;
24505324fb0dSmrg	uint64_t mc_address_dst, mc_address_shader, mc_address_cmd;
24515324fb0dSmrg	amdgpu_va_handle va_dst, va_shader, va_cmd;
24525324fb0dSmrg	int i, r;
24535324fb0dSmrg	int bo_dst_size = 16384;
24545324fb0dSmrg	int bo_shader_size = 4096;
24555324fb0dSmrg	int bo_cmd_size = 4096;
24565324fb0dSmrg	struct amdgpu_cs_request ibs_request = {0};
24575324fb0dSmrg	struct amdgpu_cs_ib_info ib_info= {0};
24585324fb0dSmrg	amdgpu_bo_list_handle bo_list;
24595324fb0dSmrg	struct amdgpu_cs_fence fence_status = {0};
24605324fb0dSmrg	uint32_t expired;
24615324fb0dSmrg
24625324fb0dSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
24635324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
24645324fb0dSmrg
24655324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
24665324fb0dSmrg					AMDGPU_GEM_DOMAIN_GTT, 0,
24675324fb0dSmrg					&bo_cmd, (void **)&ptr_cmd,
24685324fb0dSmrg					&mc_address_cmd, &va_cmd);
24695324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
24705324fb0dSmrg	memset(ptr_cmd, 0, bo_cmd_size);
24715324fb0dSmrg
24725324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
24735324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
24745324fb0dSmrg					&bo_shader, &ptr_shader,
24755324fb0dSmrg					&mc_address_shader, &va_shader);
24765324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
247788f8a8d2Smrg	memset(ptr_shader, 0, bo_shader_size);
24785324fb0dSmrg
24795324fb0dSmrg	r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR);
24805324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
24815324fb0dSmrg
24825324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
24835324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
24845324fb0dSmrg					&bo_dst, (void **)&ptr_dst,
24855324fb0dSmrg					&mc_address_dst, &va_dst);
24865324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
24875324fb0dSmrg
24885324fb0dSmrg	i = 0;
24895324fb0dSmrg	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);
24905324fb0dSmrg
24915324fb0dSmrg	/*  Issue commands to set cu mask used in current dispatch */
24925324fb0dSmrg	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);
24935324fb0dSmrg
24945324fb0dSmrg	/* Writes shader state to HW */
24955324fb0dSmrg	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);
24965324fb0dSmrg
24975324fb0dSmrg	/* Write constant data */
24985324fb0dSmrg	/* Writes the UAV constant data to the SGPRs. */
24995324fb0dSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
25005324fb0dSmrg	ptr_cmd[i++] = 0x240;
25015324fb0dSmrg	ptr_cmd[i++] = mc_address_dst;
25025324fb0dSmrg	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
25035324fb0dSmrg	ptr_cmd[i++] = 0x400;
25045324fb0dSmrg	ptr_cmd[i++] = 0x74fac;
25055324fb0dSmrg
25065324fb0dSmrg	/* Sets a range of pixel shader constants */
25075324fb0dSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
25085324fb0dSmrg	ptr_cmd[i++] = 0x244;
25095324fb0dSmrg	ptr_cmd[i++] = 0x22222222;
25105324fb0dSmrg	ptr_cmd[i++] = 0x22222222;
25115324fb0dSmrg	ptr_cmd[i++] = 0x22222222;
25125324fb0dSmrg	ptr_cmd[i++] = 0x22222222;
25135324fb0dSmrg
251488f8a8d2Smrg	/* clear mmCOMPUTE_RESOURCE_LIMITS */
251588f8a8d2Smrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
251688f8a8d2Smrg	ptr_cmd[i++] = 0x215;
251788f8a8d2Smrg	ptr_cmd[i++] = 0;
251888f8a8d2Smrg
25195324fb0dSmrg	/* dispatch direct command */
25205324fb0dSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
25215324fb0dSmrg	ptr_cmd[i++] = 0x10;
25225324fb0dSmrg	ptr_cmd[i++] = 1;
25235324fb0dSmrg	ptr_cmd[i++] = 1;
25245324fb0dSmrg	ptr_cmd[i++] = 1;
25255324fb0dSmrg
25265324fb0dSmrg	while (i & 7)
25275324fb0dSmrg		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
25285324fb0dSmrg
25295324fb0dSmrg	resources[0] = bo_dst;
25305324fb0dSmrg	resources[1] = bo_shader;
25315324fb0dSmrg	resources[2] = bo_cmd;
25325324fb0dSmrg	r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list);
25335324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
25345324fb0dSmrg
25355324fb0dSmrg	ib_info.ib_mc_address = mc_address_cmd;
25365324fb0dSmrg	ib_info.size = i;
25375324fb0dSmrg	ibs_request.ip_type = ip_type;
25385324fb0dSmrg	ibs_request.ring = ring;
25395324fb0dSmrg	ibs_request.resources = bo_list;
25405324fb0dSmrg	ibs_request.number_of_ibs = 1;
25415324fb0dSmrg	ibs_request.ibs = &ib_info;
25425324fb0dSmrg	ibs_request.fence_info.handle = NULL;
25435324fb0dSmrg
25445324fb0dSmrg	/* submit CS */
25455324fb0dSmrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
25465324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
25475324fb0dSmrg
25485324fb0dSmrg	r = amdgpu_bo_list_destroy(bo_list);
25495324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
25505324fb0dSmrg
25515324fb0dSmrg	fence_status.ip_type = ip_type;
25525324fb0dSmrg	fence_status.ip_instance = 0;
25535324fb0dSmrg	fence_status.ring = ring;
25545324fb0dSmrg	fence_status.context = context_handle;
25555324fb0dSmrg	fence_status.fence = ibs_request.seq_no;
25565324fb0dSmrg
25575324fb0dSmrg	/* wait for IB accomplished */
25585324fb0dSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
25595324fb0dSmrg					 AMDGPU_TIMEOUT_INFINITE,
25605324fb0dSmrg					 0, &expired);
25615324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
25625324fb0dSmrg	CU_ASSERT_EQUAL(expired, true);
25635324fb0dSmrg
25645324fb0dSmrg	/* verify if memset test result meets with expected */
25655324fb0dSmrg	i = 0;
25665324fb0dSmrg	while(i < bo_dst_size) {
25675324fb0dSmrg		CU_ASSERT_EQUAL(ptr_dst[i++], 0x22);
25685324fb0dSmrg	}
25695324fb0dSmrg
25705324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
25715324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
25725324fb0dSmrg
25735324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
25745324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
25755324fb0dSmrg
25765324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
25775324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
25785324fb0dSmrg
25795324fb0dSmrg	r = amdgpu_cs_ctx_free(context_handle);
25805324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
25815324fb0dSmrg}
25825324fb0dSmrg
25835324fb0dSmrgstatic void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
25845324fb0dSmrg					uint32_t ip_type,
25859bd392adSmrg					uint32_t ring,
25869bd392adSmrg					int hang)
25875324fb0dSmrg{
25885324fb0dSmrg	amdgpu_context_handle context_handle;
25895324fb0dSmrg	amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
25905324fb0dSmrg	volatile unsigned char *ptr_dst;
25915324fb0dSmrg	void *ptr_shader;
25925324fb0dSmrg	unsigned char *ptr_src;
25935324fb0dSmrg	uint32_t *ptr_cmd;
25945324fb0dSmrg	uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
25955324fb0dSmrg	amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
25965324fb0dSmrg	int i, r;
25975324fb0dSmrg	int bo_dst_size = 16384;
25985324fb0dSmrg	int bo_shader_size = 4096;
25995324fb0dSmrg	int bo_cmd_size = 4096;
26005324fb0dSmrg	struct amdgpu_cs_request ibs_request = {0};
26015324fb0dSmrg	struct amdgpu_cs_ib_info ib_info= {0};
26029bd392adSmrg	uint32_t expired, hang_state, hangs;
26039bd392adSmrg	enum cs_type cs_type;
26045324fb0dSmrg	amdgpu_bo_list_handle bo_list;
26055324fb0dSmrg	struct amdgpu_cs_fence fence_status = {0};
26065324fb0dSmrg
26075324fb0dSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
26085324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
26095324fb0dSmrg
26105324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
26115324fb0dSmrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
26125324fb0dSmrg				    &bo_cmd, (void **)&ptr_cmd,
26135324fb0dSmrg				    &mc_address_cmd, &va_cmd);
26145324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
26155324fb0dSmrg	memset(ptr_cmd, 0, bo_cmd_size);
26165324fb0dSmrg
26175324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
26185324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
26195324fb0dSmrg					&bo_shader, &ptr_shader,
26205324fb0dSmrg					&mc_address_shader, &va_shader);
26215324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
262288f8a8d2Smrg	memset(ptr_shader, 0, bo_shader_size);
26235324fb0dSmrg
26249bd392adSmrg	cs_type = hang ? CS_HANG : CS_BUFFERCOPY;
26259bd392adSmrg	r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type);
26265324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
26275324fb0dSmrg
26285324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
26295324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
26305324fb0dSmrg					&bo_src, (void **)&ptr_src,
26315324fb0dSmrg					&mc_address_src, &va_src);
26325324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
26335324fb0dSmrg
26345324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
26355324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
26365324fb0dSmrg					&bo_dst, (void **)&ptr_dst,
26375324fb0dSmrg					&mc_address_dst, &va_dst);
26385324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
26395324fb0dSmrg
26405324fb0dSmrg	memset(ptr_src, 0x55, bo_dst_size);
26415324fb0dSmrg
26425324fb0dSmrg	i = 0;
26435324fb0dSmrg	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);
26445324fb0dSmrg
26455324fb0dSmrg	/*  Issue commands to set cu mask used in current dispatch */
26465324fb0dSmrg	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);
26475324fb0dSmrg
26485324fb0dSmrg	/* Writes shader state to HW */
26495324fb0dSmrg	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);
26505324fb0dSmrg
26515324fb0dSmrg	/* Write constant data */
26525324fb0dSmrg	/* Writes the texture resource constants data to the SGPRs */
26535324fb0dSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
26545324fb0dSmrg	ptr_cmd[i++] = 0x240;
26555324fb0dSmrg	ptr_cmd[i++] = mc_address_src;
26565324fb0dSmrg	ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
26575324fb0dSmrg	ptr_cmd[i++] = 0x400;
26585324fb0dSmrg	ptr_cmd[i++] = 0x74fac;
26595324fb0dSmrg
26605324fb0dSmrg	/* Writes the UAV constant data to the SGPRs. */
26615324fb0dSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
26625324fb0dSmrg	ptr_cmd[i++] = 0x244;
26635324fb0dSmrg	ptr_cmd[i++] = mc_address_dst;
26645324fb0dSmrg	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
26655324fb0dSmrg	ptr_cmd[i++] = 0x400;
26665324fb0dSmrg	ptr_cmd[i++] = 0x74fac;
26675324fb0dSmrg
266888f8a8d2Smrg	/* clear mmCOMPUTE_RESOURCE_LIMITS */
266988f8a8d2Smrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
267088f8a8d2Smrg	ptr_cmd[i++] = 0x215;
267188f8a8d2Smrg	ptr_cmd[i++] = 0;
267288f8a8d2Smrg
26735324fb0dSmrg	/* dispatch direct command */
26745324fb0dSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
26755324fb0dSmrg	ptr_cmd[i++] = 0x10;
26765324fb0dSmrg	ptr_cmd[i++] = 1;
26775324fb0dSmrg	ptr_cmd[i++] = 1;
26785324fb0dSmrg	ptr_cmd[i++] = 1;
26795324fb0dSmrg
26805324fb0dSmrg	while (i & 7)
26815324fb0dSmrg		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
26825324fb0dSmrg
26835324fb0dSmrg	resources[0] = bo_shader;
26845324fb0dSmrg	resources[1] = bo_src;
26855324fb0dSmrg	resources[2] = bo_dst;
26865324fb0dSmrg	resources[3] = bo_cmd;
26875324fb0dSmrg	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
26885324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
26895324fb0dSmrg
26905324fb0dSmrg	ib_info.ib_mc_address = mc_address_cmd;
26915324fb0dSmrg	ib_info.size = i;
26925324fb0dSmrg	ibs_request.ip_type = ip_type;
26935324fb0dSmrg	ibs_request.ring = ring;
26945324fb0dSmrg	ibs_request.resources = bo_list;
26955324fb0dSmrg	ibs_request.number_of_ibs = 1;
26965324fb0dSmrg	ibs_request.ibs = &ib_info;
26975324fb0dSmrg	ibs_request.fence_info.handle = NULL;
26985324fb0dSmrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
26995324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
27005324fb0dSmrg
27015324fb0dSmrg	fence_status.ip_type = ip_type;
27025324fb0dSmrg	fence_status.ip_instance = 0;
27035324fb0dSmrg	fence_status.ring = ring;
27045324fb0dSmrg	fence_status.context = context_handle;
27055324fb0dSmrg	fence_status.fence = ibs_request.seq_no;
27065324fb0dSmrg
27075324fb0dSmrg	/* wait for IB accomplished */
27085324fb0dSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
27095324fb0dSmrg					 AMDGPU_TIMEOUT_INFINITE,
27105324fb0dSmrg					 0, &expired);
27115324fb0dSmrg
27129bd392adSmrg	if (!hang) {
27139bd392adSmrg		CU_ASSERT_EQUAL(r, 0);
27149bd392adSmrg		CU_ASSERT_EQUAL(expired, true);
27159bd392adSmrg
27169bd392adSmrg		/* verify if memcpy test result meets with expected */
27179bd392adSmrg		i = 0;
27189bd392adSmrg		while(i < bo_dst_size) {
27199bd392adSmrg			CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
27209bd392adSmrg			i++;
27219bd392adSmrg		}
27229bd392adSmrg	} else {
27239bd392adSmrg		r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
27249bd392adSmrg		CU_ASSERT_EQUAL(r, 0);
27259bd392adSmrg		CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
27265324fb0dSmrg	}
27275324fb0dSmrg
27285324fb0dSmrg	r = amdgpu_bo_list_destroy(bo_list);
27295324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
27305324fb0dSmrg
27315324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
27325324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
27335324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
27345324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
27355324fb0dSmrg
27365324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
27375324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
27385324fb0dSmrg
27395324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
27405324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
27415324fb0dSmrg
27425324fb0dSmrg	r = amdgpu_cs_ctx_free(context_handle);
27435324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
27445324fb0dSmrg}
274588f8a8d2Smrg
274688f8a8d2Smrgstatic void amdgpu_compute_dispatch_test(void)
27475324fb0dSmrg{
27485324fb0dSmrg	int r;
27495324fb0dSmrg	struct drm_amdgpu_info_hw_ip info;
27505324fb0dSmrg	uint32_t ring_id;
27515324fb0dSmrg
27525324fb0dSmrg	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
27535324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
275488f8a8d2Smrg	if (!info.available_rings)
275588f8a8d2Smrg		printf("SKIP ... as there's no compute ring\n");
27565324fb0dSmrg
27575324fb0dSmrg	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
27585324fb0dSmrg		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id);
27599bd392adSmrg		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, 0);
27605324fb0dSmrg	}
276188f8a8d2Smrg}
276288f8a8d2Smrg
276388f8a8d2Smrgstatic void amdgpu_gfx_dispatch_test(void)
276488f8a8d2Smrg{
276588f8a8d2Smrg	int r;
276688f8a8d2Smrg	struct drm_amdgpu_info_hw_ip info;
276788f8a8d2Smrg	uint32_t ring_id;
27685324fb0dSmrg
27695324fb0dSmrg	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
27705324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
277188f8a8d2Smrg	if (!info.available_rings)
277288f8a8d2Smrg		printf("SKIP ... as there's no graphics ring\n");
27735324fb0dSmrg
27745324fb0dSmrg	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
27755324fb0dSmrg		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id);
27769bd392adSmrg		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, 0);
27779bd392adSmrg	}
27789bd392adSmrg}
27799bd392adSmrg
27809bd392adSmrgvoid amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
27819bd392adSmrg{
27829bd392adSmrg	int r;
27839bd392adSmrg	struct drm_amdgpu_info_hw_ip info;
27849bd392adSmrg	uint32_t ring_id;
27859bd392adSmrg
27869bd392adSmrg	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
27879bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
27889bd392adSmrg	if (!info.available_rings)
27899bd392adSmrg		printf("SKIP ... as there's no ring for ip %d\n", ip_type);
27909bd392adSmrg
27919bd392adSmrg	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
27929bd392adSmrg		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
27939bd392adSmrg		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 1);
27949bd392adSmrg		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
27959bd392adSmrg	}
27969bd392adSmrg}
27979bd392adSmrg
27989bd392adSmrgstatic void amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_handle,
27999bd392adSmrg						  uint32_t ip_type, uint32_t ring)
28009bd392adSmrg{
28019bd392adSmrg	amdgpu_context_handle context_handle;
28029bd392adSmrg	amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
28039bd392adSmrg	volatile unsigned char *ptr_dst;
28049bd392adSmrg	void *ptr_shader;
28059bd392adSmrg	unsigned char *ptr_src;
28069bd392adSmrg	uint32_t *ptr_cmd;
28079bd392adSmrg	uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
28089bd392adSmrg	amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
28099bd392adSmrg	int i, r;
28109bd392adSmrg	int bo_dst_size = 0x4000000;
28119bd392adSmrg	int bo_shader_size = 0x400000;
28129bd392adSmrg	int bo_cmd_size = 4096;
28139bd392adSmrg	struct amdgpu_cs_request ibs_request = {0};
28149bd392adSmrg	struct amdgpu_cs_ib_info ib_info= {0};
28159bd392adSmrg	uint32_t hang_state, hangs, expired;
28169bd392adSmrg	struct amdgpu_gpu_info gpu_info = {0};
28179bd392adSmrg	amdgpu_bo_list_handle bo_list;
28189bd392adSmrg	struct amdgpu_cs_fence fence_status = {0};
28199bd392adSmrg
28209bd392adSmrg	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
28219bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
28229bd392adSmrg
28239bd392adSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
28249bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
28259bd392adSmrg
28269bd392adSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
28279bd392adSmrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
28289bd392adSmrg				    &bo_cmd, (void **)&ptr_cmd,
28299bd392adSmrg				    &mc_address_cmd, &va_cmd);
28309bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
28319bd392adSmrg	memset(ptr_cmd, 0, bo_cmd_size);
28329bd392adSmrg
28339bd392adSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
28349bd392adSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
28359bd392adSmrg					&bo_shader, &ptr_shader,
28369bd392adSmrg					&mc_address_shader, &va_shader);
28379bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
28389bd392adSmrg	memset(ptr_shader, 0, bo_shader_size);
28399bd392adSmrg
28409bd392adSmrg	r = amdgpu_dispatch_load_cs_shader_hang_slow(ptr_shader, gpu_info.family_id);
28419bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
28429bd392adSmrg
28439bd392adSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
28449bd392adSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
28459bd392adSmrg					&bo_src, (void **)&ptr_src,
28469bd392adSmrg					&mc_address_src, &va_src);
28479bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
28489bd392adSmrg
28499bd392adSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
28509bd392adSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
28519bd392adSmrg					&bo_dst, (void **)&ptr_dst,
28529bd392adSmrg					&mc_address_dst, &va_dst);
28539bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
28549bd392adSmrg
28559bd392adSmrg	memset(ptr_src, 0x55, bo_dst_size);
28569bd392adSmrg
28579bd392adSmrg	i = 0;
28589bd392adSmrg	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);
28599bd392adSmrg
28609bd392adSmrg	/*  Issue commands to set cu mask used in current dispatch */
28619bd392adSmrg	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);
28629bd392adSmrg
28639bd392adSmrg	/* Writes shader state to HW */
28649bd392adSmrg	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);
28659bd392adSmrg
28669bd392adSmrg	/* Write constant data */
28679bd392adSmrg	/* Writes the texture resource constants data to the SGPRs */
28689bd392adSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
28699bd392adSmrg	ptr_cmd[i++] = 0x240;
28709bd392adSmrg	ptr_cmd[i++] = mc_address_src;
28719bd392adSmrg	ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
28729bd392adSmrg	ptr_cmd[i++] = 0x400000;
28739bd392adSmrg	ptr_cmd[i++] = 0x74fac;
28749bd392adSmrg
28759bd392adSmrg	/* Writes the UAV constant data to the SGPRs. */
28769bd392adSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
28779bd392adSmrg	ptr_cmd[i++] = 0x244;
28789bd392adSmrg	ptr_cmd[i++] = mc_address_dst;
28799bd392adSmrg	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
28809bd392adSmrg	ptr_cmd[i++] = 0x400000;
28819bd392adSmrg	ptr_cmd[i++] = 0x74fac;
28829bd392adSmrg
28839bd392adSmrg	/* clear mmCOMPUTE_RESOURCE_LIMITS */
28849bd392adSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
28859bd392adSmrg	ptr_cmd[i++] = 0x215;
28869bd392adSmrg	ptr_cmd[i++] = 0;
28879bd392adSmrg
28889bd392adSmrg	/* dispatch direct command */
28899bd392adSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
28909bd392adSmrg	ptr_cmd[i++] = 0x10000;
28919bd392adSmrg	ptr_cmd[i++] = 1;
28929bd392adSmrg	ptr_cmd[i++] = 1;
28939bd392adSmrg	ptr_cmd[i++] = 1;
28949bd392adSmrg
28959bd392adSmrg	while (i & 7)
28969bd392adSmrg		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
28979bd392adSmrg
28989bd392adSmrg	resources[0] = bo_shader;
28999bd392adSmrg	resources[1] = bo_src;
29009bd392adSmrg	resources[2] = bo_dst;
29019bd392adSmrg	resources[3] = bo_cmd;
29029bd392adSmrg	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
29039bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
29049bd392adSmrg
29059bd392adSmrg	ib_info.ib_mc_address = mc_address_cmd;
29069bd392adSmrg	ib_info.size = i;
29079bd392adSmrg	ibs_request.ip_type = ip_type;
29089bd392adSmrg	ibs_request.ring = ring;
29099bd392adSmrg	ibs_request.resources = bo_list;
29109bd392adSmrg	ibs_request.number_of_ibs = 1;
29119bd392adSmrg	ibs_request.ibs = &ib_info;
29129bd392adSmrg	ibs_request.fence_info.handle = NULL;
29139bd392adSmrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
29149bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
29159bd392adSmrg
29169bd392adSmrg	fence_status.ip_type = ip_type;
29179bd392adSmrg	fence_status.ip_instance = 0;
29189bd392adSmrg	fence_status.ring = ring;
29199bd392adSmrg	fence_status.context = context_handle;
29209bd392adSmrg	fence_status.fence = ibs_request.seq_no;
29219bd392adSmrg
29229bd392adSmrg	/* wait for IB accomplished */
29239bd392adSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
29249bd392adSmrg					 AMDGPU_TIMEOUT_INFINITE,
29259bd392adSmrg					 0, &expired);
29269bd392adSmrg
29279bd392adSmrg	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
29289bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
29299bd392adSmrg	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
29309bd392adSmrg
29319bd392adSmrg	r = amdgpu_bo_list_destroy(bo_list);
29329bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
29339bd392adSmrg
29349bd392adSmrg	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
29359bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
29369bd392adSmrg	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
29379bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
29389bd392adSmrg
29399bd392adSmrg	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
29409bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
29419bd392adSmrg
29429bd392adSmrg	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
29439bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
29449bd392adSmrg
29459bd392adSmrg	r = amdgpu_cs_ctx_free(context_handle);
29469bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
29479bd392adSmrg}
29489bd392adSmrg
29499bd392adSmrgvoid amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
29509bd392adSmrg{
29519bd392adSmrg	int r;
29529bd392adSmrg	struct drm_amdgpu_info_hw_ip info;
29539bd392adSmrg	uint32_t ring_id;
29549bd392adSmrg
29559bd392adSmrg	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
29569bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
29579bd392adSmrg	if (!info.available_rings)
29589bd392adSmrg		printf("SKIP ... as there's no ring for ip %d\n", ip_type);
29599bd392adSmrg
29609bd392adSmrg	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
29619bd392adSmrg		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
29629bd392adSmrg		amdgpu_memcpy_dispatch_hang_slow_test(device_handle, ip_type, ring_id);
29639bd392adSmrg		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
29649bd392adSmrg	}
29659bd392adSmrg}
29669bd392adSmrg
29679bd392adSmrgstatic int amdgpu_draw_load_ps_shader_hang_slow(uint32_t *ptr, int family)
29689bd392adSmrg{
29699bd392adSmrg	struct amdgpu_test_shader *shader;
29709bd392adSmrg	int i, loop = 0x40000;
29719bd392adSmrg
29729bd392adSmrg	switch (family) {
29739bd392adSmrg		case AMDGPU_FAMILY_AI:
29749bd392adSmrg		case AMDGPU_FAMILY_RV:
29759bd392adSmrg			shader = &memcpy_ps_hang_slow_ai;
29769bd392adSmrg			break;
29779bd392adSmrg		default:
29789bd392adSmrg			return -1;
29799bd392adSmrg			break;
29805324fb0dSmrg	}
29819bd392adSmrg
29829bd392adSmrg	memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
29839bd392adSmrg
29849bd392adSmrg	for (i = 0; i < loop; i++)
29859bd392adSmrg		memcpy(ptr + shader->header_length + shader->body_length * i,
29869bd392adSmrg			shader->shader + shader->header_length,
29879bd392adSmrg			shader->body_length * sizeof(uint32_t));
29889bd392adSmrg
29899bd392adSmrg	memcpy(ptr + shader->header_length + shader->body_length * loop,
29909bd392adSmrg		shader->shader + shader->header_length + shader->body_length,
29919bd392adSmrg		shader->foot_length * sizeof(uint32_t));
29929bd392adSmrg
29939bd392adSmrg	return 0;
29945324fb0dSmrg}
29955324fb0dSmrg
29965324fb0dSmrgstatic int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type)
29975324fb0dSmrg{
29985324fb0dSmrg	int i;
29995324fb0dSmrg	uint32_t shader_offset= 256;
30005324fb0dSmrg	uint32_t mem_offset, patch_code_offset;
30015324fb0dSmrg	uint32_t shader_size, patchinfo_code_size;
30025324fb0dSmrg	const uint32_t *shader;
30035324fb0dSmrg	const uint32_t *patchinfo_code;
30045324fb0dSmrg	const uint32_t *patchcode_offset;
30055324fb0dSmrg
30065324fb0dSmrg	switch (ps_type) {
30075324fb0dSmrg		case PS_CONST:
30085324fb0dSmrg			shader = ps_const_shader_gfx9;
30095324fb0dSmrg			shader_size = sizeof(ps_const_shader_gfx9);
30105324fb0dSmrg			patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9;
30115324fb0dSmrg			patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9;
30125324fb0dSmrg			patchcode_offset = ps_const_shader_patchinfo_offset_gfx9;
30135324fb0dSmrg			break;
30145324fb0dSmrg		case PS_TEX:
30155324fb0dSmrg			shader = ps_tex_shader_gfx9;
30165324fb0dSmrg			shader_size = sizeof(ps_tex_shader_gfx9);
30175324fb0dSmrg			patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx9;
30185324fb0dSmrg			patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9;
30195324fb0dSmrg			patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9;
30205324fb0dSmrg			break;
30219bd392adSmrg		case PS_HANG:
30229bd392adSmrg			shader = memcpy_ps_hang;
30239bd392adSmrg			shader_size = sizeof(memcpy_ps_hang);
30249bd392adSmrg
30259bd392adSmrg			memcpy(ptr, shader, shader_size);
30269bd392adSmrg			return 0;
30275324fb0dSmrg		default:
30285324fb0dSmrg			return -1;
30295324fb0dSmrg			break;
30305324fb0dSmrg	}
30315324fb0dSmrg
30325324fb0dSmrg	/* write main shader program */
30335324fb0dSmrg	for (i = 0 ; i < 10; i++) {
30345324fb0dSmrg		mem_offset = i * shader_offset;
30355324fb0dSmrg		memcpy(ptr + mem_offset, shader, shader_size);
30365324fb0dSmrg	}
30375324fb0dSmrg
30385324fb0dSmrg	/* overwrite patch codes */
30395324fb0dSmrg	for (i = 0 ; i < 10; i++) {
30405324fb0dSmrg		mem_offset = i * shader_offset + patchcode_offset[0] * sizeof(uint32_t);
30415324fb0dSmrg		patch_code_offset = i * patchinfo_code_size;
30425324fb0dSmrg		memcpy(ptr + mem_offset,
30435324fb0dSmrg			patchinfo_code + patch_code_offset,
30445324fb0dSmrg			patchinfo_code_size * sizeof(uint32_t));
30455324fb0dSmrg	}
30465324fb0dSmrg
30475324fb0dSmrg	return 0;
30485324fb0dSmrg}
30495324fb0dSmrg
30505324fb0dSmrg/* load RectPosTexFast_VS */
30515324fb0dSmrgstatic int amdgpu_draw_load_vs_shader(uint8_t *ptr)
30525324fb0dSmrg{
30535324fb0dSmrg	const uint32_t *shader;
30545324fb0dSmrg	uint32_t shader_size;
30555324fb0dSmrg
30565324fb0dSmrg	shader = vs_RectPosTexFast_shader_gfx9;
30575324fb0dSmrg	shader_size = sizeof(vs_RectPosTexFast_shader_gfx9);
30585324fb0dSmrg
30595324fb0dSmrg	memcpy(ptr, shader, shader_size);
30605324fb0dSmrg
30615324fb0dSmrg	return 0;
30625324fb0dSmrg}
30635324fb0dSmrg
30645324fb0dSmrgstatic int amdgpu_draw_init(uint32_t *ptr)
30655324fb0dSmrg{
30665324fb0dSmrg	int i = 0;
30675324fb0dSmrg	const uint32_t *preamblecache_ptr;
30685324fb0dSmrg	uint32_t preamblecache_size;
30695324fb0dSmrg
30705324fb0dSmrg	/* Write context control and load shadowing register if necessary */
30715324fb0dSmrg	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
30725324fb0dSmrg	ptr[i++] = 0x80000000;
30735324fb0dSmrg	ptr[i++] = 0x80000000;
30745324fb0dSmrg
30755324fb0dSmrg	preamblecache_ptr = preamblecache_gfx9;
30765324fb0dSmrg	preamblecache_size = sizeof(preamblecache_gfx9);
30775324fb0dSmrg
30785324fb0dSmrg	memcpy(ptr + i, preamblecache_ptr, preamblecache_size);
30795324fb0dSmrg	return i + preamblecache_size/sizeof(uint32_t);
30805324fb0dSmrg}
30815324fb0dSmrg
30825324fb0dSmrgstatic int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr,
30839bd392adSmrg							 uint64_t dst_addr,
30849bd392adSmrg							 int hang_slow)
30855324fb0dSmrg{
30865324fb0dSmrg	int i = 0;
30875324fb0dSmrg
30885324fb0dSmrg	/* setup color buffer */
30895324fb0dSmrg	/* offset   reg
30905324fb0dSmrg	   0xA318   CB_COLOR0_BASE
30915324fb0dSmrg	   0xA319   CB_COLOR0_BASE_EXT
30925324fb0dSmrg	   0xA31A   CB_COLOR0_ATTRIB2
30935324fb0dSmrg	   0xA31B   CB_COLOR0_VIEW
30945324fb0dSmrg	   0xA31C   CB_COLOR0_INFO
30955324fb0dSmrg	   0xA31D   CB_COLOR0_ATTRIB
30965324fb0dSmrg	   0xA31E   CB_COLOR0_DCC_CONTROL
30975324fb0dSmrg	   0xA31F   CB_COLOR0_CMASK
30985324fb0dSmrg	   0xA320   CB_COLOR0_CMASK_BASE_EXT
30995324fb0dSmrg	   0xA321   CB_COLOR0_FMASK
31005324fb0dSmrg	   0xA322   CB_COLOR0_FMASK_BASE_EXT
31015324fb0dSmrg	   0xA323   CB_COLOR0_CLEAR_WORD0
31025324fb0dSmrg	   0xA324   CB_COLOR0_CLEAR_WORD1
31035324fb0dSmrg	   0xA325   CB_COLOR0_DCC_BASE
31045324fb0dSmrg	   0xA326   CB_COLOR0_DCC_BASE_EXT */
31055324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15);
31065324fb0dSmrg	ptr[i++] = 0x318;
31075324fb0dSmrg	ptr[i++] = dst_addr >> 8;
31085324fb0dSmrg	ptr[i++] = dst_addr >> 40;
31099bd392adSmrg	ptr[i++] = hang_slow ? 0x1ffc7ff : 0x7c01f;
31105324fb0dSmrg	ptr[i++] = 0;
31115324fb0dSmrg	ptr[i++] = 0x50438;
31125324fb0dSmrg	ptr[i++] = 0x10140000;
31135324fb0dSmrg	i += 9;
31145324fb0dSmrg
31155324fb0dSmrg	/* mmCB_MRT0_EPITCH */
31165324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
31175324fb0dSmrg	ptr[i++] = 0x1e8;
31189bd392adSmrg	ptr[i++] = hang_slow ? 0x7ff : 0x1f;
31195324fb0dSmrg
31205324fb0dSmrg	/* 0xA32B   CB_COLOR1_BASE */
31215324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
31225324fb0dSmrg	ptr[i++] = 0x32b;
31235324fb0dSmrg	ptr[i++] = 0;
31245324fb0dSmrg
31255324fb0dSmrg	/* 0xA33A   CB_COLOR1_BASE */
31265324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
31275324fb0dSmrg	ptr[i++] = 0x33a;
31285324fb0dSmrg	ptr[i++] = 0;
31295324fb0dSmrg
31305324fb0dSmrg	/* SPI_SHADER_COL_FORMAT */
31315324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
31325324fb0dSmrg	ptr[i++] = 0x1c5;
31335324fb0dSmrg	ptr[i++] = 9;
31345324fb0dSmrg
31355324fb0dSmrg	/* Setup depth buffer */
31365324fb0dSmrg	/* mmDB_Z_INFO */
31375324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
31385324fb0dSmrg	ptr[i++] = 0xe;
31395324fb0dSmrg	i += 2;
31405324fb0dSmrg
31415324fb0dSmrg	return i;
31425324fb0dSmrg}
31435324fb0dSmrg
31449bd392adSmrgstatic int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr, int hang_slow)
31455324fb0dSmrg{
31465324fb0dSmrg	int i = 0;
31475324fb0dSmrg	const uint32_t *cached_cmd_ptr;
31485324fb0dSmrg	uint32_t cached_cmd_size;
31495324fb0dSmrg
31505324fb0dSmrg	/* mmPA_SC_TILE_STEERING_OVERRIDE */
31515324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
31525324fb0dSmrg	ptr[i++] = 0xd7;
31535324fb0dSmrg	ptr[i++] = 0;
31545324fb0dSmrg
31555324fb0dSmrg	ptr[i++] = 0xffff1000;
31565324fb0dSmrg	ptr[i++] = 0xc0021000;
31575324fb0dSmrg
31585324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
31595324fb0dSmrg	ptr[i++] = 0xd7;
31605324fb0dSmrg	ptr[i++] = 1;
31615324fb0dSmrg
31625324fb0dSmrg	/* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
31635324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16);
31645324fb0dSmrg	ptr[i++] = 0x2fe;
31655324fb0dSmrg	i += 16;
31665324fb0dSmrg
31675324fb0dSmrg	/* mmPA_SC_CENTROID_PRIORITY_0 */
31685324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
31695324fb0dSmrg	ptr[i++] = 0x2f5;
31705324fb0dSmrg	i += 2;
31715324fb0dSmrg
31725324fb0dSmrg	cached_cmd_ptr = cached_cmd_gfx9;
31735324fb0dSmrg	cached_cmd_size = sizeof(cached_cmd_gfx9);
31745324fb0dSmrg
31755324fb0dSmrg	memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size);
31769bd392adSmrg	if (hang_slow)
31779bd392adSmrg		*(ptr + i + 12) = 0x8000800;
31785324fb0dSmrg	i += cached_cmd_size/sizeof(uint32_t);
31795324fb0dSmrg
31805324fb0dSmrg	return i;
31815324fb0dSmrg}
31825324fb0dSmrg
31835324fb0dSmrgstatic int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr,
31845324fb0dSmrg						  int ps_type,
31859bd392adSmrg						  uint64_t shader_addr,
31869bd392adSmrg						  int hang_slow)
31875324fb0dSmrg{
31885324fb0dSmrg	int i = 0;
31895324fb0dSmrg
31905324fb0dSmrg	/* mmPA_CL_VS_OUT_CNTL */
31915324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
31925324fb0dSmrg	ptr[i++] = 0x207;
31935324fb0dSmrg	ptr[i++] = 0;
31945324fb0dSmrg
31955324fb0dSmrg	/* mmSPI_SHADER_PGM_RSRC3_VS */
31965324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
31975324fb0dSmrg	ptr[i++] = 0x46;
31985324fb0dSmrg	ptr[i++] = 0xffff;
31995324fb0dSmrg
32005324fb0dSmrg	/* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */
32015324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
32025324fb0dSmrg	ptr[i++] = 0x48;
32035324fb0dSmrg	ptr[i++] = shader_addr >> 8;
32045324fb0dSmrg	ptr[i++] = shader_addr >> 40;
32055324fb0dSmrg
32065324fb0dSmrg	/* mmSPI_SHADER_PGM_RSRC1_VS */
32075324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
32085324fb0dSmrg	ptr[i++] = 0x4a;
32095324fb0dSmrg	ptr[i++] = 0xc0081;
32105324fb0dSmrg	/* mmSPI_SHADER_PGM_RSRC2_VS */
32115324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
32125324fb0dSmrg	ptr[i++] = 0x4b;
32135324fb0dSmrg	ptr[i++] = 0x18;
32145324fb0dSmrg
32155324fb0dSmrg	/* mmSPI_VS_OUT_CONFIG */
32165324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
32175324fb0dSmrg	ptr[i++] = 0x1b1;
32185324fb0dSmrg	ptr[i++] = 2;
32195324fb0dSmrg
32205324fb0dSmrg	/* mmSPI_SHADER_POS_FORMAT */
32215324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
32225324fb0dSmrg	ptr[i++] = 0x1c3;
32235324fb0dSmrg	ptr[i++] = 4;
32245324fb0dSmrg
32255324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
32265324fb0dSmrg	ptr[i++] = 0x4c;
32275324fb0dSmrg	i += 2;
32289bd392adSmrg	ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;
32299bd392adSmrg	ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;
32305324fb0dSmrg
32315324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
32325324fb0dSmrg	ptr[i++] = 0x50;
32335324fb0dSmrg	i += 2;
32345324fb0dSmrg	if (ps_type == PS_CONST) {
32355324fb0dSmrg		i += 2;
32365324fb0dSmrg	} else if (ps_type == PS_TEX) {
32375324fb0dSmrg		ptr[i++] = 0x3f800000;
32385324fb0dSmrg		ptr[i++] = 0x3f800000;
32395324fb0dSmrg	}
32405324fb0dSmrg
32415324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
32425324fb0dSmrg	ptr[i++] = 0x54;
32435324fb0dSmrg	i += 4;
32445324fb0dSmrg
32455324fb0dSmrg	return i;
32465324fb0dSmrg}
32475324fb0dSmrg
32485324fb0dSmrgstatic int amdgpu_draw_ps_write2hw(uint32_t *ptr,
32495324fb0dSmrg				   int ps_type,
32505324fb0dSmrg				   uint64_t shader_addr)
32515324fb0dSmrg{
32525324fb0dSmrg	int i, j;
32535324fb0dSmrg	const uint32_t *sh_registers;
32545324fb0dSmrg	const uint32_t *context_registers;
32555324fb0dSmrg	uint32_t num_sh_reg, num_context_reg;
32565324fb0dSmrg
32575324fb0dSmrg	if (ps_type == PS_CONST) {
32585324fb0dSmrg		sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9;
32595324fb0dSmrg		context_registers = (const uint32_t *)ps_const_context_reg_gfx9;
32605324fb0dSmrg		num_sh_reg = ps_num_sh_registers_gfx9;
32615324fb0dSmrg		num_context_reg = ps_num_context_registers_gfx9;
32625324fb0dSmrg	} else if (ps_type == PS_TEX) {
32635324fb0dSmrg		sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9;
32645324fb0dSmrg		context_registers = (const uint32_t *)ps_tex_context_reg_gfx9;
32655324fb0dSmrg		num_sh_reg = ps_num_sh_registers_gfx9;
32665324fb0dSmrg		num_context_reg = ps_num_context_registers_gfx9;
32675324fb0dSmrg	}
32685324fb0dSmrg
32695324fb0dSmrg	i = 0;
32705324fb0dSmrg
32715324fb0dSmrg	/* 0x2c07   SPI_SHADER_PGM_RSRC3_PS
32725324fb0dSmrg	   0x2c08   SPI_SHADER_PGM_LO_PS
32735324fb0dSmrg	   0x2c09   SPI_SHADER_PGM_HI_PS */
32745324fb0dSmrg	shader_addr += 256 * 9;
32755324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
32765324fb0dSmrg	ptr[i++] = 0x7;
32775324fb0dSmrg	ptr[i++] = 0xffff;
32785324fb0dSmrg	ptr[i++] = shader_addr >> 8;
32795324fb0dSmrg	ptr[i++] = shader_addr >> 40;
32805324fb0dSmrg
32815324fb0dSmrg	for (j = 0; j < num_sh_reg; j++) {
32825324fb0dSmrg		ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
32835324fb0dSmrg		ptr[i++] = sh_registers[j * 2] - 0x2c00;
32845324fb0dSmrg		ptr[i++] = sh_registers[j * 2 + 1];
32855324fb0dSmrg	}
32865324fb0dSmrg
32875324fb0dSmrg	for (j = 0; j < num_context_reg; j++) {
32885324fb0dSmrg		if (context_registers[j * 2] != 0xA1C5) {
32895324fb0dSmrg			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
32905324fb0dSmrg			ptr[i++] = context_registers[j * 2] - 0xa000;
32915324fb0dSmrg			ptr[i++] = context_registers[j * 2 + 1];
32925324fb0dSmrg		}
32935324fb0dSmrg
32945324fb0dSmrg		if (context_registers[j * 2] == 0xA1B4) {
32955324fb0dSmrg			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
32965324fb0dSmrg			ptr[i++] = 0x1b3;
32975324fb0dSmrg			ptr[i++] = 2;
32985324fb0dSmrg		}
32995324fb0dSmrg	}
33005324fb0dSmrg
33015324fb0dSmrg	return i;
33025324fb0dSmrg}
33035324fb0dSmrg
33045324fb0dSmrgstatic int amdgpu_draw_draw(uint32_t *ptr)
33055324fb0dSmrg{
33065324fb0dSmrg	int i = 0;
33075324fb0dSmrg
33085324fb0dSmrg	/* mmIA_MULTI_VGT_PARAM */
33095324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
33105324fb0dSmrg	ptr[i++] = 0x40000258;
33115324fb0dSmrg	ptr[i++] = 0xd00ff;
33125324fb0dSmrg
33135324fb0dSmrg	/* mmVGT_PRIMITIVE_TYPE */
33145324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
33155324fb0dSmrg	ptr[i++] = 0x10000242;
33165324fb0dSmrg	ptr[i++] = 0x11;
33175324fb0dSmrg
33185324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1);
33195324fb0dSmrg	ptr[i++] = 3;
33205324fb0dSmrg	ptr[i++] = 2;
33215324fb0dSmrg
33225324fb0dSmrg	return i;
33235324fb0dSmrg}
33245324fb0dSmrg
33255324fb0dSmrgvoid amdgpu_memset_draw(amdgpu_device_handle device_handle,
33265324fb0dSmrg			amdgpu_bo_handle bo_shader_ps,
33275324fb0dSmrg			amdgpu_bo_handle bo_shader_vs,
33285324fb0dSmrg			uint64_t mc_address_shader_ps,
33295324fb0dSmrg			uint64_t mc_address_shader_vs,
33305324fb0dSmrg			uint32_t ring_id)
33315324fb0dSmrg{
33325324fb0dSmrg	amdgpu_context_handle context_handle;
33335324fb0dSmrg	amdgpu_bo_handle bo_dst, bo_cmd, resources[4];
33345324fb0dSmrg	volatile unsigned char *ptr_dst;
33355324fb0dSmrg	uint32_t *ptr_cmd;
33365324fb0dSmrg	uint64_t mc_address_dst, mc_address_cmd;
33375324fb0dSmrg	amdgpu_va_handle va_dst, va_cmd;
33385324fb0dSmrg	int i, r;
33395324fb0dSmrg	int bo_dst_size = 16384;
33405324fb0dSmrg	int bo_cmd_size = 4096;
33415324fb0dSmrg	struct amdgpu_cs_request ibs_request = {0};
33425324fb0dSmrg	struct amdgpu_cs_ib_info ib_info = {0};
33435324fb0dSmrg	struct amdgpu_cs_fence fence_status = {0};
33445324fb0dSmrg	uint32_t expired;
33455324fb0dSmrg	amdgpu_bo_list_handle bo_list;
33465324fb0dSmrg
33475324fb0dSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
33485324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
33495324fb0dSmrg
33505324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
33515324fb0dSmrg					AMDGPU_GEM_DOMAIN_GTT, 0,
33525324fb0dSmrg					&bo_cmd, (void **)&ptr_cmd,
33535324fb0dSmrg					&mc_address_cmd, &va_cmd);
33545324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
33555324fb0dSmrg	memset(ptr_cmd, 0, bo_cmd_size);
33565324fb0dSmrg
33575324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
33585324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
33595324fb0dSmrg					&bo_dst, (void **)&ptr_dst,
33605324fb0dSmrg					&mc_address_dst, &va_dst);
33615324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
33625324fb0dSmrg
33635324fb0dSmrg	i = 0;
33645324fb0dSmrg	i += amdgpu_draw_init(ptr_cmd + i);
33655324fb0dSmrg
33669bd392adSmrg	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0);
33675324fb0dSmrg
33689bd392adSmrg	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0);
33695324fb0dSmrg
33709bd392adSmrg	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs, 0);
33715324fb0dSmrg
33725324fb0dSmrg	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps);
33735324fb0dSmrg
33745324fb0dSmrg	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
33755324fb0dSmrg	ptr_cmd[i++] = 0xc;
33765324fb0dSmrg	ptr_cmd[i++] = 0x33333333;
33775324fb0dSmrg	ptr_cmd[i++] = 0x33333333;
33785324fb0dSmrg	ptr_cmd[i++] = 0x33333333;
33795324fb0dSmrg	ptr_cmd[i++] = 0x33333333;
33805324fb0dSmrg
33815324fb0dSmrg	i += amdgpu_draw_draw(ptr_cmd + i);
33825324fb0dSmrg
33835324fb0dSmrg	while (i & 7)
33845324fb0dSmrg		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
33855324fb0dSmrg
33865324fb0dSmrg	resources[0] = bo_dst;
33875324fb0dSmrg	resources[1] = bo_shader_ps;
33885324fb0dSmrg	resources[2] = bo_shader_vs;
33895324fb0dSmrg	resources[3] = bo_cmd;
33909bd392adSmrg	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
33915324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
33925324fb0dSmrg
33935324fb0dSmrg	ib_info.ib_mc_address = mc_address_cmd;
33945324fb0dSmrg	ib_info.size = i;
33955324fb0dSmrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
33965324fb0dSmrg	ibs_request.ring = ring_id;
33975324fb0dSmrg	ibs_request.resources = bo_list;
33985324fb0dSmrg	ibs_request.number_of_ibs = 1;
33995324fb0dSmrg	ibs_request.ibs = &ib_info;
34005324fb0dSmrg	ibs_request.fence_info.handle = NULL;
34015324fb0dSmrg
34025324fb0dSmrg	/* submit CS */
34035324fb0dSmrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
34045324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
34055324fb0dSmrg
34065324fb0dSmrg	r = amdgpu_bo_list_destroy(bo_list);
34075324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
34085324fb0dSmrg
34095324fb0dSmrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
34105324fb0dSmrg	fence_status.ip_instance = 0;
34115324fb0dSmrg	fence_status.ring = ring_id;
34125324fb0dSmrg	fence_status.context = context_handle;
34135324fb0dSmrg	fence_status.fence = ibs_request.seq_no;
34145324fb0dSmrg
34155324fb0dSmrg	/* wait for IB accomplished */
34165324fb0dSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
34175324fb0dSmrg					 AMDGPU_TIMEOUT_INFINITE,
34185324fb0dSmrg					 0, &expired);
34195324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
34205324fb0dSmrg	CU_ASSERT_EQUAL(expired, true);
34215324fb0dSmrg
34225324fb0dSmrg	/* verify if memset test result meets with expected */
34235324fb0dSmrg	i = 0;
34245324fb0dSmrg	while(i < bo_dst_size) {
34255324fb0dSmrg		CU_ASSERT_EQUAL(ptr_dst[i++], 0x33);
34265324fb0dSmrg	}
34275324fb0dSmrg
34285324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
34295324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
34305324fb0dSmrg
34315324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
34325324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
34335324fb0dSmrg
34345324fb0dSmrg	r = amdgpu_cs_ctx_free(context_handle);
34355324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
34365324fb0dSmrg}
34375324fb0dSmrg
34385324fb0dSmrgstatic void amdgpu_memset_draw_test(amdgpu_device_handle device_handle,
34395324fb0dSmrg				    uint32_t ring)
34405324fb0dSmrg{
34415324fb0dSmrg	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
34425324fb0dSmrg	void *ptr_shader_ps;
34435324fb0dSmrg	void *ptr_shader_vs;
34445324fb0dSmrg	uint64_t mc_address_shader_ps, mc_address_shader_vs;
34455324fb0dSmrg	amdgpu_va_handle va_shader_ps, va_shader_vs;
34465324fb0dSmrg	int r;
34475324fb0dSmrg	int bo_shader_size = 4096;
34485324fb0dSmrg
34495324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
34505324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
34515324fb0dSmrg					&bo_shader_ps, &ptr_shader_ps,
34525324fb0dSmrg					&mc_address_shader_ps, &va_shader_ps);
34535324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
345488f8a8d2Smrg	memset(ptr_shader_ps, 0, bo_shader_size);
34555324fb0dSmrg
34565324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
34575324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
34585324fb0dSmrg					&bo_shader_vs, &ptr_shader_vs,
34595324fb0dSmrg					&mc_address_shader_vs, &va_shader_vs);
34605324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
346188f8a8d2Smrg	memset(ptr_shader_vs, 0, bo_shader_size);
34625324fb0dSmrg
34635324fb0dSmrg	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_CONST);
34645324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
34655324fb0dSmrg
34665324fb0dSmrg	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
34675324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
34685324fb0dSmrg
34695324fb0dSmrg	amdgpu_memset_draw(device_handle, bo_shader_ps, bo_shader_vs,
34705324fb0dSmrg			mc_address_shader_ps, mc_address_shader_vs, ring);
34715324fb0dSmrg
34725324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
34735324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
34745324fb0dSmrg
34755324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
34765324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
34775324fb0dSmrg}
34785324fb0dSmrg
34795324fb0dSmrgstatic void amdgpu_memcpy_draw(amdgpu_device_handle device_handle,
34805324fb0dSmrg			       amdgpu_bo_handle bo_shader_ps,
34815324fb0dSmrg			       amdgpu_bo_handle bo_shader_vs,
34825324fb0dSmrg			       uint64_t mc_address_shader_ps,
34835324fb0dSmrg			       uint64_t mc_address_shader_vs,
34849bd392adSmrg			       uint32_t ring, int hang)
34855324fb0dSmrg{
34865324fb0dSmrg	amdgpu_context_handle context_handle;
34875324fb0dSmrg	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
34885324fb0dSmrg	volatile unsigned char *ptr_dst;
34895324fb0dSmrg	unsigned char *ptr_src;
34905324fb0dSmrg	uint32_t *ptr_cmd;
34915324fb0dSmrg	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
34925324fb0dSmrg	amdgpu_va_handle va_dst, va_src, va_cmd;
34935324fb0dSmrg	int i, r;
34945324fb0dSmrg	int bo_size = 16384;
34955324fb0dSmrg	int bo_cmd_size = 4096;
34965324fb0dSmrg	struct amdgpu_cs_request ibs_request = {0};
34975324fb0dSmrg	struct amdgpu_cs_ib_info ib_info= {0};
34989bd392adSmrg	uint32_t hang_state, hangs;
34999bd392adSmrg	uint32_t expired;
35005324fb0dSmrg	amdgpu_bo_list_handle bo_list;
35015324fb0dSmrg	struct amdgpu_cs_fence fence_status = {0};
35025324fb0dSmrg
35035324fb0dSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
35045324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
35055324fb0dSmrg
35065324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
35075324fb0dSmrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
35085324fb0dSmrg				    &bo_cmd, (void **)&ptr_cmd,
35095324fb0dSmrg				    &mc_address_cmd, &va_cmd);
35105324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
35115324fb0dSmrg	memset(ptr_cmd, 0, bo_cmd_size);
35125324fb0dSmrg
35135324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
35145324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
35155324fb0dSmrg					&bo_src, (void **)&ptr_src,
35165324fb0dSmrg					&mc_address_src, &va_src);
35175324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
35185324fb0dSmrg
35195324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
35205324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
35215324fb0dSmrg					&bo_dst, (void **)&ptr_dst,
35225324fb0dSmrg					&mc_address_dst, &va_dst);
35235324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
35245324fb0dSmrg
35255324fb0dSmrg	memset(ptr_src, 0x55, bo_size);
35265324fb0dSmrg
35275324fb0dSmrg	i = 0;
35285324fb0dSmrg	i += amdgpu_draw_init(ptr_cmd + i);
35295324fb0dSmrg
35309bd392adSmrg	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0);
35315324fb0dSmrg
35329bd392adSmrg	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0);
35335324fb0dSmrg
35349bd392adSmrg	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs, 0);
35355324fb0dSmrg
35365324fb0dSmrg	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);
35375324fb0dSmrg
35385324fb0dSmrg	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
35395324fb0dSmrg	ptr_cmd[i++] = 0xc;
35405324fb0dSmrg	ptr_cmd[i++] = mc_address_src >> 8;
35415324fb0dSmrg	ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
35425324fb0dSmrg	ptr_cmd[i++] = 0x7c01f;
35435324fb0dSmrg	ptr_cmd[i++] = 0x90500fac;
35445324fb0dSmrg	ptr_cmd[i++] = 0x3e000;
35455324fb0dSmrg	i += 3;
35465324fb0dSmrg
35475324fb0dSmrg	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
35485324fb0dSmrg	ptr_cmd[i++] = 0x14;
35495324fb0dSmrg	ptr_cmd[i++] = 0x92;
35505324fb0dSmrg	i += 3;
35515324fb0dSmrg
355288f8a8d2Smrg	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
35535324fb0dSmrg	ptr_cmd[i++] = 0x191;
35545324fb0dSmrg	ptr_cmd[i++] = 0;
35555324fb0dSmrg
35565324fb0dSmrg	i += amdgpu_draw_draw(ptr_cmd + i);
35575324fb0dSmrg
35585324fb0dSmrg	while (i & 7)
35595324fb0dSmrg		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
35605324fb0dSmrg
35615324fb0dSmrg	resources[0] = bo_dst;
35625324fb0dSmrg	resources[1] = bo_src;
35635324fb0dSmrg	resources[2] = bo_shader_ps;
35645324fb0dSmrg	resources[3] = bo_shader_vs;
35655324fb0dSmrg	resources[4] = bo_cmd;
35665324fb0dSmrg	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
35675324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
35685324fb0dSmrg
35695324fb0dSmrg	ib_info.ib_mc_address = mc_address_cmd;
35705324fb0dSmrg	ib_info.size = i;
35715324fb0dSmrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
35725324fb0dSmrg	ibs_request.ring = ring;
35735324fb0dSmrg	ibs_request.resources = bo_list;
35745324fb0dSmrg	ibs_request.number_of_ibs = 1;
35755324fb0dSmrg	ibs_request.ibs = &ib_info;
35765324fb0dSmrg	ibs_request.fence_info.handle = NULL;
35775324fb0dSmrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
35785324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
35795324fb0dSmrg
35805324fb0dSmrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
35815324fb0dSmrg	fence_status.ip_instance = 0;
35825324fb0dSmrg	fence_status.ring = ring;
35835324fb0dSmrg	fence_status.context = context_handle;
35845324fb0dSmrg	fence_status.fence = ibs_request.seq_no;
35855324fb0dSmrg
35865324fb0dSmrg	/* wait for IB accomplished */
35875324fb0dSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
35885324fb0dSmrg					 AMDGPU_TIMEOUT_INFINITE,
35895324fb0dSmrg					 0, &expired);
35909bd392adSmrg	if (!hang) {
35919bd392adSmrg		CU_ASSERT_EQUAL(r, 0);
35929bd392adSmrg		CU_ASSERT_EQUAL(expired, true);
35935324fb0dSmrg
35949bd392adSmrg		/* verify if memcpy test result meets with expected */
35959bd392adSmrg		i = 0;
35969bd392adSmrg		while(i < bo_size) {
35979bd392adSmrg			CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
35989bd392adSmrg			i++;
35999bd392adSmrg		}
36009bd392adSmrg	} else {
36019bd392adSmrg		r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
36029bd392adSmrg		CU_ASSERT_EQUAL(r, 0);
36039bd392adSmrg		CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
36045324fb0dSmrg	}
36055324fb0dSmrg
36065324fb0dSmrg	r = amdgpu_bo_list_destroy(bo_list);
36075324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
36085324fb0dSmrg
36095324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
36105324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
36115324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
36125324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
36135324fb0dSmrg
36145324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
36155324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
36165324fb0dSmrg
36175324fb0dSmrg	r = amdgpu_cs_ctx_free(context_handle);
36185324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
36195324fb0dSmrg}
36205324fb0dSmrg
36219bd392adSmrgvoid amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring,
36229bd392adSmrg			     int hang)
36235324fb0dSmrg{
36245324fb0dSmrg	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
36255324fb0dSmrg	void *ptr_shader_ps;
36265324fb0dSmrg	void *ptr_shader_vs;
36275324fb0dSmrg	uint64_t mc_address_shader_ps, mc_address_shader_vs;
36285324fb0dSmrg	amdgpu_va_handle va_shader_ps, va_shader_vs;
36295324fb0dSmrg	int bo_shader_size = 4096;
36309bd392adSmrg	enum ps_type ps_type = hang ? PS_HANG : PS_TEX;
36315324fb0dSmrg	int r;
36325324fb0dSmrg
36335324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
36345324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
36355324fb0dSmrg					&bo_shader_ps, &ptr_shader_ps,
36365324fb0dSmrg					&mc_address_shader_ps, &va_shader_ps);
36375324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
363888f8a8d2Smrg	memset(ptr_shader_ps, 0, bo_shader_size);
36395324fb0dSmrg
36405324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
36415324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
36425324fb0dSmrg					&bo_shader_vs, &ptr_shader_vs,
36435324fb0dSmrg					&mc_address_shader_vs, &va_shader_vs);
36445324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
364588f8a8d2Smrg	memset(ptr_shader_vs, 0, bo_shader_size);
36465324fb0dSmrg
36479bd392adSmrg	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, ps_type);
36485324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
36495324fb0dSmrg
36505324fb0dSmrg	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
36515324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
36525324fb0dSmrg
36535324fb0dSmrg	amdgpu_memcpy_draw(device_handle, bo_shader_ps, bo_shader_vs,
36549bd392adSmrg			mc_address_shader_ps, mc_address_shader_vs, ring, hang);
36555324fb0dSmrg
36565324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
36575324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
36585324fb0dSmrg
36595324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
36605324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
36615324fb0dSmrg}
36625324fb0dSmrg
36635324fb0dSmrgstatic void amdgpu_draw_test(void)
36645324fb0dSmrg{
36655324fb0dSmrg	int r;
36665324fb0dSmrg	struct drm_amdgpu_info_hw_ip info;
36675324fb0dSmrg	uint32_t ring_id;
36685324fb0dSmrg
36695324fb0dSmrg	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
36705324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
367188f8a8d2Smrg	if (!info.available_rings)
367288f8a8d2Smrg		printf("SKIP ... as there's no graphics ring\n");
36735324fb0dSmrg
36745324fb0dSmrg	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
36755324fb0dSmrg		amdgpu_memset_draw_test(device_handle, ring_id);
36769bd392adSmrg		amdgpu_memcpy_draw_test(device_handle, ring_id, 0);
36775324fb0dSmrg	}
36785324fb0dSmrg}
367988f8a8d2Smrg
36809bd392adSmrgvoid amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint32_t ring)
36819bd392adSmrg{
36829bd392adSmrg	amdgpu_context_handle context_handle;
36839bd392adSmrg	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
36849bd392adSmrg	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
36859bd392adSmrg	void *ptr_shader_ps;
36869bd392adSmrg	void *ptr_shader_vs;
36879bd392adSmrg	volatile unsigned char *ptr_dst;
36889bd392adSmrg	unsigned char *ptr_src;
36899bd392adSmrg	uint32_t *ptr_cmd;
36909bd392adSmrg	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
36919bd392adSmrg	uint64_t mc_address_shader_ps, mc_address_shader_vs;
36929bd392adSmrg	amdgpu_va_handle va_shader_ps, va_shader_vs;
36939bd392adSmrg	amdgpu_va_handle va_dst, va_src, va_cmd;
36949bd392adSmrg	struct amdgpu_gpu_info gpu_info = {0};
36959bd392adSmrg	int i, r;
36969bd392adSmrg	int bo_size = 0x4000000;
36979bd392adSmrg	int bo_shader_ps_size = 0x400000;
36989bd392adSmrg	int bo_shader_vs_size = 4096;
36999bd392adSmrg	int bo_cmd_size = 4096;
37009bd392adSmrg	struct amdgpu_cs_request ibs_request = {0};
37019bd392adSmrg	struct amdgpu_cs_ib_info ib_info= {0};
37029bd392adSmrg	uint32_t hang_state, hangs, expired;
37039bd392adSmrg	amdgpu_bo_list_handle bo_list;
37049bd392adSmrg	struct amdgpu_cs_fence fence_status = {0};
37059bd392adSmrg
37069bd392adSmrg	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
37079bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
37089bd392adSmrg
37099bd392adSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
37109bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
37119bd392adSmrg
37129bd392adSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
37139bd392adSmrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
37149bd392adSmrg				    &bo_cmd, (void **)&ptr_cmd,
37159bd392adSmrg				    &mc_address_cmd, &va_cmd);
37169bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
37179bd392adSmrg	memset(ptr_cmd, 0, bo_cmd_size);
37189bd392adSmrg
37199bd392adSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_ps_size, 4096,
37209bd392adSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
37219bd392adSmrg					&bo_shader_ps, &ptr_shader_ps,
37229bd392adSmrg					&mc_address_shader_ps, &va_shader_ps);
37239bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
37249bd392adSmrg	memset(ptr_shader_ps, 0, bo_shader_ps_size);
37259bd392adSmrg
37269bd392adSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_vs_size, 4096,
37279bd392adSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
37289bd392adSmrg					&bo_shader_vs, &ptr_shader_vs,
37299bd392adSmrg					&mc_address_shader_vs, &va_shader_vs);
37309bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
37319bd392adSmrg	memset(ptr_shader_vs, 0, bo_shader_vs_size);
37329bd392adSmrg
37339bd392adSmrg	r = amdgpu_draw_load_ps_shader_hang_slow(ptr_shader_ps, gpu_info.family_id);
37349bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
37359bd392adSmrg
37369bd392adSmrg	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
37379bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
37389bd392adSmrg
37399bd392adSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
37409bd392adSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
37419bd392adSmrg					&bo_src, (void **)&ptr_src,
37429bd392adSmrg					&mc_address_src, &va_src);
37439bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
37449bd392adSmrg
37459bd392adSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
37469bd392adSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
37479bd392adSmrg					&bo_dst, (void **)&ptr_dst,
37489bd392adSmrg					&mc_address_dst, &va_dst);
37499bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
37509bd392adSmrg
37519bd392adSmrg	memset(ptr_src, 0x55, bo_size);
37529bd392adSmrg
37539bd392adSmrg	i = 0;
37549bd392adSmrg	i += amdgpu_draw_init(ptr_cmd + i);
37559bd392adSmrg
37569bd392adSmrg	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 1);
37579bd392adSmrg
37589bd392adSmrg	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 1);
37599bd392adSmrg
37609bd392adSmrg	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX,
37619bd392adSmrg							mc_address_shader_vs, 1);
37629bd392adSmrg
37639bd392adSmrg	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);
37649bd392adSmrg
37659bd392adSmrg	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
37669bd392adSmrg	ptr_cmd[i++] = 0xc;
37679bd392adSmrg	ptr_cmd[i++] = mc_address_src >> 8;
37689bd392adSmrg	ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
37699bd392adSmrg	ptr_cmd[i++] = 0x1ffc7ff;
37709bd392adSmrg	ptr_cmd[i++] = 0x90500fac;
37719bd392adSmrg	ptr_cmd[i++] = 0xffe000;
37729bd392adSmrg	i += 3;
37739bd392adSmrg
37749bd392adSmrg	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
37759bd392adSmrg	ptr_cmd[i++] = 0x14;
37769bd392adSmrg	ptr_cmd[i++] = 0x92;
37779bd392adSmrg	i += 3;
37789bd392adSmrg
37799bd392adSmrg	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
37809bd392adSmrg	ptr_cmd[i++] = 0x191;
37819bd392adSmrg	ptr_cmd[i++] = 0;
37829bd392adSmrg
37839bd392adSmrg	i += amdgpu_draw_draw(ptr_cmd + i);
37849bd392adSmrg
37859bd392adSmrg	while (i & 7)
37869bd392adSmrg		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
37879bd392adSmrg
37889bd392adSmrg	resources[0] = bo_dst;
37899bd392adSmrg	resources[1] = bo_src;
37909bd392adSmrg	resources[2] = bo_shader_ps;
37919bd392adSmrg	resources[3] = bo_shader_vs;
37929bd392adSmrg	resources[4] = bo_cmd;
37939bd392adSmrg	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
37949bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
37959bd392adSmrg
37969bd392adSmrg	ib_info.ib_mc_address = mc_address_cmd;
37979bd392adSmrg	ib_info.size = i;
37989bd392adSmrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
37999bd392adSmrg	ibs_request.ring = ring;
38009bd392adSmrg	ibs_request.resources = bo_list;
38019bd392adSmrg	ibs_request.number_of_ibs = 1;
38029bd392adSmrg	ibs_request.ibs = &ib_info;
38039bd392adSmrg	ibs_request.fence_info.handle = NULL;
38049bd392adSmrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
38059bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
38069bd392adSmrg
38079bd392adSmrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
38089bd392adSmrg	fence_status.ip_instance = 0;
38099bd392adSmrg	fence_status.ring = ring;
38109bd392adSmrg	fence_status.context = context_handle;
38119bd392adSmrg	fence_status.fence = ibs_request.seq_no;
38129bd392adSmrg
38139bd392adSmrg	/* wait for IB accomplished */
38149bd392adSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
38159bd392adSmrg					 AMDGPU_TIMEOUT_INFINITE,
38169bd392adSmrg					 0, &expired);
38179bd392adSmrg
38189bd392adSmrg	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
38199bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
38209bd392adSmrg	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
38219bd392adSmrg
38229bd392adSmrg	r = amdgpu_bo_list_destroy(bo_list);
38239bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
38249bd392adSmrg
38259bd392adSmrg	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
38269bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
38279bd392adSmrg	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
38289bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
38299bd392adSmrg
38309bd392adSmrg	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
38319bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
38329bd392adSmrg
38339bd392adSmrg	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_ps_size);
38349bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
38359bd392adSmrg	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_vs_size);
38369bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
38379bd392adSmrg
38389bd392adSmrg	r = amdgpu_cs_ctx_free(context_handle);
38399bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
38409bd392adSmrg}
38419bd392adSmrg
384288f8a8d2Smrgstatic void amdgpu_gpu_reset_test(void)
384388f8a8d2Smrg{
384488f8a8d2Smrg	int r;
384588f8a8d2Smrg	char debugfs_path[256], tmp[10];
384688f8a8d2Smrg	int fd;
384788f8a8d2Smrg	struct stat sbuf;
384888f8a8d2Smrg	amdgpu_context_handle context_handle;
384988f8a8d2Smrg	uint32_t hang_state, hangs;
385088f8a8d2Smrg
385188f8a8d2Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
385288f8a8d2Smrg	CU_ASSERT_EQUAL(r, 0);
385388f8a8d2Smrg
385488f8a8d2Smrg	r = fstat(drm_amdgpu[0], &sbuf);
385588f8a8d2Smrg	CU_ASSERT_EQUAL(r, 0);
385688f8a8d2Smrg
385788f8a8d2Smrg	sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/amdgpu_gpu_recover", minor(sbuf.st_rdev));
385888f8a8d2Smrg	fd = open(debugfs_path, O_RDONLY);
385988f8a8d2Smrg	CU_ASSERT(fd >= 0);
386088f8a8d2Smrg
386188f8a8d2Smrg	r = read(fd, tmp, sizeof(tmp)/sizeof(char));
386288f8a8d2Smrg	CU_ASSERT(r > 0);
386388f8a8d2Smrg
386488f8a8d2Smrg	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
386588f8a8d2Smrg	CU_ASSERT_EQUAL(r, 0);
386688f8a8d2Smrg	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
386788f8a8d2Smrg
386888f8a8d2Smrg	close(fd);
386988f8a8d2Smrg	r = amdgpu_cs_ctx_free(context_handle);
387088f8a8d2Smrg	CU_ASSERT_EQUAL(r, 0);
387188f8a8d2Smrg
387288f8a8d2Smrg	amdgpu_compute_dispatch_test();
387388f8a8d2Smrg	amdgpu_gfx_dispatch_test();
387488f8a8d2Smrg}
3875