/* basic_tests.c revision 0ed5401b */
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
*/
233f012e29Smrg
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/types.h>
#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif
#include <sys/stat.h>
#include <fcntl.h>
#if HAVE_ALLOCA_H
# include <alloca.h>
#endif
#include <sys/wait.h>

#include "CUnit/Basic.h"

#include "amdgpu_test.h"
#include "amdgpu_drm.h"
#include "amdgpu_internal.h"
#include "util_math.h"
443f012e29Smrg
453f012e29Smrgstatic  amdgpu_device_handle device_handle;
463f012e29Smrgstatic  uint32_t  major_version;
473f012e29Smrgstatic  uint32_t  minor_version;
48d8807b2fSmrgstatic  uint32_t  family_id;
494babd585Smrgstatic  uint32_t  chip_id;
504babd585Smrgstatic  uint32_t  chip_rev;
513f012e29Smrg
/* Forward declarations of the test cases registered in basic_tests[]. */
static void amdgpu_query_info_test(void);
static void amdgpu_command_submission_gfx(void);
static void amdgpu_command_submission_compute(void);
static void amdgpu_command_submission_multi_fence(void);
static void amdgpu_command_submission_sdma(void);
static void amdgpu_userptr_test(void);
static void amdgpu_semaphore_test(void);
static void amdgpu_sync_dependency_test(void);
static void amdgpu_bo_eviction_test(void);
static void amdgpu_compute_dispatch_test(void);
static void amdgpu_gfx_dispatch_test(void);
static void amdgpu_draw_test(void);
static void amdgpu_gpu_reset_test(void);
static void amdgpu_stable_pstate_test(void);
663f012e29Smrg
673f012e29Smrgstatic void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
683f012e29Smrgstatic void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
693f012e29Smrgstatic void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
7000a23bdaSmrgstatic void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
7100a23bdaSmrg				       unsigned ip_type,
7200a23bdaSmrg				       int instance, int pm4_dw, uint32_t *pm4_src,
7300a23bdaSmrg				       int res_cnt, amdgpu_bo_handle *resources,
7400a23bdaSmrg				       struct amdgpu_cs_ib_info *ib_info,
7500a23bdaSmrg				       struct amdgpu_cs_request *ibs_request);
7641687f09Smrg
773f012e29SmrgCU_TestInfo basic_tests[] = {
783f012e29Smrg	{ "Query Info Test",  amdgpu_query_info_test },
793f012e29Smrg	{ "Userptr Test",  amdgpu_userptr_test },
8000a23bdaSmrg	{ "bo eviction Test",  amdgpu_bo_eviction_test },
813f012e29Smrg	{ "Command submission Test (GFX)",  amdgpu_command_submission_gfx },
823f012e29Smrg	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
83d8807b2fSmrg	{ "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
843f012e29Smrg	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
853f012e29Smrg	{ "SW semaphore Test",  amdgpu_semaphore_test },
8600a23bdaSmrg	{ "Sync dependency Test",  amdgpu_sync_dependency_test },
8788f8a8d2Smrg	{ "Dispatch Test (Compute)",  amdgpu_compute_dispatch_test },
8888f8a8d2Smrg	{ "Dispatch Test (GFX)",  amdgpu_gfx_dispatch_test },
895324fb0dSmrg	{ "Draw Test",  amdgpu_draw_test },
9088f8a8d2Smrg	{ "GPU reset Test", amdgpu_gpu_reset_test },
910ed5401bSmrg	{ "Stable pstate Test", amdgpu_stable_pstate_test },
923f012e29Smrg	CU_TEST_INFO_NULL,
933f012e29Smrg};
/*
 * Test buffer size in bytes: MAX2() of 8 KiB and the system page size.
 * Expands to a getpagesize() call, so this is a run-time value and cannot
 * be used where a constant expression is required.
 */
#define BUFFER_SIZE (MAX2(8 * 1024, getpagesize()))
/* SDMA packet header: opcode field, mask 0xFF at shift 0. */
#define SDMA_PKT_HEADER_op_offset 0
#define SDMA_PKT_HEADER_op_mask   0x000000FF
#define SDMA_PKT_HEADER_op_shift  0
#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
#define SDMA_OPCODE_CONSTANT_FILL  11
#       define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)           ((x) << 14)
	/* 0 = byte fill
	 * 2 = DW fill
	 */
/*
 * Build an SDMA header dword: op in bits 7:0, sub_op in bits 15:8 and the
 * 16-bit extra field e in bits 31:16.  The arguments are converted to
 * uint32_t before shifting so that an e with bit 15 set (for example
 * SDMA_CONSTANT_FILL_EXTRA_SIZE(2)) does not shift into the sign bit of an
 * int, which would be undefined behaviour.
 */
#define SDMA_PACKET(op, sub_op, e)	((((uint32_t)(e) & 0xFFFF) << 16) |	\
					(((uint32_t)(sub_op) & 0xFF) << 8) |	\
					(((uint32_t)(op) & 0xFF) << 0))
#define	SDMA_OPCODE_WRITE				  2
#       define SDMA_WRITE_SUB_OPCODE_LINEAR               0
#       define SDMA_WRITE_SUB_OPCODE_TILED                1
	/* Historical misspelling, kept so existing users still compile. */
#       define SDMA_WRTIE_SUB_OPCODE_TILED                SDMA_WRITE_SUB_OPCODE_TILED

#define	SDMA_OPCODE_COPY				  1
#       define SDMA_COPY_SUB_OPCODE_LINEAR                0

/* SDMA atomic packet: opcode and header bit-field builders. */
#define	SDMA_OPCODE_ATOMIC				  10
#		define SDMA_ATOMIC_LOOP(x)               ((x) << 0)
		/* 0 - single_pass_atomic.
		 * 1 - loop_until_compare_satisfied.
		 */
#		define SDMA_ATOMIC_TMZ(x)                ((x) << 2)
		/* 0 - non-TMZ.
		 * 1 - TMZ.
		 */
#		define SDMA_ATOMIC_OPCODE(x)             ((x) << 9)
		/* TC_OP_ATOMIC_CMPSWAP_RTN_32 0x00000008
		 * same as Packet 3
		 */

12741687f09Smrg
/* NOP dwords for the GFX/compute and SDMA rings, going by the macro names. */
#define GFX_COMPUTE_NOP  0xffff1000
#define SDMA_NOP  0x0
1303f012e29Smrg
/* PM4 */
#define	PACKET_TYPE0	0
#define	PACKET_TYPE1	1
#define	PACKET_TYPE2	2
#define	PACKET_TYPE3	3

/* Field extractors for a PM4 header dword h. */
#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)

/*
 * Header builders.  The packet type is shifted as uint32_t because
 * PACKET_TYPE3 << 30 does not fit in a signed int (undefined behaviour),
 * and every term is fully parenthesized.
 */
#define PACKET0(reg, n)	(((uint32_t)PACKET_TYPE0 << 30) |		\
			 ((uint32_t)(reg) & 0xFFFF) |			\
			 (((uint32_t)(n) & 0x3FFF) << 16))
#define CP_PACKET2			0x80000000
#define		PACKET2_PAD_SHIFT		0
#define		PACKET2_PAD_MASK		(0x3fffffff << 0)

/* NOTE(review): REG_SET is not defined in the visible part of this file. */
#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))

#define PACKET3(op, n)	(((uint32_t)PACKET_TYPE3 << 30) |		\
			 (((uint32_t)(op) & 0xFF) << 8) |		\
			 (((uint32_t)(n) & 0x3FFF) << 16))
/*
 * PACKET3 header with bit 1 set.  The outer parentheses keep the "| (1 << 1)"
 * term bound to the header when the macro appears inside a larger
 * expression such as PACKET3_COMPUTE(op, n) & mask.
 */
#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | (1 << 1))
1543f012e29Smrg
/* Packet 3 types */
#define	PACKET3_NOP					0x10

#define	PACKET3_WRITE_DATA				0x37
#define		WRITE_DATA_DST_SEL(x)                   ((x) << 8)
		/* 0 - register
		 * 1 - memory (sync - via GRBM)
		 * 2 - gl2
		 * 3 - gds
		 * 4 - reserved
		 * 5 - memory (async - direct)
		 */
#define		WR_ONE_ADDR                             (1 << 16)
#define		WR_CONFIRM                              (1 << 20)
#define		WRITE_DATA_CACHE_POLICY(x)              ((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 */
/* Shifted as uint32_t: the value 2 (ce) would overflow a signed int. */
#define		WRITE_DATA_ENGINE_SEL(x)                ((uint32_t)(x) << 30)
		/* 0 - me
		 * 1 - pfp
		 * 2 - ce
		 */

1783f012e29Smrg
#define	PACKET3_ATOMIC_MEM				0x1E
#define     TC_OP_ATOMIC_CMPSWAP_RTN_32          0x00000008
#define     ATOMIC_MEM_COMMAND(x)               ((x) << 8)
            /* 0 - single_pass_atomic.
             * 1 - loop_until_compare_satisfied.
             */
#define     ATOMIC_MEM_CACHEPOLICY(x)           ((x) << 25)
            /* 0 - lru.
             * 1 - stream.
             */
/* Historical misspelling, kept so existing users still compile. */
#define     ATOMIC_MEM_CACHEPOLICAY(x)          ATOMIC_MEM_CACHEPOLICY(x)
/* Shifted as uint32_t so a value of 2 or 3 cannot overflow a signed int. */
#define     ATOMIC_MEM_ENGINESEL(x)             ((uint32_t)(x) << 30)
            /* 0 - micro_engine.
             */

19241687f09Smrg
#define	PACKET3_DMA_DATA				0x50
/* 1. header
 * 2. CONTROL
 * 3. SRC_ADDR_LO or DATA [31:0]
 * 4. SRC_ADDR_HI [31:0]
 * 5. DST_ADDR_LO [31:0]
 * 6. DST_ADDR_HI [7:0]
 * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
 */
/* CONTROL */
#              define PACKET3_DMA_DATA_ENGINE(x)     ((x) << 0)
		/* 0 - ME
		 * 1 - PFP
		 */
#              define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#              define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15)
#              define PACKET3_DMA_DATA_DST_SEL(x)  ((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#              define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27)
#              define PACKET3_DMA_DATA_SRC_SEL(x)  ((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
		/* Unsigned literal: 1 << 31 overflows a signed int. */
#              define PACKET3_DMA_DATA_CP_SYNC     (1u << 31)
/* COMMAND */
#              define PACKET3_DMA_DATA_DIS_WC      (1 << 21)
#              define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#              define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#              define PACKET3_DMA_DATA_CMD_SAS     (1 << 26)
		/* 0 - memory
		 * 1 - register
		 */
#              define PACKET3_DMA_DATA_CMD_DAS     (1 << 27)
		/* 0 - memory
		 * 1 - register
		 */
#              define PACKET3_DMA_DATA_CMD_SAIC    (1 << 28)
#              define PACKET3_DMA_DATA_CMD_DAIC    (1 << 29)
#              define PACKET3_DMA_DATA_CMD_RAW_WAIT  (1 << 30)
2563f012e29Smrg
/*
 * SI-style SDMA header: op in bits 31:28, single-bit flags b/t/s at bits
 * 26/23/22 and a 20-bit count in bits 19:0.  Arguments are converted to
 * uint32_t before shifting; opcodes >= 8 (e.g. SDMA_OPCODE_CONSTANT_FILL_SI,
 * SDMA_NOP_SI) would otherwise shift into the sign bit of an int.
 */
#define SDMA_PACKET_SI(op, b, t, s, cnt)	((((uint32_t)(op) & 0xF) << 28) |	\
						(((uint32_t)(b) & 0x1) << 26) |		\
						(((uint32_t)(t) & 0x1) << 23) |		\
						(((uint32_t)(s) & 0x1) << 22) |		\
						(((uint32_t)(cnt) & 0xFFFFF) << 0))
#define	SDMA_OPCODE_COPY_SI	3
#define SDMA_OPCODE_CONSTANT_FILL_SI	13
#define SDMA_NOP_SI  0xf
#define GFX_COMPUTE_NOP_SI 0x80000000
#define	PACKET3_DMA_DATA_SI	0x41
#              define PACKET3_DMA_DATA_SI_ENGINE(x)     ((x) << 27)
		/* 0 - ME
		 * 1 - PFP
		 */
#              define PACKET3_DMA_DATA_SI_DST_SEL(x)  ((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_SI_SRC_SEL(x)  ((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
		/* Unsigned literal: 1 << 31 overflows a signed int. */
#              define PACKET3_DMA_DATA_SI_CP_SYNC     (1u << 31)

283d8807b2fSmrg
28400a23bdaSmrg
/* Additional PM4 type-3 opcodes and their field builders. */
#define PKT3_CONTEXT_CONTROL                   0x28
/*
 * NOTE(review): LOAD_ENABLE and SHADOW_ENABLE both map to bit 31;
 * presumably they are meant for different dwords of the packet - confirm.
 */
#define     CONTEXT_CONTROL_LOAD_ENABLE(x)     (((unsigned)(x) & 0x1) << 31)
#define     CONTEXT_CONTROL_LOAD_CE_RAM(x)     (((unsigned)(x) & 0x1) << 28)
#define     CONTEXT_CONTROL_SHADOW_ENABLE(x)   (((unsigned)(x) & 0x1) << 31)

#define PKT3_CLEAR_STATE                       0x12

#define PKT3_SET_SH_REG                        0x76
#define		PACKET3_SET_SH_REG_START			0x00002c00

#define PKT3_SET_SH_REG_INDEX			0x9B

#define	PACKET3_DISPATCH_DIRECT				0x15
#define PACKET3_EVENT_WRITE				0x46
#define PACKET3_ACQUIRE_MEM				0x58
#define PACKET3_SET_CONTEXT_REG				0x69
#define PACKET3_SET_UCONFIG_REG				0x79
#define PACKET3_DRAW_INDEX_AUTO				0x2D
/* gfx 8: compute register offsets */
#define mmCOMPUTE_PGM_LO                                                        0x2e0c
#define mmCOMPUTE_PGM_RSRC1                                                     0x2e12
#define mmCOMPUTE_TMPRING_SIZE                                                  0x2e18
#define mmCOMPUTE_USER_DATA_0                                                   0x2e40
#define mmCOMPUTE_USER_DATA_1                                                   0x2e41
#define mmCOMPUTE_RESOURCE_LIMITS                                               0x2e15
#define mmCOMPUTE_NUM_THREAD_X                                                  0x2e07
31100a23bdaSmrg
31200a23bdaSmrg
31300a23bdaSmrg
/*
 * Reverse the byte order of a 32-bit value.
 *
 * The argument is parenthesized and converted to uint32_t, so expression
 * arguments (a | b) and plain int arguments are handled correctly and no
 * shift goes through a signed type.  The result is an integer constant
 * expression when num is one, so it can be used in static initializers.
 * num is evaluated four times; do not pass an expression with side effects.
 */
#define SWAP_32(num) ((((uint32_t)(num) & 0xff000000u) >> 24) | \
		      (((uint32_t)(num) & 0x0000ff00u) << 8) | \
		      (((uint32_t)(num) & 0x00ff0000u) >> 8) | \
		      (((uint32_t)(num) & 0x000000ffu) << 24))
31800a23bdaSmrg
31900a23bdaSmrg
32000a23bdaSmrg/* Shader code
32100a23bdaSmrg * void main()
32200a23bdaSmrg{
32300a23bdaSmrg
32400a23bdaSmrg	float x = some_input;
32500a23bdaSmrg		for (unsigned i = 0; i < 1000000; i++)
32600a23bdaSmrg  	x = sin(x);
32700a23bdaSmrg
32800a23bdaSmrg	u[0] = 42u;
32900a23bdaSmrg}
33000a23bdaSmrg*/
33100a23bdaSmrg
33200a23bdaSmrgstatic  uint32_t shader_bin[] = {
33300a23bdaSmrg	SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
33400a23bdaSmrg	SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
33500a23bdaSmrg	SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
33600a23bdaSmrg	SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
33700a23bdaSmrg};
33800a23bdaSmrg
/*
 * Offsets of the code and data regions; presumably byte offsets into a
 * shared buffer - confirm against the code that uses them.
 */
#define CODE_OFFSET 512
#define DATA_OFFSET 1024
34100a23bdaSmrg
/* Compute-shader variants; the values are implicit, starting at 0. */
enum cs_type {
	CS_BUFFERCLEAR,
	CS_BUFFERCOPY,
	CS_HANG,
	CS_HANG_SLOW
};
3485324fb0dSmrg
/* Raw 32-bit words of the gfx9 buffer-clear compute shader (per its name). */
static const uint32_t bufferclear_cs_shader_gfx9[] = {
	0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08,
	0x7e020280, 0x7e040204, 0x7e060205, 0x7e080206,
	0x7e0a0207, 0xe01c2000, 0x80000200, 0xbf8c0000,
	0xbf810000
};
3555324fb0dSmrg
/* Raw 32-bit words of the gfx10 buffer-clear compute shader (per its name). */
static const uint32_t bufferclear_cs_shader_gfx10[] = {
	0xD7460004, 0x04010C08, 0x7E000204, 0x7E020205,
	0x7E040206, 0x7E060207, 0xE01C2000, 0x80000004,
	0xBF810000
};
3610ed5401bSmrg
/* { register offset, value } pairs for the buffer-clear compute shader. */
static const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = {
	{0x2e12, 0x000C0041},	/* mmCOMPUTE_PGM_RSRC1 */
	{0x2e13, 0x00000090},	/* mmCOMPUTE_PGM_RSRC2 */
	{0x2e07, 0x00000040},	/* mmCOMPUTE_NUM_THREAD_X */
	{0x2e08, 0x00000001},	/* mmCOMPUTE_NUM_THREAD_Y */
	{0x2e09, 0x00000001},	/* mmCOMPUTE_NUM_THREAD_Z */
};

/* Number of rows in the table above, derived so the two cannot drift apart. */
static const uint32_t bufferclear_cs_shader_registers_num_gfx9 =
	sizeof(bufferclear_cs_shader_registers_gfx9) /
	sizeof(bufferclear_cs_shader_registers_gfx9[0]);
3715324fb0dSmrg
/* Raw 32-bit words of the gfx9 buffer-copy compute shader (per its name). */
static const uint32_t buffercopy_cs_shader_gfx9[] = {
	0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08,
	0x7e020280, 0xe00c2000, 0x80000200, 0xbf8c0f70,
	0xe01c2000, 0x80010200, 0xbf810000
};
3775324fb0dSmrg
/* Raw 32-bit words of the gfx10 buffer-copy compute shader (per its name). */
static const uint32_t buffercopy_cs_shader_gfx10[] = {
	0xD7460001, 0x04010C08, 0xE00C2000, 0x80000201,
	0xBF8C3F70, 0xE01C2000, 0x80010201, 0xBF810000
};
3820ed5401bSmrg
/*
 * Pre-built command-stream dwords for gfx9 (a "preamble", going by the
 * name).  The 0xc0NNxx00 words look like PM4 type-3 headers followed by
 * their payload; the table is kept as raw data.
 */
static const uint32_t preamblecache_gfx9[] = {
	0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
	0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
	0xc0026900, 0xb4, 0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
	0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
	0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
	0xc0016900, 0x2d5, 0x10000, 0xc0016900, 0x2dc, 0x0,
	0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
	0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
	0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20,
	0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
	0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0,
	0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
	0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
	0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0,
	0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1,
	0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0,
	0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0,
	0xc0017900, 0x24b, 0x0
};
4055324fb0dSmrg
/*
 * Pre-built command-stream dwords for gfx10 (a "preamble", going by the
 * name).  The 0xc0NNxx00 words look like PM4 type-3 headers followed by
 * their payload; the table is kept as raw data.
 */
static const uint32_t preamblecache_gfx10[] = {
	0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
	0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
	0xc0026900, 0xb4, 0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
	0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
	0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
	0xc0016900, 0x2d5, 0x10000, 0xc0016900, 0x2dc, 0x0,
	0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
	0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
	0xc0046900, 0x310, 0, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0xe, 0x20,
	0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
	0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x6, 0x0,
	0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
	0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
	0xc0016900, 0x314, 0x0, 0xc0016900, 0x10a, 0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0,
	0xc0016900, 0x2db, 0, 0xc0016900, 0x1d4, 0, 0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1, 0xc0016900, 0xe, 0x2,
	0xc0016900, 0x206, 0x300, 0xc0016900, 0x212, 0x200, 0xc0017900, 0x7b, 0x20, 0xc0017a00, 0x20000243, 0x0,
	0xc0017900, 0x249, 0, 0xc0017900, 0x24a, 0, 0xc0017900, 0x24b, 0, 0xc0017900, 0x259, 0xffffffff,
	0xc0017900, 0x25f, 0, 0xc0017900, 0x260, 0, 0xc0017900, 0x262, 0,
	0xc0017600, 0x45, 0x0, 0xc0017600, 0x6, 0x0,
	0xc0067600, 0x70, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0067600, 0x30, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0
};
4310ed5401bSmrg
/* Pixel-shader variants; the values are implicit, starting at 0. */
enum ps_type {
	PS_CONST,
	PS_TEX,
	PS_HANG,
	PS_HANG_SLOW
};
4385324fb0dSmrg
/* Raw 32-bit words of the gfx9 "const" pixel shader (per its name). */
static const uint32_t ps_const_shader_gfx9[] = {
	0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
	0xD2960000, 0x00020300, 0xD2960001, 0x00020702,
	0xC4001C0F, 0x00000100, 0xBF810000
};
4445324fb0dSmrg
/*
 * Patch data for ps_const_shader_gfx9: each patch site has 10 alternative
 * sequences of ps_const_shader_patchinfo_code_size_gfx9 (6) dwords.
 * Presumably one alternative is copied over the shader at the dword offset
 * listed in ps_const_shader_patchinfo_offset_gfx9 - confirm against the
 * code that consumes these tables.
 */
static const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6;

static const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = {
	{{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 },
	 { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 }
	}
};

static const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = {
	0x00000004
};
4645324fb0dSmrg
/* Number of { offset, value } rows in a gfx9 pixel-shader SH register table. */
static const uint32_t ps_num_sh_registers_gfx9 = 2;

/* { register offset, value } pairs for the gfx9 "const" pixel shader. */
static const uint32_t ps_const_sh_registers_gfx9[][2] = {
	{0x2C0A, 0x000C0040},	/* mmSPI_SHADER_PGM_RSRC1_PS */
	{0x2C0B, 0x00000008},	/* mmSPI_SHADER_PGM_RSRC2_PS */
};
4715324fb0dSmrg
/* Number of { offset, value } rows in a gfx9 pixel-shader context register table. */
static const uint32_t ps_num_context_registers_gfx9 = 7;

/* { register offset, value } pairs for the gfx9 "const" pixel shader. */
static const uint32_t ps_const_context_reg_gfx9[][2] = {
	{0xA1B4, 0x00000002},	/* mmSPI_PS_INPUT_ADDR */
	{0xA1B6, 0x00000000},	/* mmSPI_PS_IN_CONTROL */
	{0xA08F, 0x0000000F},	/* mmCB_SHADER_MASK */
	{0xA203, 0x00000010},	/* mmDB_SHADER_CONTROL */
	{0xA1C4, 0x00000000},	/* mmSPI_SHADER_Z_FORMAT */
	{0xA1B8, 0x00000000},	/* mmSPI_BARYC_CNTL - always 0 for now */
	{0xA1C5, 0x00000004},	/* mmSPI_SHADER_COL_FORMAT */
};
4835324fb0dSmrg
/* Raw 32-bit words of the gfx10 "const" pixel shader (per its name). */
static const uint32_t ps_const_shader_gfx10[] = {
	0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
	0x5E000300, 0x5E020702, 0xBF800000, 0xBF800000,
	0xF8001C0F, 0x00000100, 0xBF810000
};
4890ed5401bSmrg
/*
 * Patch data for ps_const_shader_gfx10: each patch site has 10 alternative
 * sequences of ps_const_shader_patchinfo_code_size_gfx10 (6) dwords.
 * Presumably one alternative is copied over the shader at the dword offset
 * listed in ps_const_shader_patchinfo_offset_gfx10 - confirm against the
 * code that consumes these tables.
 */
static const uint32_t ps_const_shader_patchinfo_code_size_gfx10 = 6;

static const uint32_t ps_const_shader_patchinfo_code_gfx10[][10][6] = {
	{{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001890, 0x00000000 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001801, 0x00000000 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000100 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000300 },
	 { 0x5E000300, 0x5E020702, 0xBF800000, 0xBF800000, 0xF8001C0F, 0x00000100 },
	 { 0xD7690000, 0x00020300, 0xD7690001, 0x00020702, 0xF8001C0F, 0x00000100 },
	 { 0xD7680000, 0x00020300, 0xD7680001, 0x00020702, 0xF8001C0F, 0x00000100 },
	 { 0xD76A0000, 0x00020300, 0xD76A0001, 0x00020702, 0xF8001C0F, 0x00000100 },
	 { 0xD76B0000, 0x00020300, 0xD76B0001, 0x00020702, 0xF8001C0F, 0x00000100 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF800180F, 0x03020100 }
	}
};

static const uint32_t ps_const_shader_patchinfo_offset_gfx10[] = {
	0x00000004
};
5090ed5401bSmrg
/* Number of { offset, value } rows in a gfx10 pixel-shader SH register table. */
static const uint32_t ps_num_sh_registers_gfx10 = 2;

/* { register offset, value } pairs for the gfx10 "const" pixel shader. */
static const uint32_t ps_const_sh_registers_gfx10[][2] = {
	{0x2C0A, 0x000C0000},	/* mmSPI_SHADER_PGM_RSRC1_PS */
	{0x2C0B, 0x00000008},	/* mmSPI_SHADER_PGM_RSRC2_PS */
};
5160ed5401bSmrg
/* Raw 32-bit words of the gfx9 "tex" pixel shader (per its name). */
static const uint32_t ps_tex_shader_gfx9[] = {
	0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000,
	0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00,
	0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000,
	0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F,
	0x00000100, 0xBF810000
};
5245324fb0dSmrg
/*
 * Patch data for ps_tex_shader_gfx9: each patch site has 10 alternative
 * sequences of ps_tex_shader_patchinfo_code_size_gfx9 (6) dwords.
 * Presumably one alternative is copied over the shader at the dword offset
 * listed in ps_tex_shader_patchinfo_offset_gfx9 - confirm against the code
 * that consumes these tables.
 */
static const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = {
	0x0000000B
};

static const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6;

static const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = {
	{{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 },
	 { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 }
	}
};
5445324fb0dSmrg
/* { register offset, value } pairs for the gfx9 "tex" pixel shader. */
static const uint32_t ps_tex_sh_registers_gfx9[][2] = {
	{0x2C0A, 0x000C0081},	/* mmSPI_SHADER_PGM_RSRC1_PS */
	{0x2C0B, 0x00000018},	/* mmSPI_SHADER_PGM_RSRC2_PS */
};
5495324fb0dSmrg
/* { register offset, value } pairs for the gfx9 "tex" pixel shader. */
static const uint32_t ps_tex_context_reg_gfx9[][2] = {
	{0xA1B4, 0x00000002},	/* mmSPI_PS_INPUT_ADDR */
	{0xA1B6, 0x00000001},	/* mmSPI_PS_IN_CONTROL */
	{0xA08F, 0x0000000F},	/* mmCB_SHADER_MASK */
	{0xA203, 0x00000010},	/* mmDB_SHADER_CONTROL */
	{0xA1C4, 0x00000000},	/* mmSPI_SHADER_Z_FORMAT */
	{0xA1B8, 0x00000000},	/* mmSPI_BARYC_CNTL - always 0 for now */
	{0xA1C5, 0x00000004},	/* mmSPI_SHADER_COL_FORMAT */
};
5595324fb0dSmrg
/* Raw 32-bit words of the gfx10 "tex" pixel shader (per its name). */
static const uint32_t ps_tex_shader_gfx10[] = {
	0xBEFC030C, 0xBE8E047E, 0xBEFE0A7E, 0xC8080000,
	0xC80C0100, 0xC8090001, 0xC80D0101, 0xF0800F0A,
	0x00400402, 0x00000003, 0xBEFE040E, 0xBF8C0F70,
	0x5E000B04, 0x5E020F06, 0xBF800000, 0xBF800000,
	0xF8001C0F, 0x00000100, 0xBF810000
};
5670ed5401bSmrg
/*
 * Patch data for ps_tex_shader_gfx10: each patch site has 10 alternative
 * sequences of ps_tex_shader_patchinfo_code_size_gfx10 (6) dwords.
 * Presumably one alternative is copied over the shader at the dword offset
 * listed in ps_tex_shader_patchinfo_offset_gfx10 - confirm against the code
 * that consumes these tables.
 */
static const uint32_t ps_tex_shader_patchinfo_offset_gfx10[] = {
	0x0000000C
};

static const uint32_t ps_tex_shader_patchinfo_code_size_gfx10 = 6;

static const uint32_t ps_tex_shader_patchinfo_code_gfx10[][10][6] = {
	{{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001890, 0x00000000 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001801, 0x00000004 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000504 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000704 },
	 { 0x5E000B04, 0x5E020F06, 0xBF800000, 0xBF800000, 0xF8001C0F, 0x00000100 },
	 { 0xD7690000, 0x00020B04, 0xD7690001, 0x00020F06, 0xF8001C0F, 0x00000100 },
	 { 0xD7680000, 0x00020B04, 0xD7680001, 0x00020F06, 0xF8001C0F, 0x00000100 },
	 { 0xD76A0000, 0x00020B04, 0xD76A0001, 0x00020F06, 0xF8001C0F, 0x00000100 },
	 { 0xD76B0000, 0x00020B04, 0xD76B0001, 0x00020F06, 0xF8001C0F, 0x00000100 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF800180F, 0x07060504 }
	}
};
5870ed5401bSmrg
/* Raw 32-bit words of the gfx9 "RectPosTexFast" vertex shader (per its name). */
static const uint32_t vs_RectPosTexFast_shader_gfx9[] = {
	0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100,
	0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206,
	0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080,
	0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003,
	0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101,
	0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903,
	0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100,
	0xC400020F, 0x05060403, 0xBF810000
};
5985324fb0dSmrg
/*
 * Pre-assembled GFX10 vertex shader binary ("RectPosTexFast"), 29 dwords.
 * The trailing comment on each row is the dword index of its first word.
 */
static const uint32_t vs_RectPosTexFast_shader_gfx10[] = {
    0x7E000B00, 0x060000F3, 0x7E020202, 0x7E040206, /*  0 */
    0x7C040080, 0x060000F3, 0xD5010001, 0x01AA0200, /*  4 */
    0x7E060203, 0xD5010002, 0x01AA0404, 0x7E080207, /*  8 */
    0x7C040080, 0xD5010000, 0x01A80101, 0xD5010001, /* 12 */
    0x01AA0601, 0x7E060208, 0x7E0A02F2, 0xD5010002, /* 16 */
    0x01A80902, 0xD5010004, 0x01AA0805, 0x7E0C0209, /* 20 */
    0xF80008CF, 0x05030100, 0xF800020F, 0x05060402, /* 24 */
    0xBF810000                                      /* 28 */
};
6090ed5401bSmrg
/*
 * Canned GFX9 command-stream words, 50 dwords in total.
 * NOTE(review): laid out one packet per line on the assumption that each
 * 0xc0NN6900 word is a PM4 header followed by a register offset and NN
 * values - confirm against the PM4 packet definitions.
 */
static const uint32_t cached_cmd_gfx9[] = {
	0xc0016900, 0x0, 0x0,
	0xc0026900, 0x3, 0x2a, 0x0,
	0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
	0xc0016900, 0x83, 0xffff,
	0xc0026900, 0x8e, 0xf, 0xf,
	0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x12,
	0xc0026900, 0x10b, 0x0, 0x0,
	0xc0016900, 0x1e0, 0x0,
	0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
	0xc0026900, 0x292, 0x20, 0x60201b8,
	0xc0026900, 0x2b0, 0x0, 0x0,
	0xc0016900, 0x2f8, 0x0
};
62000a23bdaSmrg
/*
 * Canned GFX10 command-stream words, 50 dwords in total. Compared with the
 * GFX9 table only two values differ: the last word of the 0x105 group
 * (0x18) and the last word of the 0x292 group (0x6020000).
 * NOTE(review): laid out one packet per line on the assumption that each
 * 0xc0NN6900 word is a PM4 header followed by a register offset and NN
 * values - confirm against the PM4 packet definitions.
 */
static const uint32_t cached_cmd_gfx10[] = {
	0xc0016900, 0x0, 0x0,
	0xc0026900, 0x3, 0x2a, 0x0,
	0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
	0xc0016900, 0x83, 0xffff,
	0xc0026900, 0x8e, 0xf, 0xf,
	0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x18,
	0xc0026900, 0x10b, 0x0, 0x0,
	0xc0016900, 0x1e0, 0x0,
	0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
	0xc0026900, 0x292, 0x20, 0x6020000,
	0xc0026900, 0x2b0, 0x0, 0x0,
	0xc0016900, 0x2f8, 0x0
};
6310ed5401bSmrg
/*
 * Pre-assembled pixel shader binary used by the "hang" variants of the
 * memcpy draw tests, 19 dwords.
 * NOTE(review): the leading 0xFFFFFFFF word is presumably what provokes the
 * hang - confirm with the test that loads this shader.
 */
unsigned int memcpy_ps_hang[] = {
        0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100, /*  0 */
        0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001, /*  4 */
        0xC80D0101, 0xBF8C007F, 0xF0800F00, 0x00010002, /*  8 */
        0xBEFE040C, 0xBF8C0F70, 0xBF800000, 0xBF800000, /* 12 */
        0xF800180F, 0x03020100, 0xBF810000              /* 16 */
};
6399bd392adSmrg
/*
 * Describes a shader binary split into three consecutive sections.
 * In the instances defined in this file the three lengths add up to the
 * number of dwords in the array that `shader` points to.
 * NOTE(review): presumably the body section is replicated by the loader to
 * make the shader run slowly - confirm against the code that consumes this.
 */
struct amdgpu_test_shader {
	/* first dword of the shader code */
	uint32_t *shader;
	/* number of dwords in the leading section */
	uint32_t header_length;
	/* number of dwords in the middle section */
	uint32_t body_length;
	/* number of dwords in the trailing section */
	uint32_t foot_length;
};
6469bd392adSmrg
/*
 * Compute shader code referenced by memcpy_cs_hang_slow_ai, 8 dwords.
 * Rows follow the 4/3/1 header/body/foot split recorded in that descriptor.
 */
unsigned int memcpy_cs_hang_slow_ai_codes[] = {
    /* header */ 0xd1fd0000, 0x04010c08, 0xe00c2000, 0x80000100,
    /* body   */ 0xbf8c0f70, 0xe01c2000, 0x80010100,
    /* foot   */ 0xbf810000
};
6519bd392adSmrg
6529bd392adSmrgstruct amdgpu_test_shader memcpy_cs_hang_slow_ai = {
6539bd392adSmrg        memcpy_cs_hang_slow_ai_codes,
6549bd392adSmrg        4,
6559bd392adSmrg        3,
6569bd392adSmrg        1
6579bd392adSmrg};
6589bd392adSmrg
/*
 * Compute shader code referenced by memcpy_cs_hang_slow_rv, 8 dwords.
 * Rows follow the 4/3/1 header/body/foot split recorded in that descriptor.
 */
unsigned int memcpy_cs_hang_slow_rv_codes[] = {
    /* header */ 0x8e00860c, 0x32000000, 0xe00c2000, 0x80010100,
    /* body   */ 0xbf8c0f70, 0xe01c2000, 0x80020100,
    /* foot   */ 0xbf810000
};
6639bd392adSmrg
6649bd392adSmrgstruct amdgpu_test_shader memcpy_cs_hang_slow_rv = {
6659bd392adSmrg        memcpy_cs_hang_slow_rv_codes,
6669bd392adSmrg        4,
6679bd392adSmrg        3,
6689bd392adSmrg        1
6699bd392adSmrg};
6709bd392adSmrg
/*
 * Compute shader code referenced by memcpy_cs_hang_slow_nv, 8 dwords.
 * Rows follow the 4/3/1 header/body/foot split recorded in that descriptor.
 */
unsigned int memcpy_cs_hang_slow_nv_codes[] = {
    /* header */ 0xd7460000, 0x04010c08, 0xe00c2000, 0x80000100,
    /* body   */ 0xbf8c0f70, 0xe01ca000, 0x80010100,
    /* foot   */ 0xbf810000
};
6750ed5401bSmrg
6760ed5401bSmrgstruct amdgpu_test_shader memcpy_cs_hang_slow_nv = {
6770ed5401bSmrg        memcpy_cs_hang_slow_nv_codes,
6780ed5401bSmrg        4,
6790ed5401bSmrg        3,
6800ed5401bSmrg        1
6810ed5401bSmrg};
6820ed5401bSmrg
/*
 * Pixel shader code referenced by memcpy_ps_hang_slow_ai, 18 dwords.
 * Rows follow the 7/2/9 header/body/foot split recorded in that descriptor.
 */
unsigned int memcpy_ps_hang_slow_ai_codes[] = {
        /* header */
        0xbefc000c, 0xbe8e017e, 0xbefe077e, 0xd4080000,
        0xd4090001, 0xd40c0100, 0xd40d0101,
        /* body */
        0xf0800f00, 0x00400002,
        /* foot */
        0xbefe010e, 0xbf8c0f70, 0xbf800000, 0xbf800000,
        0xbf800000, 0xbf800000, 0xc400180f, 0x03020100,
        0xbf810000
};
6909bd392adSmrg
6919bd392adSmrgstruct amdgpu_test_shader memcpy_ps_hang_slow_ai = {
6929bd392adSmrg        memcpy_ps_hang_slow_ai_codes,
6939bd392adSmrg        7,
6949bd392adSmrg        2,
6959bd392adSmrg        9
6969bd392adSmrg};
6979bd392adSmrg
6987cdc0497Smrgint amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size,
6997cdc0497Smrg			unsigned alignment, unsigned heap, uint64_t alloc_flags,
7007cdc0497Smrg			uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu,
7017cdc0497Smrg			uint64_t *mc_address,
7027cdc0497Smrg			amdgpu_va_handle *va_handle)
7037cdc0497Smrg{
7047cdc0497Smrg	struct amdgpu_bo_alloc_request request = {};
7057cdc0497Smrg	amdgpu_bo_handle buf_handle;
7067cdc0497Smrg	amdgpu_va_handle handle;
7077cdc0497Smrg	uint64_t vmc_addr;
7087cdc0497Smrg	int r;
7097cdc0497Smrg
7107cdc0497Smrg	request.alloc_size = size;
7117cdc0497Smrg	request.phys_alignment = alignment;
7127cdc0497Smrg	request.preferred_heap = heap;
7137cdc0497Smrg	request.flags = alloc_flags;
7147cdc0497Smrg
7157cdc0497Smrg	r = amdgpu_bo_alloc(dev, &request, &buf_handle);
7167cdc0497Smrg	if (r)
7177cdc0497Smrg		return r;
7187cdc0497Smrg
7197cdc0497Smrg	r = amdgpu_va_range_alloc(dev,
7207cdc0497Smrg				  amdgpu_gpu_va_range_general,
7217cdc0497Smrg				  size, alignment, 0, &vmc_addr,
7227cdc0497Smrg				  &handle, 0);
7237cdc0497Smrg	if (r)
7247cdc0497Smrg		goto error_va_alloc;
7257cdc0497Smrg
7267cdc0497Smrg	r = amdgpu_bo_va_op_raw(dev, buf_handle, 0,  ALIGN(size, getpagesize()), vmc_addr,
7277cdc0497Smrg				   AMDGPU_VM_PAGE_READABLE |
7287cdc0497Smrg				   AMDGPU_VM_PAGE_WRITEABLE |
7297cdc0497Smrg				   AMDGPU_VM_PAGE_EXECUTABLE |
7307cdc0497Smrg				   mapping_flags,
7317cdc0497Smrg				   AMDGPU_VA_OP_MAP);
7327cdc0497Smrg	if (r)
7337cdc0497Smrg		goto error_va_map;
7347cdc0497Smrg
7357cdc0497Smrg	r = amdgpu_bo_cpu_map(buf_handle, cpu);
7367cdc0497Smrg	if (r)
7377cdc0497Smrg		goto error_cpu_map;
7387cdc0497Smrg
7397cdc0497Smrg	*bo = buf_handle;
7407cdc0497Smrg	*mc_address = vmc_addr;
7417cdc0497Smrg	*va_handle = handle;
7427cdc0497Smrg
7437cdc0497Smrg	return 0;
7447cdc0497Smrg
7457cdc0497Smrg error_cpu_map:
7467cdc0497Smrg	amdgpu_bo_cpu_unmap(buf_handle);
7477cdc0497Smrg
7487cdc0497Smrg error_va_map:
7497cdc0497Smrg	amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);
7507cdc0497Smrg
7517cdc0497Smrg error_va_alloc:
7527cdc0497Smrg	amdgpu_bo_free(buf_handle);
7537cdc0497Smrg	return r;
7547cdc0497Smrg}
7557cdc0497Smrg
7567cdc0497Smrg
7577cdc0497Smrg
75841687f09SmrgCU_BOOL suite_basic_tests_enable(void)
75941687f09Smrg{
76041687f09Smrg
76141687f09Smrg	if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
76241687f09Smrg					     &minor_version, &device_handle))
76341687f09Smrg		return CU_FALSE;
76441687f09Smrg
7654babd585Smrg
7664babd585Smrg	family_id = device_handle->info.family_id;
7674babd585Smrg	chip_id = device_handle->info.chip_external_rev;
7684babd585Smrg	chip_rev = device_handle->info.chip_rev;
76941687f09Smrg
77041687f09Smrg	if (amdgpu_device_deinitialize(device_handle))
77141687f09Smrg		return CU_FALSE;
77241687f09Smrg
7734babd585Smrg	/* disable gfx engine basic test cases for some asics have no CPG */
7744babd585Smrg	if (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) {
77541687f09Smrg		if (amdgpu_set_test_active("Basic Tests",
77641687f09Smrg					"Command submission Test (GFX)",
77741687f09Smrg					CU_FALSE))
77841687f09Smrg			fprintf(stderr, "test deactivation failed - %s\n",
77941687f09Smrg				CU_get_error_msg());
78041687f09Smrg
78141687f09Smrg		if (amdgpu_set_test_active("Basic Tests",
78241687f09Smrg					"Command submission Test (Multi-Fence)",
78341687f09Smrg					CU_FALSE))
78441687f09Smrg			fprintf(stderr, "test deactivation failed - %s\n",
78541687f09Smrg				CU_get_error_msg());
78641687f09Smrg
78741687f09Smrg		if (amdgpu_set_test_active("Basic Tests",
78841687f09Smrg					"Sync dependency Test",
78941687f09Smrg					CU_FALSE))
79041687f09Smrg			fprintf(stderr, "test deactivation failed - %s\n",
79141687f09Smrg				CU_get_error_msg());
79241687f09Smrg	}
79341687f09Smrg
79441687f09Smrg	return CU_TRUE;
79541687f09Smrg}
79641687f09Smrg
7973f012e29Smrgint suite_basic_tests_init(void)
7983f012e29Smrg{
799d8807b2fSmrg	struct amdgpu_gpu_info gpu_info = {0};
8003f012e29Smrg	int r;
8013f012e29Smrg
8023f012e29Smrg	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
8033f012e29Smrg				   &minor_version, &device_handle);
8043f012e29Smrg
805d8807b2fSmrg	if (r) {
806037b3c26Smrg		if ((r == -EACCES) && (errno == EACCES))
807037b3c26Smrg			printf("\n\nError:%s. "
808037b3c26Smrg				"Hint:Try to run this test program as root.",
809037b3c26Smrg				strerror(errno));
8103f012e29Smrg		return CUE_SINIT_FAILED;
811037b3c26Smrg	}
812d8807b2fSmrg
813d8807b2fSmrg	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
814d8807b2fSmrg	if (r)
815d8807b2fSmrg		return CUE_SINIT_FAILED;
816d8807b2fSmrg
817d8807b2fSmrg	family_id = gpu_info.family_id;
818d8807b2fSmrg
819d8807b2fSmrg	return CUE_SUCCESS;
8203f012e29Smrg}
8213f012e29Smrg
8223f012e29Smrgint suite_basic_tests_clean(void)
8233f012e29Smrg{
8243f012e29Smrg	int r = amdgpu_device_deinitialize(device_handle);
8253f012e29Smrg
8263f012e29Smrg	if (r == 0)
8273f012e29Smrg		return CUE_SUCCESS;
8283f012e29Smrg	else
8293f012e29Smrg		return CUE_SCLEAN_FAILED;
8303f012e29Smrg}
8313f012e29Smrg
8323f012e29Smrgstatic void amdgpu_query_info_test(void)
8333f012e29Smrg{
8343f012e29Smrg	struct amdgpu_gpu_info gpu_info = {0};
8353f012e29Smrg	uint32_t version, feature;
8363f012e29Smrg	int r;
8373f012e29Smrg
8383f012e29Smrg	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
8393f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
8403f012e29Smrg
8413f012e29Smrg	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
8423f012e29Smrg					  0, &version, &feature);
8433f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
8443f012e29Smrg}
8453f012e29Smrg
8463f012e29Smrgstatic void amdgpu_command_submission_gfx_separate_ibs(void)
8473f012e29Smrg{
8483f012e29Smrg	amdgpu_context_handle context_handle;
8493f012e29Smrg	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
8503f012e29Smrg	void *ib_result_cpu, *ib_result_ce_cpu;
8513f012e29Smrg	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
8523f012e29Smrg	struct amdgpu_cs_request ibs_request = {0};
8533f012e29Smrg	struct amdgpu_cs_ib_info ib_info[2];
8543f012e29Smrg	struct amdgpu_cs_fence fence_status = {0};
8553f012e29Smrg	uint32_t *ptr;
8563f012e29Smrg	uint32_t expired;
8573f012e29Smrg	amdgpu_bo_list_handle bo_list;
8583f012e29Smrg	amdgpu_va_handle va_handle, va_handle_ce;
859d8807b2fSmrg	int r, i = 0;
8603f012e29Smrg
8613f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
8623f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
8633f012e29Smrg
8643f012e29Smrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
8653f012e29Smrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
8663f012e29Smrg				    &ib_result_handle, &ib_result_cpu,
8673f012e29Smrg				    &ib_result_mc_address, &va_handle);
8683f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
8693f012e29Smrg
8703f012e29Smrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
8713f012e29Smrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
8723f012e29Smrg				    &ib_result_ce_handle, &ib_result_ce_cpu,
8733f012e29Smrg				    &ib_result_ce_mc_address, &va_handle_ce);
8743f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
8753f012e29Smrg
8763f012e29Smrg	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
8773f012e29Smrg			       ib_result_ce_handle, &bo_list);
8783f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
8793f012e29Smrg
8803f012e29Smrg	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
8813f012e29Smrg
8823f012e29Smrg	/* IT_SET_CE_DE_COUNTERS */
8833f012e29Smrg	ptr = ib_result_ce_cpu;
884d8807b2fSmrg	if (family_id != AMDGPU_FAMILY_SI) {
885d8807b2fSmrg		ptr[i++] = 0xc0008900;
886d8807b2fSmrg		ptr[i++] = 0;
887d8807b2fSmrg	}
888d8807b2fSmrg	ptr[i++] = 0xc0008400;
889d8807b2fSmrg	ptr[i++] = 1;
8903f012e29Smrg	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
891d8807b2fSmrg	ib_info[0].size = i;
8923f012e29Smrg	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
8933f012e29Smrg
8943f012e29Smrg	/* IT_WAIT_ON_CE_COUNTER */
8953f012e29Smrg	ptr = ib_result_cpu;
8963f012e29Smrg	ptr[0] = 0xc0008600;
8973f012e29Smrg	ptr[1] = 0x00000001;
8983f012e29Smrg	ib_info[1].ib_mc_address = ib_result_mc_address;
8993f012e29Smrg	ib_info[1].size = 2;
9003f012e29Smrg
9013f012e29Smrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
9023f012e29Smrg	ibs_request.number_of_ibs = 2;
9033f012e29Smrg	ibs_request.ibs = ib_info;
9043f012e29Smrg	ibs_request.resources = bo_list;
9053f012e29Smrg	ibs_request.fence_info.handle = NULL;
9063f012e29Smrg
9073f012e29Smrg	r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
9083f012e29Smrg
9093f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
9103f012e29Smrg
9113f012e29Smrg	fence_status.context = context_handle;
9123f012e29Smrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
9133f012e29Smrg	fence_status.ip_instance = 0;
9143f012e29Smrg	fence_status.fence = ibs_request.seq_no;
9153f012e29Smrg
9163f012e29Smrg	r = amdgpu_cs_query_fence_status(&fence_status,
9173f012e29Smrg					 AMDGPU_TIMEOUT_INFINITE,
9183f012e29Smrg					 0, &expired);
9193f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
9203f012e29Smrg
9213f012e29Smrg	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
9223f012e29Smrg				     ib_result_mc_address, 4096);
9233f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
9243f012e29Smrg
9253f012e29Smrg	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
9263f012e29Smrg				     ib_result_ce_mc_address, 4096);
9273f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
9283f012e29Smrg
9293f012e29Smrg	r = amdgpu_bo_list_destroy(bo_list);
9303f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
9313f012e29Smrg
9323f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
9333f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
9343f012e29Smrg
9353f012e29Smrg}
9363f012e29Smrg
9373f012e29Smrgstatic void amdgpu_command_submission_gfx_shared_ib(void)
9383f012e29Smrg{
9393f012e29Smrg	amdgpu_context_handle context_handle;
9403f012e29Smrg	amdgpu_bo_handle ib_result_handle;
9413f012e29Smrg	void *ib_result_cpu;
9423f012e29Smrg	uint64_t ib_result_mc_address;
9433f012e29Smrg	struct amdgpu_cs_request ibs_request = {0};
9443f012e29Smrg	struct amdgpu_cs_ib_info ib_info[2];
9453f012e29Smrg	struct amdgpu_cs_fence fence_status = {0};
9463f012e29Smrg	uint32_t *ptr;
9473f012e29Smrg	uint32_t expired;
9483f012e29Smrg	amdgpu_bo_list_handle bo_list;
9493f012e29Smrg	amdgpu_va_handle va_handle;
950d8807b2fSmrg	int r, i = 0;
9513f012e29Smrg
9523f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
9533f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
9543f012e29Smrg
9553f012e29Smrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
9563f012e29Smrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
9573f012e29Smrg				    &ib_result_handle, &ib_result_cpu,
9583f012e29Smrg				    &ib_result_mc_address, &va_handle);
9593f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
9603f012e29Smrg
9613f012e29Smrg	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
9623f012e29Smrg			       &bo_list);
9633f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
9643f012e29Smrg
9653f012e29Smrg	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
9663f012e29Smrg
9673f012e29Smrg	/* IT_SET_CE_DE_COUNTERS */
9683f012e29Smrg	ptr = ib_result_cpu;
969d8807b2fSmrg	if (family_id != AMDGPU_FAMILY_SI) {
970d8807b2fSmrg		ptr[i++] = 0xc0008900;
971d8807b2fSmrg		ptr[i++] = 0;
972d8807b2fSmrg	}
973d8807b2fSmrg	ptr[i++] = 0xc0008400;
974d8807b2fSmrg	ptr[i++] = 1;
9753f012e29Smrg	ib_info[0].ib_mc_address = ib_result_mc_address;
976d8807b2fSmrg	ib_info[0].size = i;
9773f012e29Smrg	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
9783f012e29Smrg
9793f012e29Smrg	ptr = (uint32_t *)ib_result_cpu + 4;
9803f012e29Smrg	ptr[0] = 0xc0008600;
9813f012e29Smrg	ptr[1] = 0x00000001;
9823f012e29Smrg	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
9833f012e29Smrg	ib_info[1].size = 2;
9843f012e29Smrg
9853f012e29Smrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
9863f012e29Smrg	ibs_request.number_of_ibs = 2;
9873f012e29Smrg	ibs_request.ibs = ib_info;
9883f012e29Smrg	ibs_request.resources = bo_list;
9893f012e29Smrg	ibs_request.fence_info.handle = NULL;
9903f012e29Smrg
9913f012e29Smrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
9923f012e29Smrg
9933f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
9943f012e29Smrg
9953f012e29Smrg	fence_status.context = context_handle;
9963f012e29Smrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
9973f012e29Smrg	fence_status.ip_instance = 0;
9983f012e29Smrg	fence_status.fence = ibs_request.seq_no;
9993f012e29Smrg
10003f012e29Smrg	r = amdgpu_cs_query_fence_status(&fence_status,
10013f012e29Smrg					 AMDGPU_TIMEOUT_INFINITE,
10023f012e29Smrg					 0, &expired);
10033f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
10043f012e29Smrg
10053f012e29Smrg	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
10063f012e29Smrg				     ib_result_mc_address, 4096);
10073f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
10083f012e29Smrg
10093f012e29Smrg	r = amdgpu_bo_list_destroy(bo_list);
10103f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
10113f012e29Smrg
10123f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
10133f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
10143f012e29Smrg}
10153f012e29Smrg
10163f012e29Smrgstatic void amdgpu_command_submission_gfx_cp_write_data(void)
10173f012e29Smrg{
10183f012e29Smrg	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
10193f012e29Smrg}
10203f012e29Smrg
10213f012e29Smrgstatic void amdgpu_command_submission_gfx_cp_const_fill(void)
10223f012e29Smrg{
10233f012e29Smrg	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
10243f012e29Smrg}
10253f012e29Smrg
10263f012e29Smrgstatic void amdgpu_command_submission_gfx_cp_copy_data(void)
10273f012e29Smrg{
10283f012e29Smrg	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
10293f012e29Smrg}
10303f012e29Smrg
103100a23bdaSmrgstatic void amdgpu_bo_eviction_test(void)
103200a23bdaSmrg{
103300a23bdaSmrg	const int sdma_write_length = 1024;
103400a23bdaSmrg	const int pm4_dw = 256;
103500a23bdaSmrg	amdgpu_context_handle context_handle;
103600a23bdaSmrg	amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
103700a23bdaSmrg	amdgpu_bo_handle *resources;
103800a23bdaSmrg	uint32_t *pm4;
103900a23bdaSmrg	struct amdgpu_cs_ib_info *ib_info;
104000a23bdaSmrg	struct amdgpu_cs_request *ibs_request;
104100a23bdaSmrg	uint64_t bo1_mc, bo2_mc;
104200a23bdaSmrg	volatile unsigned char *bo1_cpu, *bo2_cpu;
104300a23bdaSmrg	int i, j, r, loop1, loop2;
104400a23bdaSmrg	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
104500a23bdaSmrg	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
104600a23bdaSmrg	struct amdgpu_heap_info vram_info, gtt_info;
104700a23bdaSmrg
104800a23bdaSmrg	pm4 = calloc(pm4_dw, sizeof(*pm4));
104900a23bdaSmrg	CU_ASSERT_NOT_EQUAL(pm4, NULL);
105000a23bdaSmrg
105100a23bdaSmrg	ib_info = calloc(1, sizeof(*ib_info));
105200a23bdaSmrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
105300a23bdaSmrg
105400a23bdaSmrg	ibs_request = calloc(1, sizeof(*ibs_request));
105500a23bdaSmrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
105600a23bdaSmrg
105700a23bdaSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
105800a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
105900a23bdaSmrg
106000a23bdaSmrg	/* prepare resource */
106100a23bdaSmrg	resources = calloc(4, sizeof(amdgpu_bo_handle));
106200a23bdaSmrg	CU_ASSERT_NOT_EQUAL(resources, NULL);
106300a23bdaSmrg
106400a23bdaSmrg	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
106500a23bdaSmrg				   0, &vram_info);
106600a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
106700a23bdaSmrg
106800a23bdaSmrg	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
106900a23bdaSmrg				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
107000a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
107100a23bdaSmrg	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
107200a23bdaSmrg				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
107300a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
107400a23bdaSmrg
10754babd585Smrg	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
10764babd585Smrg				   0, &gtt_info);
10774babd585Smrg	CU_ASSERT_EQUAL(r, 0);
10784babd585Smrg
107900a23bdaSmrg	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
108000a23bdaSmrg				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
108100a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
108200a23bdaSmrg	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
108300a23bdaSmrg				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
108400a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
108500a23bdaSmrg
108600a23bdaSmrg
108700a23bdaSmrg
108800a23bdaSmrg	loop1 = loop2 = 0;
108900a23bdaSmrg	/* run 9 circle to test all mapping combination */
109000a23bdaSmrg	while(loop1 < 2) {
109100a23bdaSmrg		while(loop2 < 2) {
109200a23bdaSmrg			/* allocate UC bo1for sDMA use */
109300a23bdaSmrg			r = amdgpu_bo_alloc_and_map(device_handle,
109400a23bdaSmrg						    sdma_write_length, 4096,
109500a23bdaSmrg						    AMDGPU_GEM_DOMAIN_GTT,
109600a23bdaSmrg						    gtt_flags[loop1], &bo1,
109700a23bdaSmrg						    (void**)&bo1_cpu, &bo1_mc,
109800a23bdaSmrg						    &bo1_va_handle);
109900a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
110000a23bdaSmrg
110100a23bdaSmrg			/* set bo1 */
110200a23bdaSmrg			memset((void*)bo1_cpu, 0xaa, sdma_write_length);
110300a23bdaSmrg
110400a23bdaSmrg			/* allocate UC bo2 for sDMA use */
110500a23bdaSmrg			r = amdgpu_bo_alloc_and_map(device_handle,
110600a23bdaSmrg						    sdma_write_length, 4096,
110700a23bdaSmrg						    AMDGPU_GEM_DOMAIN_GTT,
110800a23bdaSmrg						    gtt_flags[loop2], &bo2,
110900a23bdaSmrg						    (void**)&bo2_cpu, &bo2_mc,
111000a23bdaSmrg						    &bo2_va_handle);
111100a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
111200a23bdaSmrg
111300a23bdaSmrg			/* clear bo2 */
111400a23bdaSmrg			memset((void*)bo2_cpu, 0, sdma_write_length);
111500a23bdaSmrg
111600a23bdaSmrg			resources[0] = bo1;
111700a23bdaSmrg			resources[1] = bo2;
111800a23bdaSmrg			resources[2] = vram_max[loop2];
111900a23bdaSmrg			resources[3] = gtt_max[loop2];
112000a23bdaSmrg
112100a23bdaSmrg			/* fulfill PM4: test DMA copy linear */
112200a23bdaSmrg			i = j = 0;
112300a23bdaSmrg			if (family_id == AMDGPU_FAMILY_SI) {
112400a23bdaSmrg				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
112500a23bdaSmrg							  sdma_write_length);
112600a23bdaSmrg				pm4[i++] = 0xffffffff & bo2_mc;
112700a23bdaSmrg				pm4[i++] = 0xffffffff & bo1_mc;
112800a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
112900a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
113000a23bdaSmrg			} else {
113100a23bdaSmrg				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
113200a23bdaSmrg				if (family_id >= AMDGPU_FAMILY_AI)
113300a23bdaSmrg					pm4[i++] = sdma_write_length - 1;
113400a23bdaSmrg				else
113500a23bdaSmrg					pm4[i++] = sdma_write_length;
113600a23bdaSmrg				pm4[i++] = 0;
113700a23bdaSmrg				pm4[i++] = 0xffffffff & bo1_mc;
113800a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
113900a23bdaSmrg				pm4[i++] = 0xffffffff & bo2_mc;
114000a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
114100a23bdaSmrg			}
114200a23bdaSmrg
114300a23bdaSmrg			amdgpu_test_exec_cs_helper(context_handle,
114400a23bdaSmrg						   AMDGPU_HW_IP_DMA, 0,
114500a23bdaSmrg						   i, pm4,
114600a23bdaSmrg						   4, resources,
114700a23bdaSmrg						   ib_info, ibs_request);
114800a23bdaSmrg
114900a23bdaSmrg			/* verify if SDMA test result meets with expected */
115000a23bdaSmrg			i = 0;
115100a23bdaSmrg			while(i < sdma_write_length) {
115200a23bdaSmrg				CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
115300a23bdaSmrg			}
115400a23bdaSmrg			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
115500a23bdaSmrg						     sdma_write_length);
115600a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
115700a23bdaSmrg			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
115800a23bdaSmrg						     sdma_write_length);
115900a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
116000a23bdaSmrg			loop2++;
116100a23bdaSmrg		}
116200a23bdaSmrg		loop2 = 0;
116300a23bdaSmrg		loop1++;
116400a23bdaSmrg	}
116500a23bdaSmrg	amdgpu_bo_free(vram_max[0]);
116600a23bdaSmrg	amdgpu_bo_free(vram_max[1]);
116700a23bdaSmrg	amdgpu_bo_free(gtt_max[0]);
116800a23bdaSmrg	amdgpu_bo_free(gtt_max[1]);
116900a23bdaSmrg	/* clean resources */
117000a23bdaSmrg	free(resources);
117100a23bdaSmrg	free(ibs_request);
117200a23bdaSmrg	free(ib_info);
117300a23bdaSmrg	free(pm4);
117400a23bdaSmrg
117500a23bdaSmrg	/* end of test */
117600a23bdaSmrg	r = amdgpu_cs_ctx_free(context_handle);
117700a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
117800a23bdaSmrg}
117900a23bdaSmrg
118000a23bdaSmrg
11813f012e29Smrgstatic void amdgpu_command_submission_gfx(void)
11823f012e29Smrg{
11833f012e29Smrg	/* write data using the CP */
11843f012e29Smrg	amdgpu_command_submission_gfx_cp_write_data();
11853f012e29Smrg	/* const fill using the CP */
11863f012e29Smrg	amdgpu_command_submission_gfx_cp_const_fill();
11873f012e29Smrg	/* copy data using the CP */
11883f012e29Smrg	amdgpu_command_submission_gfx_cp_copy_data();
11893f012e29Smrg	/* separate IB buffers for multi-IB submission */
11903f012e29Smrg	amdgpu_command_submission_gfx_separate_ibs();
11913f012e29Smrg	/* shared IB buffer for multi-IB submission */
11923f012e29Smrg	amdgpu_command_submission_gfx_shared_ib();
11933f012e29Smrg}
11943f012e29Smrg
11953f012e29Smrgstatic void amdgpu_semaphore_test(void)
11963f012e29Smrg{
11973f012e29Smrg	amdgpu_context_handle context_handle[2];
11983f012e29Smrg	amdgpu_semaphore_handle sem;
11993f012e29Smrg	amdgpu_bo_handle ib_result_handle[2];
12003f012e29Smrg	void *ib_result_cpu[2];
12013f012e29Smrg	uint64_t ib_result_mc_address[2];
12023f012e29Smrg	struct amdgpu_cs_request ibs_request[2] = {0};
12033f012e29Smrg	struct amdgpu_cs_ib_info ib_info[2] = {0};
12043f012e29Smrg	struct amdgpu_cs_fence fence_status = {0};
12053f012e29Smrg	uint32_t *ptr;
12063f012e29Smrg	uint32_t expired;
1207d8807b2fSmrg	uint32_t sdma_nop, gfx_nop;
12083f012e29Smrg	amdgpu_bo_list_handle bo_list[2];
12093f012e29Smrg	amdgpu_va_handle va_handle[2];
12103f012e29Smrg	int r, i;
12114babd585Smrg	struct amdgpu_gpu_info gpu_info = {0};
12124babd585Smrg	unsigned gc_ip_type;
12134babd585Smrg
12144babd585Smrg	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
12154babd585Smrg	CU_ASSERT_EQUAL(r, 0);
12164babd585Smrg
12174babd585Smrg	gc_ip_type = (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) ?
12184babd585Smrg			AMDGPU_HW_IP_COMPUTE : AMDGPU_HW_IP_GFX;
12193f012e29Smrg
1220d8807b2fSmrg	if (family_id == AMDGPU_FAMILY_SI) {
1221d8807b2fSmrg		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
1222d8807b2fSmrg		gfx_nop = GFX_COMPUTE_NOP_SI;
1223d8807b2fSmrg	} else {
1224d8807b2fSmrg		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
1225d8807b2fSmrg		gfx_nop = GFX_COMPUTE_NOP;
1226d8807b2fSmrg	}
1227d8807b2fSmrg
12283f012e29Smrg	r = amdgpu_cs_create_semaphore(&sem);
12293f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
12303f012e29Smrg	for (i = 0; i < 2; i++) {
12313f012e29Smrg		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
12323f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
12333f012e29Smrg
12343f012e29Smrg		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
12353f012e29Smrg					    AMDGPU_GEM_DOMAIN_GTT, 0,
12363f012e29Smrg					    &ib_result_handle[i], &ib_result_cpu[i],
12373f012e29Smrg					    &ib_result_mc_address[i], &va_handle[i]);
12383f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
12393f012e29Smrg
12403f012e29Smrg		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
12413f012e29Smrg				       NULL, &bo_list[i]);
12423f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
12433f012e29Smrg	}
12443f012e29Smrg
12453f012e29Smrg	/* 1. same context different engine */
12463f012e29Smrg	ptr = ib_result_cpu[0];
1247d8807b2fSmrg	ptr[0] = sdma_nop;
12483f012e29Smrg	ib_info[0].ib_mc_address = ib_result_mc_address[0];
12493f012e29Smrg	ib_info[0].size = 1;
12503f012e29Smrg
12513f012e29Smrg	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
12523f012e29Smrg	ibs_request[0].number_of_ibs = 1;
12533f012e29Smrg	ibs_request[0].ibs = &ib_info[0];
12543f012e29Smrg	ibs_request[0].resources = bo_list[0];
12553f012e29Smrg	ibs_request[0].fence_info.handle = NULL;
12563f012e29Smrg	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
12573f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
12583f012e29Smrg	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
12593f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
12603f012e29Smrg
12614babd585Smrg	r = amdgpu_cs_wait_semaphore(context_handle[0], gc_ip_type, 0, 0, sem);
12623f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
12633f012e29Smrg	ptr = ib_result_cpu[1];
1264d8807b2fSmrg	ptr[0] = gfx_nop;
12653f012e29Smrg	ib_info[1].ib_mc_address = ib_result_mc_address[1];
12663f012e29Smrg	ib_info[1].size = 1;
12673f012e29Smrg
12684babd585Smrg	ibs_request[1].ip_type = gc_ip_type;
12693f012e29Smrg	ibs_request[1].number_of_ibs = 1;
12703f012e29Smrg	ibs_request[1].ibs = &ib_info[1];
12713f012e29Smrg	ibs_request[1].resources = bo_list[1];
12723f012e29Smrg	ibs_request[1].fence_info.handle = NULL;
12733f012e29Smrg
12743f012e29Smrg	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[1], 1);
12753f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
12763f012e29Smrg
12773f012e29Smrg	fence_status.context = context_handle[0];
12784babd585Smrg	fence_status.ip_type = gc_ip_type;
12793f012e29Smrg	fence_status.ip_instance = 0;
12803f012e29Smrg	fence_status.fence = ibs_request[1].seq_no;
12813f012e29Smrg	r = amdgpu_cs_query_fence_status(&fence_status,
12823f012e29Smrg					 500000000, 0, &expired);
12833f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
12843f012e29Smrg	CU_ASSERT_EQUAL(expired, true);
12853f012e29Smrg
12863f012e29Smrg	/* 2. same engine different context */
12873f012e29Smrg	ptr = ib_result_cpu[0];
1288d8807b2fSmrg	ptr[0] = gfx_nop;
12893f012e29Smrg	ib_info[0].ib_mc_address = ib_result_mc_address[0];
12903f012e29Smrg	ib_info[0].size = 1;
12913f012e29Smrg
12924babd585Smrg	ibs_request[0].ip_type = gc_ip_type;
12933f012e29Smrg	ibs_request[0].number_of_ibs = 1;
12943f012e29Smrg	ibs_request[0].ibs = &ib_info[0];
12953f012e29Smrg	ibs_request[0].resources = bo_list[0];
12963f012e29Smrg	ibs_request[0].fence_info.handle = NULL;
12973f012e29Smrg	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
12983f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
12994babd585Smrg	r = amdgpu_cs_signal_semaphore(context_handle[0], gc_ip_type, 0, 0, sem);
13003f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
13013f012e29Smrg
13024babd585Smrg	r = amdgpu_cs_wait_semaphore(context_handle[1], gc_ip_type, 0, 0, sem);
13033f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
13043f012e29Smrg	ptr = ib_result_cpu[1];
1305d8807b2fSmrg	ptr[0] = gfx_nop;
13063f012e29Smrg	ib_info[1].ib_mc_address = ib_result_mc_address[1];
13073f012e29Smrg	ib_info[1].size = 1;
13083f012e29Smrg
13094babd585Smrg	ibs_request[1].ip_type = gc_ip_type;
13103f012e29Smrg	ibs_request[1].number_of_ibs = 1;
13113f012e29Smrg	ibs_request[1].ibs = &ib_info[1];
13123f012e29Smrg	ibs_request[1].resources = bo_list[1];
13133f012e29Smrg	ibs_request[1].fence_info.handle = NULL;
13143f012e29Smrg	r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request[1], 1);
13153f012e29Smrg
13163f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
13173f012e29Smrg
13183f012e29Smrg	fence_status.context = context_handle[1];
13194babd585Smrg	fence_status.ip_type = gc_ip_type;
13203f012e29Smrg	fence_status.ip_instance = 0;
13213f012e29Smrg	fence_status.fence = ibs_request[1].seq_no;
13223f012e29Smrg	r = amdgpu_cs_query_fence_status(&fence_status,
13233f012e29Smrg					 500000000, 0, &expired);
13243f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
13253f012e29Smrg	CU_ASSERT_EQUAL(expired, true);
1326d8807b2fSmrg
13273f012e29Smrg	for (i = 0; i < 2; i++) {
13283f012e29Smrg		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
13293f012e29Smrg					     ib_result_mc_address[i], 4096);
13303f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
13313f012e29Smrg
13323f012e29Smrg		r = amdgpu_bo_list_destroy(bo_list[i]);
13333f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
13343f012e29Smrg
13353f012e29Smrg		r = amdgpu_cs_ctx_free(context_handle[i]);
13363f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
13373f012e29Smrg	}
13383f012e29Smrg
13393f012e29Smrg	r = amdgpu_cs_destroy_semaphore(sem);
13403f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
13413f012e29Smrg}
13423f012e29Smrg
13433f012e29Smrgstatic void amdgpu_command_submission_compute_nop(void)
13443f012e29Smrg{
13453f012e29Smrg	amdgpu_context_handle context_handle;
13463f012e29Smrg	amdgpu_bo_handle ib_result_handle;
13473f012e29Smrg	void *ib_result_cpu;
13483f012e29Smrg	uint64_t ib_result_mc_address;
13493f012e29Smrg	struct amdgpu_cs_request ibs_request;
13503f012e29Smrg	struct amdgpu_cs_ib_info ib_info;
13513f012e29Smrg	struct amdgpu_cs_fence fence_status;
13523f012e29Smrg	uint32_t *ptr;
13533f012e29Smrg	uint32_t expired;
135400a23bdaSmrg	int r, instance;
13553f012e29Smrg	amdgpu_bo_list_handle bo_list;
13563f012e29Smrg	amdgpu_va_handle va_handle;
1357d8807b2fSmrg	struct drm_amdgpu_info_hw_ip info;
1358d8807b2fSmrg
1359d8807b2fSmrg	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
1360d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
13613f012e29Smrg
13623f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
13633f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
13643f012e29Smrg
1365d8807b2fSmrg	for (instance = 0; (1 << instance) & info.available_rings; instance++) {
13663f012e29Smrg		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
13673f012e29Smrg					    AMDGPU_GEM_DOMAIN_GTT, 0,
13683f012e29Smrg					    &ib_result_handle, &ib_result_cpu,
13693f012e29Smrg					    &ib_result_mc_address, &va_handle);
13703f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
13713f012e29Smrg
13723f012e29Smrg		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
13733f012e29Smrg				       &bo_list);
13743f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
13753f012e29Smrg
13763f012e29Smrg		ptr = ib_result_cpu;
1377d8807b2fSmrg		memset(ptr, 0, 16);
1378d8807b2fSmrg		ptr[0]=PACKET3(PACKET3_NOP, 14);
13793f012e29Smrg
13803f012e29Smrg		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
13813f012e29Smrg		ib_info.ib_mc_address = ib_result_mc_address;
13823f012e29Smrg		ib_info.size = 16;
13833f012e29Smrg
13843f012e29Smrg		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
13853f012e29Smrg		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
13863f012e29Smrg		ibs_request.ring = instance;
13873f012e29Smrg		ibs_request.number_of_ibs = 1;
13883f012e29Smrg		ibs_request.ibs = &ib_info;
13893f012e29Smrg		ibs_request.resources = bo_list;
13903f012e29Smrg		ibs_request.fence_info.handle = NULL;
13913f012e29Smrg
13923f012e29Smrg		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
13933f012e29Smrg		r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
13943f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
13953f012e29Smrg
13963f012e29Smrg		fence_status.context = context_handle;
13973f012e29Smrg		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
13983f012e29Smrg		fence_status.ip_instance = 0;
13993f012e29Smrg		fence_status.ring = instance;
14003f012e29Smrg		fence_status.fence = ibs_request.seq_no;
14013f012e29Smrg
14023f012e29Smrg		r = amdgpu_cs_query_fence_status(&fence_status,
14033f012e29Smrg						 AMDGPU_TIMEOUT_INFINITE,
14043f012e29Smrg						 0, &expired);
14053f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
14063f012e29Smrg
14073f012e29Smrg		r = amdgpu_bo_list_destroy(bo_list);
14083f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
14093f012e29Smrg
14103f012e29Smrg		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
14113f012e29Smrg					     ib_result_mc_address, 4096);
14123f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
14133f012e29Smrg	}
14143f012e29Smrg
14153f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
14163f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
14173f012e29Smrg}
14183f012e29Smrg
14193f012e29Smrgstatic void amdgpu_command_submission_compute_cp_write_data(void)
14203f012e29Smrg{
14213f012e29Smrg	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
14223f012e29Smrg}
14233f012e29Smrg
14243f012e29Smrgstatic void amdgpu_command_submission_compute_cp_const_fill(void)
14253f012e29Smrg{
14263f012e29Smrg	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
14273f012e29Smrg}
14283f012e29Smrg
14293f012e29Smrgstatic void amdgpu_command_submission_compute_cp_copy_data(void)
14303f012e29Smrg{
14313f012e29Smrg	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
14323f012e29Smrg}
14333f012e29Smrg
14343f012e29Smrgstatic void amdgpu_command_submission_compute(void)
14353f012e29Smrg{
14363f012e29Smrg	/* write data using the CP */
14373f012e29Smrg	amdgpu_command_submission_compute_cp_write_data();
14383f012e29Smrg	/* const fill using the CP */
14393f012e29Smrg	amdgpu_command_submission_compute_cp_const_fill();
14403f012e29Smrg	/* copy data using the CP */
14413f012e29Smrg	amdgpu_command_submission_compute_cp_copy_data();
14423f012e29Smrg	/* nop test */
14433f012e29Smrg	amdgpu_command_submission_compute_nop();
14443f012e29Smrg}
14453f012e29Smrg
14463f012e29Smrg/*
14473f012e29Smrg * caller need create/release:
14483f012e29Smrg * pm4_src, resources, ib_info, and ibs_request
14493f012e29Smrg * submit command stream described in ibs_request and wait for this IB accomplished
14503f012e29Smrg */
145141687f09Smrgvoid
145241687f09Smrgamdgpu_test_exec_cs_helper_raw(amdgpu_device_handle device_handle,
145341687f09Smrg			       amdgpu_context_handle context_handle,
145441687f09Smrg			       unsigned ip_type, int instance, int pm4_dw,
145541687f09Smrg			       uint32_t *pm4_src, int res_cnt,
145641687f09Smrg			       amdgpu_bo_handle *resources,
145741687f09Smrg			       struct amdgpu_cs_ib_info *ib_info,
145841687f09Smrg			       struct amdgpu_cs_request *ibs_request,
145941687f09Smrg			       bool secure)
14603f012e29Smrg{
14613f012e29Smrg	int r;
14623f012e29Smrg	uint32_t expired;
14633f012e29Smrg	uint32_t *ring_ptr;
14643f012e29Smrg	amdgpu_bo_handle ib_result_handle;
14653f012e29Smrg	void *ib_result_cpu;
14663f012e29Smrg	uint64_t ib_result_mc_address;
14673f012e29Smrg	struct amdgpu_cs_fence fence_status = {0};
14683f012e29Smrg	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
14693f012e29Smrg	amdgpu_va_handle va_handle;
14703f012e29Smrg
14713f012e29Smrg	/* prepare CS */
14723f012e29Smrg	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
14733f012e29Smrg	CU_ASSERT_NOT_EQUAL(resources, NULL);
14743f012e29Smrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
14753f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
14763f012e29Smrg	CU_ASSERT_TRUE(pm4_dw <= 1024);
14773f012e29Smrg
14783f012e29Smrg	/* allocate IB */
14793f012e29Smrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
14803f012e29Smrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
14813f012e29Smrg				    &ib_result_handle, &ib_result_cpu,
14823f012e29Smrg				    &ib_result_mc_address, &va_handle);
14833f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
14843f012e29Smrg
14853f012e29Smrg	/* copy PM4 packet to ring from caller */
14863f012e29Smrg	ring_ptr = ib_result_cpu;
14873f012e29Smrg	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));
14883f012e29Smrg
14893f012e29Smrg	ib_info->ib_mc_address = ib_result_mc_address;
14903f012e29Smrg	ib_info->size = pm4_dw;
149141687f09Smrg	if (secure)
149241687f09Smrg		ib_info->flags |= AMDGPU_IB_FLAGS_SECURE;
14933f012e29Smrg
14943f012e29Smrg	ibs_request->ip_type = ip_type;
14953f012e29Smrg	ibs_request->ring = instance;
14963f012e29Smrg	ibs_request->number_of_ibs = 1;
14973f012e29Smrg	ibs_request->ibs = ib_info;
14983f012e29Smrg	ibs_request->fence_info.handle = NULL;
14993f012e29Smrg
15003f012e29Smrg	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
15013f012e29Smrg	all_res[res_cnt] = ib_result_handle;
15023f012e29Smrg
15033f012e29Smrg	r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res,
15043f012e29Smrg				  NULL, &ibs_request->resources);
15053f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
15063f012e29Smrg
15073f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
15083f012e29Smrg
15093f012e29Smrg	/* submit CS */
15103f012e29Smrg	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
15113f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
15123f012e29Smrg
15133f012e29Smrg	r = amdgpu_bo_list_destroy(ibs_request->resources);
15143f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
15153f012e29Smrg
15163f012e29Smrg	fence_status.ip_type = ip_type;
15173f012e29Smrg	fence_status.ip_instance = 0;
15183f012e29Smrg	fence_status.ring = ibs_request->ring;
15193f012e29Smrg	fence_status.context = context_handle;
15203f012e29Smrg	fence_status.fence = ibs_request->seq_no;
15213f012e29Smrg
15223f012e29Smrg	/* wait for IB accomplished */
15233f012e29Smrg	r = amdgpu_cs_query_fence_status(&fence_status,
15243f012e29Smrg					 AMDGPU_TIMEOUT_INFINITE,
15253f012e29Smrg					 0, &expired);
15263f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
15273f012e29Smrg	CU_ASSERT_EQUAL(expired, true);
15283f012e29Smrg
15293f012e29Smrg	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
15303f012e29Smrg				     ib_result_mc_address, 4096);
15313f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
15323f012e29Smrg}
15333f012e29Smrg
153441687f09Smrgstatic void
153541687f09Smrgamdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
153641687f09Smrg			   unsigned ip_type, int instance, int pm4_dw,
153741687f09Smrg			   uint32_t *pm4_src, int res_cnt,
153841687f09Smrg			   amdgpu_bo_handle *resources,
153941687f09Smrg			   struct amdgpu_cs_ib_info *ib_info,
154041687f09Smrg			   struct amdgpu_cs_request *ibs_request)
154141687f09Smrg{
154241687f09Smrg	amdgpu_test_exec_cs_helper_raw(device_handle, context_handle,
154341687f09Smrg				       ip_type, instance, pm4_dw, pm4_src,
154441687f09Smrg				       res_cnt, resources, ib_info,
154541687f09Smrg				       ibs_request, false);
154641687f09Smrg}
154741687f09Smrg
154841687f09Smrgvoid
154941687f09Smrgamdgpu_command_submission_write_linear_helper_with_secure(amdgpu_device_handle
155041687f09Smrg							  device, unsigned
155141687f09Smrg							  ip_type, bool secure)
15523f012e29Smrg{
15533f012e29Smrg	const int sdma_write_length = 128;
15543f012e29Smrg	const int pm4_dw = 256;
15553f012e29Smrg	amdgpu_context_handle context_handle;
15563f012e29Smrg	amdgpu_bo_handle bo;
15573f012e29Smrg	amdgpu_bo_handle *resources;
15583f012e29Smrg	uint32_t *pm4;
15593f012e29Smrg	struct amdgpu_cs_ib_info *ib_info;
15603f012e29Smrg	struct amdgpu_cs_request *ibs_request;
15613f012e29Smrg	uint64_t bo_mc;
15623f012e29Smrg	volatile uint32_t *bo_cpu;
156341687f09Smrg	uint32_t bo_cpu_origin;
156400a23bdaSmrg	int i, j, r, loop, ring_id;
15653f012e29Smrg	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
15663f012e29Smrg	amdgpu_va_handle va_handle;
156700a23bdaSmrg	struct drm_amdgpu_info_hw_ip hw_ip_info;
15683f012e29Smrg
15693f012e29Smrg	pm4 = calloc(pm4_dw, sizeof(*pm4));
15703f012e29Smrg	CU_ASSERT_NOT_EQUAL(pm4, NULL);
15713f012e29Smrg
15723f012e29Smrg	ib_info = calloc(1, sizeof(*ib_info));
15733f012e29Smrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
15743f012e29Smrg
15753f012e29Smrg	ibs_request = calloc(1, sizeof(*ibs_request));
15763f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
15773f012e29Smrg
157841687f09Smrg	r = amdgpu_query_hw_ip_info(device, ip_type, 0, &hw_ip_info);
157900a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
158000a23bdaSmrg
158141687f09Smrg	for (i = 0; secure && (i < 2); i++)
158241687f09Smrg		gtt_flags[i] |= AMDGPU_GEM_CREATE_ENCRYPTED;
158341687f09Smrg
158441687f09Smrg	r = amdgpu_cs_ctx_create(device, &context_handle);
158541687f09Smrg
15863f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
15873f012e29Smrg
15883f012e29Smrg	/* prepare resource */
15893f012e29Smrg	resources = calloc(1, sizeof(amdgpu_bo_handle));
15903f012e29Smrg	CU_ASSERT_NOT_EQUAL(resources, NULL);
15913f012e29Smrg
159200a23bdaSmrg	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
159300a23bdaSmrg		loop = 0;
159400a23bdaSmrg		while(loop < 2) {
159500a23bdaSmrg			/* allocate UC bo for sDMA use */
159641687f09Smrg			r = amdgpu_bo_alloc_and_map(device,
159700a23bdaSmrg						    sdma_write_length * sizeof(uint32_t),
159800a23bdaSmrg						    4096, AMDGPU_GEM_DOMAIN_GTT,
159900a23bdaSmrg						    gtt_flags[loop], &bo, (void**)&bo_cpu,
160000a23bdaSmrg						    &bo_mc, &va_handle);
160100a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
16023f012e29Smrg
160300a23bdaSmrg			/* clear bo */
160400a23bdaSmrg			memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));
16053f012e29Smrg
160600a23bdaSmrg			resources[0] = bo;
16073f012e29Smrg
160800a23bdaSmrg			/* fulfill PM4: test DMA write-linear */
160900a23bdaSmrg			i = j = 0;
161000a23bdaSmrg			if (ip_type == AMDGPU_HW_IP_DMA) {
161100a23bdaSmrg				if (family_id == AMDGPU_FAMILY_SI)
161200a23bdaSmrg					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
161300a23bdaSmrg								  sdma_write_length);
161400a23bdaSmrg				else
161500a23bdaSmrg					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
161641687f09Smrg							       SDMA_WRITE_SUB_OPCODE_LINEAR,
161741687f09Smrg							       secure ? SDMA_ATOMIC_TMZ(1) : 0);
161841687f09Smrg				pm4[i++] = 0xfffffffc & bo_mc;
161900a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
162000a23bdaSmrg				if (family_id >= AMDGPU_FAMILY_AI)
162100a23bdaSmrg					pm4[i++] = sdma_write_length - 1;
162200a23bdaSmrg				else if (family_id != AMDGPU_FAMILY_SI)
162300a23bdaSmrg					pm4[i++] = sdma_write_length;
162400a23bdaSmrg				while(j++ < sdma_write_length)
162500a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
162600a23bdaSmrg			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
162700a23bdaSmrg				    (ip_type == AMDGPU_HW_IP_COMPUTE)) {
162800a23bdaSmrg				pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
162900a23bdaSmrg				pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
163000a23bdaSmrg				pm4[i++] = 0xfffffffc & bo_mc;
163100a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
163200a23bdaSmrg				while(j++ < sdma_write_length)
163300a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
163400a23bdaSmrg			}
16353f012e29Smrg
163641687f09Smrg			amdgpu_test_exec_cs_helper_raw(device, context_handle,
163741687f09Smrg						       ip_type, ring_id, i, pm4,
163841687f09Smrg						       1, resources, ib_info,
163941687f09Smrg						       ibs_request, secure);
16403f012e29Smrg
164100a23bdaSmrg			/* verify if SDMA test result meets with expected */
164200a23bdaSmrg			i = 0;
164341687f09Smrg			if (!secure) {
164441687f09Smrg				while(i < sdma_write_length) {
164541687f09Smrg					CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
164641687f09Smrg				}
164741687f09Smrg			} else if (ip_type == AMDGPU_HW_IP_GFX) {
164841687f09Smrg				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
164941687f09Smrg				pm4[i++] = PACKET3(PACKET3_ATOMIC_MEM, 7);
165041687f09Smrg				/* atomic opcode for 32b w/ RTN and ATOMIC_SWAPCMP_RTN
165141687f09Smrg				 * command, 1-loop_until_compare_satisfied.
165241687f09Smrg				 * single_pass_atomic, 0-lru
165341687f09Smrg				 * engine_sel, 0-micro_engine
165441687f09Smrg				 */
165541687f09Smrg				pm4[i++] = (TC_OP_ATOMIC_CMPSWAP_RTN_32 |
165641687f09Smrg							ATOMIC_MEM_COMMAND(1) |
165741687f09Smrg							ATOMIC_MEM_CACHEPOLICAY(0) |
165841687f09Smrg							ATOMIC_MEM_ENGINESEL(0));
165941687f09Smrg				pm4[i++] = 0xfffffffc & bo_mc;
166041687f09Smrg				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
166141687f09Smrg				pm4[i++] = 0x12345678;
166241687f09Smrg				pm4[i++] = 0x0;
166341687f09Smrg				pm4[i++] = 0xdeadbeaf;
166441687f09Smrg				pm4[i++] = 0x0;
166541687f09Smrg				pm4[i++] = 0x100;
166641687f09Smrg				amdgpu_test_exec_cs_helper_raw(device, context_handle,
166741687f09Smrg							ip_type, ring_id, i, pm4,
166841687f09Smrg							1, resources, ib_info,
166941687f09Smrg							ibs_request, true);
167041687f09Smrg			} else if (ip_type == AMDGPU_HW_IP_DMA) {
167141687f09Smrg				/* restore the bo_cpu to compare */
167241687f09Smrg				bo_cpu_origin = bo_cpu[0];
167341687f09Smrg				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
167441687f09Smrg				/* atomic opcode for 32b w/ RTN and ATOMIC_SWAPCMP_RTN
167541687f09Smrg				 * loop, 1-loop_until_compare_satisfied.
167641687f09Smrg				 * single_pass_atomic, 0-lru
167741687f09Smrg				 */
167841687f09Smrg				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
167941687f09Smrg							       0,
168041687f09Smrg							       SDMA_ATOMIC_LOOP(1) |
168141687f09Smrg							       SDMA_ATOMIC_TMZ(1) |
168241687f09Smrg							       SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
168341687f09Smrg				pm4[i++] = 0xfffffffc & bo_mc;
168441687f09Smrg				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
168541687f09Smrg				pm4[i++] = 0x12345678;
168641687f09Smrg				pm4[i++] = 0x0;
168741687f09Smrg				pm4[i++] = 0xdeadbeaf;
168841687f09Smrg				pm4[i++] = 0x0;
168941687f09Smrg				pm4[i++] = 0x100;
169041687f09Smrg				amdgpu_test_exec_cs_helper_raw(device, context_handle,
169141687f09Smrg							ip_type, ring_id, i, pm4,
169241687f09Smrg							1, resources, ib_info,
169341687f09Smrg							ibs_request, true);
169441687f09Smrg				/* DMA's atomic behavir is unlike GFX
169541687f09Smrg				 * If the comparing data is not equal to destination data,
169641687f09Smrg				 * For GFX, loop again till gfx timeout(system hang).
169741687f09Smrg				 * For DMA, loop again till timer expired and then send interrupt.
169841687f09Smrg				 * So testcase can't use interrupt mechanism.
169941687f09Smrg				 * We take another way to verify. When the comparing data is not
170041687f09Smrg				 * equal to destination data, overwrite the source data to the destination
170141687f09Smrg				 * buffer. Otherwise, original destination data unchanged.
170241687f09Smrg				 * So if the bo_cpu data is overwritten, the result is passed.
170341687f09Smrg				 */
170441687f09Smrg				CU_ASSERT_NOT_EQUAL(bo_cpu[0], bo_cpu_origin);
170541687f09Smrg
170641687f09Smrg				/* compare again for the case of dest_data != cmp_data */
170741687f09Smrg				i = 0;
170841687f09Smrg				/* restore again, here dest_data should be */
170941687f09Smrg				bo_cpu_origin = bo_cpu[0];
171041687f09Smrg				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
171141687f09Smrg				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
171241687f09Smrg							       0,
171341687f09Smrg							       SDMA_ATOMIC_LOOP(1) |
171441687f09Smrg							       SDMA_ATOMIC_TMZ(1) |
171541687f09Smrg							       SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
171641687f09Smrg				pm4[i++] = 0xfffffffc & bo_mc;
171741687f09Smrg				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
171841687f09Smrg				pm4[i++] = 0x87654321;
171941687f09Smrg				pm4[i++] = 0x0;
172041687f09Smrg				pm4[i++] = 0xdeadbeaf;
172141687f09Smrg				pm4[i++] = 0x0;
172241687f09Smrg				pm4[i++] = 0x100;
172341687f09Smrg				amdgpu_test_exec_cs_helper_raw(device, context_handle,
172441687f09Smrg							ip_type, ring_id, i, pm4,
172541687f09Smrg							1, resources, ib_info,
172641687f09Smrg							ibs_request, true);
172741687f09Smrg				/* here bo_cpu[0] should be unchanged, still is 0x12345678, otherwise failed*/
172841687f09Smrg				CU_ASSERT_EQUAL(bo_cpu[0], bo_cpu_origin);
172900a23bdaSmrg			}
17303f012e29Smrg
173100a23bdaSmrg			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
173200a23bdaSmrg						     sdma_write_length * sizeof(uint32_t));
173300a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
173400a23bdaSmrg			loop++;
17353f012e29Smrg		}
17363f012e29Smrg	}
17373f012e29Smrg	/* clean resources */
17383f012e29Smrg	free(resources);
17393f012e29Smrg	free(ibs_request);
17403f012e29Smrg	free(ib_info);
17413f012e29Smrg	free(pm4);
17423f012e29Smrg
17433f012e29Smrg	/* end of test */
17443f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
17453f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
17463f012e29Smrg}
17473f012e29Smrg
174841687f09Smrgstatic void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
174941687f09Smrg{
175041687f09Smrg	amdgpu_command_submission_write_linear_helper_with_secure(device_handle,
175141687f09Smrg								  ip_type,
175241687f09Smrg								  false);
175341687f09Smrg}
175441687f09Smrg
17553f012e29Smrgstatic void amdgpu_command_submission_sdma_write_linear(void)
17563f012e29Smrg{
17573f012e29Smrg	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
17583f012e29Smrg}
17593f012e29Smrg
17603f012e29Smrgstatic void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
17613f012e29Smrg{
17623f012e29Smrg	const int sdma_write_length = 1024 * 1024;
17633f012e29Smrg	const int pm4_dw = 256;
17643f012e29Smrg	amdgpu_context_handle context_handle;
17653f012e29Smrg	amdgpu_bo_handle bo;
17663f012e29Smrg	amdgpu_bo_handle *resources;
17673f012e29Smrg	uint32_t *pm4;
17683f012e29Smrg	struct amdgpu_cs_ib_info *ib_info;
17693f012e29Smrg	struct amdgpu_cs_request *ibs_request;
17703f012e29Smrg	uint64_t bo_mc;
17713f012e29Smrg	volatile uint32_t *bo_cpu;
177200a23bdaSmrg	int i, j, r, loop, ring_id;
17733f012e29Smrg	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
17743f012e29Smrg	amdgpu_va_handle va_handle;
177500a23bdaSmrg	struct drm_amdgpu_info_hw_ip hw_ip_info;
17763f012e29Smrg
17773f012e29Smrg	pm4 = calloc(pm4_dw, sizeof(*pm4));
17783f012e29Smrg	CU_ASSERT_NOT_EQUAL(pm4, NULL);
17793f012e29Smrg
17803f012e29Smrg	ib_info = calloc(1, sizeof(*ib_info));
17813f012e29Smrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
17823f012e29Smrg
17833f012e29Smrg	ibs_request = calloc(1, sizeof(*ibs_request));
17843f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
17853f012e29Smrg
178600a23bdaSmrg	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
178700a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
178800a23bdaSmrg
17893f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
17903f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
17913f012e29Smrg
17923f012e29Smrg	/* prepare resource */
17933f012e29Smrg	resources = calloc(1, sizeof(amdgpu_bo_handle));
17943f012e29Smrg	CU_ASSERT_NOT_EQUAL(resources, NULL);
17953f012e29Smrg
179600a23bdaSmrg	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
179700a23bdaSmrg		loop = 0;
179800a23bdaSmrg		while(loop < 2) {
179900a23bdaSmrg			/* allocate UC bo for sDMA use */
180000a23bdaSmrg			r = amdgpu_bo_alloc_and_map(device_handle,
180100a23bdaSmrg						    sdma_write_length, 4096,
180200a23bdaSmrg						    AMDGPU_GEM_DOMAIN_GTT,
180300a23bdaSmrg						    gtt_flags[loop], &bo, (void**)&bo_cpu,
180400a23bdaSmrg						    &bo_mc, &va_handle);
180500a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
18063f012e29Smrg
180700a23bdaSmrg			/* clear bo */
180800a23bdaSmrg			memset((void*)bo_cpu, 0, sdma_write_length);
18093f012e29Smrg
181000a23bdaSmrg			resources[0] = bo;
18113f012e29Smrg
181200a23bdaSmrg			/* fulfill PM4: test DMA const fill */
181300a23bdaSmrg			i = j = 0;
181400a23bdaSmrg			if (ip_type == AMDGPU_HW_IP_DMA) {
181500a23bdaSmrg				if (family_id == AMDGPU_FAMILY_SI) {
181600a23bdaSmrg					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
181700a23bdaSmrg								  0, 0, 0,
181800a23bdaSmrg								  sdma_write_length / 4);
181900a23bdaSmrg					pm4[i++] = 0xfffffffc & bo_mc;
182000a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
182100a23bdaSmrg					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
182200a23bdaSmrg				} else {
182300a23bdaSmrg					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
182400a23bdaSmrg							       SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
182500a23bdaSmrg					pm4[i++] = 0xffffffff & bo_mc;
182600a23bdaSmrg					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
182700a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
182800a23bdaSmrg					if (family_id >= AMDGPU_FAMILY_AI)
182900a23bdaSmrg						pm4[i++] = sdma_write_length - 1;
183000a23bdaSmrg					else
183100a23bdaSmrg						pm4[i++] = sdma_write_length;
183200a23bdaSmrg				}
183300a23bdaSmrg			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
183400a23bdaSmrg				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
183500a23bdaSmrg				if (family_id == AMDGPU_FAMILY_SI) {
183600a23bdaSmrg					pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
183700a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
183800a23bdaSmrg					pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
183900a23bdaSmrg						   PACKET3_DMA_DATA_SI_DST_SEL(0) |
184000a23bdaSmrg						   PACKET3_DMA_DATA_SI_SRC_SEL(2) |
184100a23bdaSmrg						   PACKET3_DMA_DATA_SI_CP_SYNC;
184200a23bdaSmrg					pm4[i++] = 0xffffffff & bo_mc;
184300a23bdaSmrg					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1844d8807b2fSmrg					pm4[i++] = sdma_write_length;
184500a23bdaSmrg				} else {
184600a23bdaSmrg					pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
184700a23bdaSmrg					pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
184800a23bdaSmrg						   PACKET3_DMA_DATA_DST_SEL(0) |
184900a23bdaSmrg						   PACKET3_DMA_DATA_SRC_SEL(2) |
185000a23bdaSmrg						   PACKET3_DMA_DATA_CP_SYNC;
185100a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
185200a23bdaSmrg					pm4[i++] = 0;
185300a23bdaSmrg					pm4[i++] = 0xfffffffc & bo_mc;
185400a23bdaSmrg					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
185500a23bdaSmrg					pm4[i++] = sdma_write_length;
185600a23bdaSmrg				}
1857d8807b2fSmrg			}
18583f012e29Smrg
185900a23bdaSmrg			amdgpu_test_exec_cs_helper(context_handle,
186000a23bdaSmrg						   ip_type, ring_id,
186100a23bdaSmrg						   i, pm4,
186200a23bdaSmrg						   1, resources,
186300a23bdaSmrg						   ib_info, ibs_request);
18643f012e29Smrg
186500a23bdaSmrg			/* verify if SDMA test result meets with expected */
186600a23bdaSmrg			i = 0;
186700a23bdaSmrg			while(i < (sdma_write_length / 4)) {
186800a23bdaSmrg				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
186900a23bdaSmrg			}
18703f012e29Smrg
187100a23bdaSmrg			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
187200a23bdaSmrg						     sdma_write_length);
187300a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
187400a23bdaSmrg			loop++;
187500a23bdaSmrg		}
18763f012e29Smrg	}
18773f012e29Smrg	/* clean resources */
18783f012e29Smrg	free(resources);
18793f012e29Smrg	free(ibs_request);
18803f012e29Smrg	free(ib_info);
18813f012e29Smrg	free(pm4);
18823f012e29Smrg
18833f012e29Smrg	/* end of test */
18843f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
18853f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
18863f012e29Smrg}
18873f012e29Smrg
18883f012e29Smrgstatic void amdgpu_command_submission_sdma_const_fill(void)
18893f012e29Smrg{
18903f012e29Smrg	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
18913f012e29Smrg}
18923f012e29Smrg
18933f012e29Smrgstatic void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
18943f012e29Smrg{
18953f012e29Smrg	const int sdma_write_length = 1024;
18963f012e29Smrg	const int pm4_dw = 256;
18973f012e29Smrg	amdgpu_context_handle context_handle;
18983f012e29Smrg	amdgpu_bo_handle bo1, bo2;
18993f012e29Smrg	amdgpu_bo_handle *resources;
19003f012e29Smrg	uint32_t *pm4;
19013f012e29Smrg	struct amdgpu_cs_ib_info *ib_info;
19023f012e29Smrg	struct amdgpu_cs_request *ibs_request;
19033f012e29Smrg	uint64_t bo1_mc, bo2_mc;
19043f012e29Smrg	volatile unsigned char *bo1_cpu, *bo2_cpu;
190500a23bdaSmrg	int i, j, r, loop1, loop2, ring_id;
19063f012e29Smrg	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
19073f012e29Smrg	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
190800a23bdaSmrg	struct drm_amdgpu_info_hw_ip hw_ip_info;
19093f012e29Smrg
19103f012e29Smrg	pm4 = calloc(pm4_dw, sizeof(*pm4));
19113f012e29Smrg	CU_ASSERT_NOT_EQUAL(pm4, NULL);
19123f012e29Smrg
19133f012e29Smrg	ib_info = calloc(1, sizeof(*ib_info));
19143f012e29Smrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
19153f012e29Smrg
19163f012e29Smrg	ibs_request = calloc(1, sizeof(*ibs_request));
19173f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
19183f012e29Smrg
191900a23bdaSmrg	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
192000a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
192100a23bdaSmrg
19223f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
19233f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
19243f012e29Smrg
19253f012e29Smrg	/* prepare resource */
19263f012e29Smrg	resources = calloc(2, sizeof(amdgpu_bo_handle));
19273f012e29Smrg	CU_ASSERT_NOT_EQUAL(resources, NULL);
19283f012e29Smrg
192900a23bdaSmrg	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
193000a23bdaSmrg		loop1 = loop2 = 0;
193100a23bdaSmrg		/* run 9 circle to test all mapping combination */
193200a23bdaSmrg		while(loop1 < 2) {
193300a23bdaSmrg			while(loop2 < 2) {
193400a23bdaSmrg				/* allocate UC bo1for sDMA use */
193500a23bdaSmrg				r = amdgpu_bo_alloc_and_map(device_handle,
193600a23bdaSmrg							    sdma_write_length, 4096,
193700a23bdaSmrg							    AMDGPU_GEM_DOMAIN_GTT,
193800a23bdaSmrg							    gtt_flags[loop1], &bo1,
193900a23bdaSmrg							    (void**)&bo1_cpu, &bo1_mc,
194000a23bdaSmrg							    &bo1_va_handle);
194100a23bdaSmrg				CU_ASSERT_EQUAL(r, 0);
194200a23bdaSmrg
194300a23bdaSmrg				/* set bo1 */
194400a23bdaSmrg				memset((void*)bo1_cpu, 0xaa, sdma_write_length);
194500a23bdaSmrg
194600a23bdaSmrg				/* allocate UC bo2 for sDMA use */
194700a23bdaSmrg				r = amdgpu_bo_alloc_and_map(device_handle,
194800a23bdaSmrg							    sdma_write_length, 4096,
194900a23bdaSmrg							    AMDGPU_GEM_DOMAIN_GTT,
195000a23bdaSmrg							    gtt_flags[loop2], &bo2,
195100a23bdaSmrg							    (void**)&bo2_cpu, &bo2_mc,
195200a23bdaSmrg							    &bo2_va_handle);
195300a23bdaSmrg				CU_ASSERT_EQUAL(r, 0);
195400a23bdaSmrg
195500a23bdaSmrg				/* clear bo2 */
195600a23bdaSmrg				memset((void*)bo2_cpu, 0, sdma_write_length);
195700a23bdaSmrg
195800a23bdaSmrg				resources[0] = bo1;
195900a23bdaSmrg				resources[1] = bo2;
196000a23bdaSmrg
196100a23bdaSmrg				/* fulfill PM4: test DMA copy linear */
196200a23bdaSmrg				i = j = 0;
196300a23bdaSmrg				if (ip_type == AMDGPU_HW_IP_DMA) {
196400a23bdaSmrg					if (family_id == AMDGPU_FAMILY_SI) {
196500a23bdaSmrg						pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
196600a23bdaSmrg									  0, 0, 0,
196700a23bdaSmrg									  sdma_write_length);
196800a23bdaSmrg						pm4[i++] = 0xffffffff & bo2_mc;
196900a23bdaSmrg						pm4[i++] = 0xffffffff & bo1_mc;
197000a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
197100a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
197200a23bdaSmrg					} else {
197300a23bdaSmrg						pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
197400a23bdaSmrg								       SDMA_COPY_SUB_OPCODE_LINEAR,
197500a23bdaSmrg								       0);
197600a23bdaSmrg						if (family_id >= AMDGPU_FAMILY_AI)
197700a23bdaSmrg							pm4[i++] = sdma_write_length - 1;
197800a23bdaSmrg						else
197900a23bdaSmrg							pm4[i++] = sdma_write_length;
198000a23bdaSmrg						pm4[i++] = 0;
198100a23bdaSmrg						pm4[i++] = 0xffffffff & bo1_mc;
198200a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
198300a23bdaSmrg						pm4[i++] = 0xffffffff & bo2_mc;
198400a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
198500a23bdaSmrg					}
198600a23bdaSmrg				} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
198700a23bdaSmrg					   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
198800a23bdaSmrg					if (family_id == AMDGPU_FAMILY_SI) {
198900a23bdaSmrg						pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
199000a23bdaSmrg						pm4[i++] = 0xfffffffc & bo1_mc;
199100a23bdaSmrg						pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
199200a23bdaSmrg							   PACKET3_DMA_DATA_SI_DST_SEL(0) |
199300a23bdaSmrg							   PACKET3_DMA_DATA_SI_SRC_SEL(0) |
199400a23bdaSmrg							   PACKET3_DMA_DATA_SI_CP_SYNC |
199500a23bdaSmrg							   (0xffff00000000 & bo1_mc) >> 32;
199600a23bdaSmrg						pm4[i++] = 0xfffffffc & bo2_mc;
199700a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1998d8807b2fSmrg						pm4[i++] = sdma_write_length;
199900a23bdaSmrg					} else {
200000a23bdaSmrg						pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
200100a23bdaSmrg						pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
200200a23bdaSmrg							   PACKET3_DMA_DATA_DST_SEL(0) |
200300a23bdaSmrg							   PACKET3_DMA_DATA_SRC_SEL(0) |
200400a23bdaSmrg							   PACKET3_DMA_DATA_CP_SYNC;
200500a23bdaSmrg						pm4[i++] = 0xfffffffc & bo1_mc;
200600a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
200700a23bdaSmrg						pm4[i++] = 0xfffffffc & bo2_mc;
200800a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
200900a23bdaSmrg						pm4[i++] = sdma_write_length;
201000a23bdaSmrg					}
2011d8807b2fSmrg				}
20123f012e29Smrg
201300a23bdaSmrg				amdgpu_test_exec_cs_helper(context_handle,
201400a23bdaSmrg							   ip_type, ring_id,
201500a23bdaSmrg							   i, pm4,
201600a23bdaSmrg							   2, resources,
201700a23bdaSmrg							   ib_info, ibs_request);
20183f012e29Smrg
201900a23bdaSmrg				/* verify if SDMA test result meets with expected */
202000a23bdaSmrg				i = 0;
202100a23bdaSmrg				while(i < sdma_write_length) {
202200a23bdaSmrg					CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
202300a23bdaSmrg				}
202400a23bdaSmrg				r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
202500a23bdaSmrg							     sdma_write_length);
202600a23bdaSmrg				CU_ASSERT_EQUAL(r, 0);
202700a23bdaSmrg				r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
202800a23bdaSmrg							     sdma_write_length);
202900a23bdaSmrg				CU_ASSERT_EQUAL(r, 0);
203000a23bdaSmrg				loop2++;
20313f012e29Smrg			}
203200a23bdaSmrg			loop1++;
20333f012e29Smrg		}
20343f012e29Smrg	}
20353f012e29Smrg	/* clean resources */
20363f012e29Smrg	free(resources);
20373f012e29Smrg	free(ibs_request);
20383f012e29Smrg	free(ib_info);
20393f012e29Smrg	free(pm4);
20403f012e29Smrg
20413f012e29Smrg	/* end of test */
20423f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
20433f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
20443f012e29Smrg}
20453f012e29Smrg
20463f012e29Smrgstatic void amdgpu_command_submission_sdma_copy_linear(void)
20473f012e29Smrg{
20483f012e29Smrg	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
20493f012e29Smrg}
20503f012e29Smrg
/*
 * SDMA command-submission test entry point.
 *
 * Runs the three SDMA sub-tests one after another, in this fixed order:
 * write-linear, const-fill, copy-linear.
 */
static void amdgpu_command_submission_sdma(void)
{
	/* 1. linear write */
	amdgpu_command_submission_sdma_write_linear();

	/* 2. constant fill */
	amdgpu_command_submission_sdma_const_fill();

	/* 3. linear copy */
	amdgpu_command_submission_sdma_copy_linear();
}
20573f012e29Smrg
2058d8807b2fSmrgstatic void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
2059d8807b2fSmrg{
2060d8807b2fSmrg	amdgpu_context_handle context_handle;
2061d8807b2fSmrg	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
2062d8807b2fSmrg	void *ib_result_cpu, *ib_result_ce_cpu;
2063d8807b2fSmrg	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
2064d8807b2fSmrg	struct amdgpu_cs_request ibs_request[2] = {0};
2065d8807b2fSmrg	struct amdgpu_cs_ib_info ib_info[2];
2066d8807b2fSmrg	struct amdgpu_cs_fence fence_status[2] = {0};
2067d8807b2fSmrg	uint32_t *ptr;
2068d8807b2fSmrg	uint32_t expired;
2069d8807b2fSmrg	amdgpu_bo_list_handle bo_list;
2070d8807b2fSmrg	amdgpu_va_handle va_handle, va_handle_ce;
2071d8807b2fSmrg	int r;
2072d8807b2fSmrg	int i = 0, ib_cs_num = 2;
2073d8807b2fSmrg
2074d8807b2fSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2075d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
2076d8807b2fSmrg
2077d8807b2fSmrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
2078d8807b2fSmrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
2079d8807b2fSmrg				    &ib_result_handle, &ib_result_cpu,
2080d8807b2fSmrg				    &ib_result_mc_address, &va_handle);
2081d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
2082d8807b2fSmrg
2083d8807b2fSmrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
2084d8807b2fSmrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
2085d8807b2fSmrg				    &ib_result_ce_handle, &ib_result_ce_cpu,
2086d8807b2fSmrg				    &ib_result_ce_mc_address, &va_handle_ce);
2087d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
2088d8807b2fSmrg
2089d8807b2fSmrg	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
2090d8807b2fSmrg			       ib_result_ce_handle, &bo_list);
2091d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
2092d8807b2fSmrg
2093d8807b2fSmrg	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
2094d8807b2fSmrg
2095d8807b2fSmrg	/* IT_SET_CE_DE_COUNTERS */
2096d8807b2fSmrg	ptr = ib_result_ce_cpu;
2097d8807b2fSmrg	if (family_id != AMDGPU_FAMILY_SI) {
2098d8807b2fSmrg		ptr[i++] = 0xc0008900;
2099d8807b2fSmrg		ptr[i++] = 0;
2100d8807b2fSmrg	}
2101d8807b2fSmrg	ptr[i++] = 0xc0008400;
2102d8807b2fSmrg	ptr[i++] = 1;
2103d8807b2fSmrg	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
2104d8807b2fSmrg	ib_info[0].size = i;
2105d8807b2fSmrg	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
2106d8807b2fSmrg
2107d8807b2fSmrg	/* IT_WAIT_ON_CE_COUNTER */
2108d8807b2fSmrg	ptr = ib_result_cpu;
2109d8807b2fSmrg	ptr[0] = 0xc0008600;
2110d8807b2fSmrg	ptr[1] = 0x00000001;
2111d8807b2fSmrg	ib_info[1].ib_mc_address = ib_result_mc_address;
2112d8807b2fSmrg	ib_info[1].size = 2;
2113d8807b2fSmrg
2114d8807b2fSmrg	for (i = 0; i < ib_cs_num; i++) {
2115d8807b2fSmrg		ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
2116d8807b2fSmrg		ibs_request[i].number_of_ibs = 2;
2117d8807b2fSmrg		ibs_request[i].ibs = ib_info;
2118d8807b2fSmrg		ibs_request[i].resources = bo_list;
2119d8807b2fSmrg		ibs_request[i].fence_info.handle = NULL;
2120d8807b2fSmrg	}
2121d8807b2fSmrg
2122d8807b2fSmrg	r = amdgpu_cs_submit(context_handle, 0,ibs_request, ib_cs_num);
2123d8807b2fSmrg
2124d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
2125d8807b2fSmrg
2126d8807b2fSmrg	for (i = 0; i < ib_cs_num; i++) {
2127d8807b2fSmrg		fence_status[i].context = context_handle;
2128d8807b2fSmrg		fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
2129d8807b2fSmrg		fence_status[i].fence = ibs_request[i].seq_no;
2130d8807b2fSmrg	}
2131d8807b2fSmrg
2132d8807b2fSmrg	r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
2133d8807b2fSmrg				AMDGPU_TIMEOUT_INFINITE,
2134d8807b2fSmrg				&expired, NULL);
2135d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
2136d8807b2fSmrg
2137d8807b2fSmrg	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
2138d8807b2fSmrg				     ib_result_mc_address, 4096);
2139d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
2140d8807b2fSmrg
2141d8807b2fSmrg	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
2142d8807b2fSmrg				     ib_result_ce_mc_address, 4096);
2143d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
2144d8807b2fSmrg
2145d8807b2fSmrg	r = amdgpu_bo_list_destroy(bo_list);
2146d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
2147d8807b2fSmrg
2148d8807b2fSmrg	r = amdgpu_cs_ctx_free(context_handle);
2149d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
2150d8807b2fSmrg}
2151d8807b2fSmrg
/*
 * Multi-fence test entry point: runs the multi-fence submission test twice,
 * first with wait_all == true, then with wait_all == false.
 */
static void amdgpu_command_submission_multi_fence(void)
{
	static const bool wait_modes[] = { true, false };
	unsigned int k;

	for (k = 0; k < sizeof(wait_modes) / sizeof(wait_modes[0]); k++)
		amdgpu_command_submission_multi_fence_wait_all(wait_modes[k]);
}
2157d8807b2fSmrg
21583f012e29Smrgstatic void amdgpu_userptr_test(void)
21593f012e29Smrg{
21603f012e29Smrg	int i, r, j;
21613f012e29Smrg	uint32_t *pm4 = NULL;
21623f012e29Smrg	uint64_t bo_mc;
21633f012e29Smrg	void *ptr = NULL;
21643f012e29Smrg	int pm4_dw = 256;
21653f012e29Smrg	int sdma_write_length = 4;
21663f012e29Smrg	amdgpu_bo_handle handle;
21673f012e29Smrg	amdgpu_context_handle context_handle;
21683f012e29Smrg	struct amdgpu_cs_ib_info *ib_info;
21693f012e29Smrg	struct amdgpu_cs_request *ibs_request;
21703f012e29Smrg	amdgpu_bo_handle buf_handle;
21713f012e29Smrg	amdgpu_va_handle va_handle;
21723f012e29Smrg
21733f012e29Smrg	pm4 = calloc(pm4_dw, sizeof(*pm4));
21743f012e29Smrg	CU_ASSERT_NOT_EQUAL(pm4, NULL);
21753f012e29Smrg
21763f012e29Smrg	ib_info = calloc(1, sizeof(*ib_info));
21773f012e29Smrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
21783f012e29Smrg
21793f012e29Smrg	ibs_request = calloc(1, sizeof(*ibs_request));
21803f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
21813f012e29Smrg
21823f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
21833f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
21843f012e29Smrg
21853f012e29Smrg	posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
21863f012e29Smrg	CU_ASSERT_NOT_EQUAL(ptr, NULL);
21873f012e29Smrg	memset(ptr, 0, BUFFER_SIZE);
21883f012e29Smrg
21893f012e29Smrg	r = amdgpu_create_bo_from_user_mem(device_handle,
21903f012e29Smrg					   ptr, BUFFER_SIZE, &buf_handle);
21913f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
21923f012e29Smrg
21933f012e29Smrg	r = amdgpu_va_range_alloc(device_handle,
21943f012e29Smrg				  amdgpu_gpu_va_range_general,
21953f012e29Smrg				  BUFFER_SIZE, 1, 0, &bo_mc,
21963f012e29Smrg				  &va_handle, 0);
21973f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
21983f012e29Smrg
21993f012e29Smrg	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
22003f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
22013f012e29Smrg
22023f012e29Smrg	handle = buf_handle;
22033f012e29Smrg
22043f012e29Smrg	j = i = 0;
2205d8807b2fSmrg
2206d8807b2fSmrg	if (family_id == AMDGPU_FAMILY_SI)
2207d8807b2fSmrg		pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
2208d8807b2fSmrg				sdma_write_length);
2209d8807b2fSmrg	else
2210d8807b2fSmrg		pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
2211d8807b2fSmrg				SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
22123f012e29Smrg	pm4[i++] = 0xffffffff & bo_mc;
22133f012e29Smrg	pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
2214d8807b2fSmrg	if (family_id >= AMDGPU_FAMILY_AI)
2215d8807b2fSmrg		pm4[i++] = sdma_write_length - 1;
2216d8807b2fSmrg	else if (family_id != AMDGPU_FAMILY_SI)
2217d8807b2fSmrg		pm4[i++] = sdma_write_length;
22183f012e29Smrg
22193f012e29Smrg	while (j++ < sdma_write_length)
22203f012e29Smrg		pm4[i++] = 0xdeadbeaf;
22213f012e29Smrg
222200a23bdaSmrg	if (!fork()) {
222300a23bdaSmrg		pm4[0] = 0x0;
222400a23bdaSmrg		exit(0);
222500a23bdaSmrg	}
222600a23bdaSmrg
22273f012e29Smrg	amdgpu_test_exec_cs_helper(context_handle,
22283f012e29Smrg				   AMDGPU_HW_IP_DMA, 0,
22293f012e29Smrg				   i, pm4,
22303f012e29Smrg				   1, &handle,
22313f012e29Smrg				   ib_info, ibs_request);
22323f012e29Smrg	i = 0;
22333f012e29Smrg	while (i < sdma_write_length) {
22343f012e29Smrg		CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
22353f012e29Smrg	}
22363f012e29Smrg	free(ibs_request);
22373f012e29Smrg	free(ib_info);
22383f012e29Smrg	free(pm4);
22393f012e29Smrg
22403f012e29Smrg	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
22413f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
22423f012e29Smrg	r = amdgpu_va_range_free(va_handle);
22433f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
22443f012e29Smrg	r = amdgpu_bo_free(buf_handle);
22453f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
22463f012e29Smrg	free(ptr);
22473f012e29Smrg
22483f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
22493f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
225000a23bdaSmrg
225100a23bdaSmrg	wait(NULL);
225200a23bdaSmrg}
225300a23bdaSmrg
225400a23bdaSmrgstatic void amdgpu_sync_dependency_test(void)
225500a23bdaSmrg{
225600a23bdaSmrg	amdgpu_context_handle context_handle[2];
225700a23bdaSmrg	amdgpu_bo_handle ib_result_handle;
225800a23bdaSmrg	void *ib_result_cpu;
225900a23bdaSmrg	uint64_t ib_result_mc_address;
226000a23bdaSmrg	struct amdgpu_cs_request ibs_request;
226100a23bdaSmrg	struct amdgpu_cs_ib_info ib_info;
226200a23bdaSmrg	struct amdgpu_cs_fence fence_status;
226300a23bdaSmrg	uint32_t expired;
226400a23bdaSmrg	int i, j, r;
226500a23bdaSmrg	amdgpu_bo_list_handle bo_list;
226600a23bdaSmrg	amdgpu_va_handle va_handle;
226700a23bdaSmrg	static uint32_t *ptr;
226800a23bdaSmrg	uint64_t seq_no;
226900a23bdaSmrg
227000a23bdaSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
227100a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
227200a23bdaSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
227300a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
227400a23bdaSmrg
227500a23bdaSmrg	r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
227600a23bdaSmrg			AMDGPU_GEM_DOMAIN_GTT, 0,
227700a23bdaSmrg						    &ib_result_handle, &ib_result_cpu,
227800a23bdaSmrg						    &ib_result_mc_address, &va_handle);
227900a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
228000a23bdaSmrg
228100a23bdaSmrg	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
228200a23bdaSmrg			       &bo_list);
228300a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
228400a23bdaSmrg
228500a23bdaSmrg	ptr = ib_result_cpu;
228600a23bdaSmrg	i = 0;
228700a23bdaSmrg
228800a23bdaSmrg	memcpy(ptr + CODE_OFFSET , shader_bin, sizeof(shader_bin));
228900a23bdaSmrg
229000a23bdaSmrg	/* Dispatch minimal init config and verify it's executed */
229100a23bdaSmrg	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
229200a23bdaSmrg	ptr[i++] = 0x80000000;
229300a23bdaSmrg	ptr[i++] = 0x80000000;
229400a23bdaSmrg
229500a23bdaSmrg	ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
229600a23bdaSmrg	ptr[i++] = 0x80000000;
229700a23bdaSmrg
229800a23bdaSmrg
229900a23bdaSmrg	/* Program compute regs */
230000a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
230100a23bdaSmrg	ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
230200a23bdaSmrg	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
230300a23bdaSmrg	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;
230400a23bdaSmrg
230500a23bdaSmrg
230600a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
230700a23bdaSmrg	ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
230800a23bdaSmrg	/*
230900a23bdaSmrg	 * 002c0040         COMPUTE_PGM_RSRC1 <- VGPRS = 0
231000a23bdaSmrg	                                      SGPRS = 1
231100a23bdaSmrg	                                      PRIORITY = 0
231200a23bdaSmrg	                                      FLOAT_MODE = 192 (0xc0)
231300a23bdaSmrg	                                      PRIV = 0
231400a23bdaSmrg	                                      DX10_CLAMP = 1
231500a23bdaSmrg	                                      DEBUG_MODE = 0
231600a23bdaSmrg	                                      IEEE_MODE = 0
231700a23bdaSmrg	                                      BULKY = 0
231800a23bdaSmrg	                                      CDBG_USER = 0
231900a23bdaSmrg	 *
232000a23bdaSmrg	 */
232100a23bdaSmrg	ptr[i++] = 0x002c0040;
232200a23bdaSmrg
232300a23bdaSmrg
232400a23bdaSmrg	/*
232500a23bdaSmrg	 * 00000010         COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
232600a23bdaSmrg	                                      USER_SGPR = 8
232700a23bdaSmrg	                                      TRAP_PRESENT = 0
232800a23bdaSmrg	                                      TGID_X_EN = 0
232900a23bdaSmrg	                                      TGID_Y_EN = 0
233000a23bdaSmrg	                                      TGID_Z_EN = 0
233100a23bdaSmrg	                                      TG_SIZE_EN = 0
233200a23bdaSmrg	                                      TIDIG_COMP_CNT = 0
233300a23bdaSmrg	                                      EXCP_EN_MSB = 0
233400a23bdaSmrg	                                      LDS_SIZE = 0
233500a23bdaSmrg	                                      EXCP_EN = 0
233600a23bdaSmrg	 *
233700a23bdaSmrg	 */
233800a23bdaSmrg	ptr[i++] = 0x00000010;
233900a23bdaSmrg
234000a23bdaSmrg
234100a23bdaSmrg/*
234200a23bdaSmrg * 00000100         COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
234300a23bdaSmrg                                         WAVESIZE = 0
234400a23bdaSmrg *
234500a23bdaSmrg */
234600a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
234700a23bdaSmrg	ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
234800a23bdaSmrg	ptr[i++] = 0x00000100;
234900a23bdaSmrg
235000a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
235100a23bdaSmrg	ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
235200a23bdaSmrg	ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
235300a23bdaSmrg	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
235400a23bdaSmrg
235500a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
235600a23bdaSmrg	ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
235700a23bdaSmrg	ptr[i++] = 0;
235800a23bdaSmrg
235900a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
236000a23bdaSmrg	ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
236100a23bdaSmrg	ptr[i++] = 1;
236200a23bdaSmrg	ptr[i++] = 1;
236300a23bdaSmrg	ptr[i++] = 1;
236400a23bdaSmrg
236500a23bdaSmrg
236600a23bdaSmrg	/* Dispatch */
236700a23bdaSmrg	ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
236800a23bdaSmrg	ptr[i++] = 1;
236900a23bdaSmrg	ptr[i++] = 1;
237000a23bdaSmrg	ptr[i++] = 1;
237100a23bdaSmrg	ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */
237200a23bdaSmrg
237300a23bdaSmrg
237400a23bdaSmrg	while (i & 7)
237500a23bdaSmrg		ptr[i++] =  0xffff1000; /* type3 nop packet */
237600a23bdaSmrg
237700a23bdaSmrg	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
237800a23bdaSmrg	ib_info.ib_mc_address = ib_result_mc_address;
237900a23bdaSmrg	ib_info.size = i;
238000a23bdaSmrg
238100a23bdaSmrg	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
238200a23bdaSmrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
238300a23bdaSmrg	ibs_request.ring = 0;
238400a23bdaSmrg	ibs_request.number_of_ibs = 1;
238500a23bdaSmrg	ibs_request.ibs = &ib_info;
238600a23bdaSmrg	ibs_request.resources = bo_list;
238700a23bdaSmrg	ibs_request.fence_info.handle = NULL;
238800a23bdaSmrg
238900a23bdaSmrg	r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request, 1);
239000a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
239100a23bdaSmrg	seq_no = ibs_request.seq_no;
239200a23bdaSmrg
239300a23bdaSmrg
239400a23bdaSmrg
239500a23bdaSmrg	/* Prepare second command with dependency on the first */
239600a23bdaSmrg	j = i;
239700a23bdaSmrg	ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
239800a23bdaSmrg	ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
239900a23bdaSmrg	ptr[i++] =          0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
240000a23bdaSmrg	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
240100a23bdaSmrg	ptr[i++] = 99;
240200a23bdaSmrg
240300a23bdaSmrg	while (i & 7)
240400a23bdaSmrg		ptr[i++] =  0xffff1000; /* type3 nop packet */
240500a23bdaSmrg
240600a23bdaSmrg	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
240700a23bdaSmrg	ib_info.ib_mc_address = ib_result_mc_address + j * 4;
240800a23bdaSmrg	ib_info.size = i - j;
240900a23bdaSmrg
241000a23bdaSmrg	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
241100a23bdaSmrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
241200a23bdaSmrg	ibs_request.ring = 0;
241300a23bdaSmrg	ibs_request.number_of_ibs = 1;
241400a23bdaSmrg	ibs_request.ibs = &ib_info;
241500a23bdaSmrg	ibs_request.resources = bo_list;
241600a23bdaSmrg	ibs_request.fence_info.handle = NULL;
241700a23bdaSmrg
241800a23bdaSmrg	ibs_request.number_of_dependencies = 1;
241900a23bdaSmrg
242000a23bdaSmrg	ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
242100a23bdaSmrg	ibs_request.dependencies[0].context = context_handle[1];
242200a23bdaSmrg	ibs_request.dependencies[0].ip_instance = 0;
242300a23bdaSmrg	ibs_request.dependencies[0].ring = 0;
242400a23bdaSmrg	ibs_request.dependencies[0].fence = seq_no;
242500a23bdaSmrg
242600a23bdaSmrg
242700a23bdaSmrg	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request, 1);
242800a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
242900a23bdaSmrg
243000a23bdaSmrg
243100a23bdaSmrg	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
243200a23bdaSmrg	fence_status.context = context_handle[0];
243300a23bdaSmrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
243400a23bdaSmrg	fence_status.ip_instance = 0;
243500a23bdaSmrg	fence_status.ring = 0;
243600a23bdaSmrg	fence_status.fence = ibs_request.seq_no;
243700a23bdaSmrg
243800a23bdaSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
243900a23bdaSmrg		       AMDGPU_TIMEOUT_INFINITE,0, &expired);
244000a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
244100a23bdaSmrg
244200a23bdaSmrg	/* Expect the second command to wait for shader to complete */
244300a23bdaSmrg	CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);
244400a23bdaSmrg
244500a23bdaSmrg	r = amdgpu_bo_list_destroy(bo_list);
244600a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
244700a23bdaSmrg
244800a23bdaSmrg	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
244900a23bdaSmrg				     ib_result_mc_address, 4096);
245000a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
245100a23bdaSmrg
245200a23bdaSmrg	r = amdgpu_cs_ctx_free(context_handle[0]);
245300a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
245400a23bdaSmrg	r = amdgpu_cs_ctx_free(context_handle[1]);
245500a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
245600a23bdaSmrg
245700a23bdaSmrg	free(ibs_request.dependencies);
24583f012e29Smrg}
24595324fb0dSmrg
24609bd392adSmrgstatic int amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t *ptr, int family)
24619bd392adSmrg{
24629bd392adSmrg	struct amdgpu_test_shader *shader;
24639bd392adSmrg	int i, loop = 0x10000;
24649bd392adSmrg
24659bd392adSmrg	switch (family) {
24669bd392adSmrg		case AMDGPU_FAMILY_AI:
24679bd392adSmrg			shader = &memcpy_cs_hang_slow_ai;
24689bd392adSmrg			break;
24699bd392adSmrg		case AMDGPU_FAMILY_RV:
24709bd392adSmrg			shader = &memcpy_cs_hang_slow_rv;
24719bd392adSmrg			break;
24720ed5401bSmrg		case AMDGPU_FAMILY_NV:
24730ed5401bSmrg			shader = &memcpy_cs_hang_slow_nv;
24740ed5401bSmrg			break;
24759bd392adSmrg		default:
24769bd392adSmrg			return -1;
24779bd392adSmrg			break;
24789bd392adSmrg	}
24799bd392adSmrg
24809bd392adSmrg	memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
24819bd392adSmrg
24829bd392adSmrg	for (i = 0; i < loop; i++)
24839bd392adSmrg		memcpy(ptr + shader->header_length + shader->body_length * i,
24849bd392adSmrg			shader->shader + shader->header_length,
24859bd392adSmrg			shader->body_length * sizeof(uint32_t));
24869bd392adSmrg
24879bd392adSmrg	memcpy(ptr + shader->header_length + shader->body_length * loop,
24889bd392adSmrg		shader->shader + shader->header_length + shader->body_length,
24899bd392adSmrg		shader->foot_length * sizeof(uint32_t));
24909bd392adSmrg
24919bd392adSmrg	return 0;
24929bd392adSmrg}
24939bd392adSmrg
24945324fb0dSmrgstatic int amdgpu_dispatch_load_cs_shader(uint8_t *ptr,
24950ed5401bSmrg					   int cs_type,
24960ed5401bSmrg					   uint32_t version)
24975324fb0dSmrg{
24985324fb0dSmrg	uint32_t shader_size;
24995324fb0dSmrg	const uint32_t *shader;
25005324fb0dSmrg
25015324fb0dSmrg	switch (cs_type) {
25025324fb0dSmrg		case CS_BUFFERCLEAR:
25030ed5401bSmrg			if (version == 9) {
25040ed5401bSmrg				shader = bufferclear_cs_shader_gfx9;
25050ed5401bSmrg				shader_size = sizeof(bufferclear_cs_shader_gfx9);
25060ed5401bSmrg			} else if (version == 10) {
25070ed5401bSmrg				shader = bufferclear_cs_shader_gfx10;
25080ed5401bSmrg				shader_size = sizeof(bufferclear_cs_shader_gfx10);
25090ed5401bSmrg			}
25105324fb0dSmrg			break;
25115324fb0dSmrg		case CS_BUFFERCOPY:
25120ed5401bSmrg			if (version == 9) {
25130ed5401bSmrg				shader = buffercopy_cs_shader_gfx9;
25140ed5401bSmrg				shader_size = sizeof(buffercopy_cs_shader_gfx9);
25150ed5401bSmrg			} else if (version == 10) {
25160ed5401bSmrg				shader = buffercopy_cs_shader_gfx10;
25170ed5401bSmrg				shader_size = sizeof(buffercopy_cs_shader_gfx10);
25180ed5401bSmrg			}
25195324fb0dSmrg			break;
25209bd392adSmrg		case CS_HANG:
25219bd392adSmrg			shader = memcpy_ps_hang;
25229bd392adSmrg			shader_size = sizeof(memcpy_ps_hang);
25239bd392adSmrg			break;
25245324fb0dSmrg		default:
25255324fb0dSmrg			return -1;
25265324fb0dSmrg			break;
25275324fb0dSmrg	}
25285324fb0dSmrg
25295324fb0dSmrg	memcpy(ptr, shader, shader_size);
25305324fb0dSmrg	return 0;
25315324fb0dSmrg}
25325324fb0dSmrg
25330ed5401bSmrgstatic int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type, uint32_t version)
25345324fb0dSmrg{
25355324fb0dSmrg	int i = 0;
25365324fb0dSmrg
25375324fb0dSmrg	/* Write context control and load shadowing register if necessary */
25385324fb0dSmrg	if (ip_type == AMDGPU_HW_IP_GFX) {
25395324fb0dSmrg		ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
25405324fb0dSmrg		ptr[i++] = 0x80000000;
25415324fb0dSmrg		ptr[i++] = 0x80000000;
25425324fb0dSmrg	}
25435324fb0dSmrg
25445324fb0dSmrg	/* Issue commands to set default compute state. */
25455324fb0dSmrg	/* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */
25465324fb0dSmrg	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 3);
25475324fb0dSmrg	ptr[i++] = 0x204;
25485324fb0dSmrg	i += 3;
254988f8a8d2Smrg
25505324fb0dSmrg	/* clear mmCOMPUTE_TMPRING_SIZE */
25515324fb0dSmrg	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
25525324fb0dSmrg	ptr[i++] = 0x218;
25535324fb0dSmrg	ptr[i++] = 0;
25545324fb0dSmrg
25550ed5401bSmrg	/* Set new sh registers in GFX10 to 0 */
25560ed5401bSmrg	if (version == 10) {
25570ed5401bSmrg		/* mmCOMPUTE_SHADER_CHKSUM */
25580ed5401bSmrg		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
25590ed5401bSmrg		ptr[i++] = 0x22a;
25600ed5401bSmrg		ptr[i++] = 0;
25610ed5401bSmrg		/* mmCOMPUTE_REQ_CTRL */
25620ed5401bSmrg		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 6);
25630ed5401bSmrg		ptr[i++] = 0x222;
25640ed5401bSmrg		i += 6;
25650ed5401bSmrg		/* mmCP_COHER_START_DELAY */
25660ed5401bSmrg		ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
25670ed5401bSmrg		ptr[i++] = 0x7b;
25680ed5401bSmrg		ptr[i++] = 0x20;
25690ed5401bSmrg	}
25705324fb0dSmrg	return i;
25715324fb0dSmrg}
25725324fb0dSmrg
25730ed5401bSmrgstatic int amdgpu_dispatch_write_cumask(uint32_t *ptr, uint32_t version)
25745324fb0dSmrg{
25755324fb0dSmrg	int i = 0;
25765324fb0dSmrg
25775324fb0dSmrg	/*  Issue commands to set cu mask used in current dispatch */
25780ed5401bSmrg	if (version == 9) {
25790ed5401bSmrg		/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */
25800ed5401bSmrg		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
25810ed5401bSmrg		ptr[i++] = 0x216;
25820ed5401bSmrg		ptr[i++] = 0xffffffff;
25830ed5401bSmrg		ptr[i++] = 0xffffffff;
25840ed5401bSmrg		/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */
25850ed5401bSmrg		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
25860ed5401bSmrg		ptr[i++] = 0x219;
25870ed5401bSmrg		ptr[i++] = 0xffffffff;
25880ed5401bSmrg		ptr[i++] = 0xffffffff;
25890ed5401bSmrg	} else if (version == 10) {
25900ed5401bSmrg		/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */
25910ed5401bSmrg		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG_INDEX, 2);
25920ed5401bSmrg		ptr[i++] = 0x30000216;
25930ed5401bSmrg		ptr[i++] = 0xffffffff;
25940ed5401bSmrg		ptr[i++] = 0xffffffff;
25950ed5401bSmrg		/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */
25960ed5401bSmrg		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG_INDEX, 2);
25970ed5401bSmrg		ptr[i++] = 0x30000219;
25980ed5401bSmrg		ptr[i++] = 0xffffffff;
25990ed5401bSmrg		ptr[i++] = 0xffffffff;
26000ed5401bSmrg	}
26015324fb0dSmrg
26025324fb0dSmrg	return i;
26035324fb0dSmrg}
26045324fb0dSmrg
26050ed5401bSmrgstatic int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr, uint32_t version)
26065324fb0dSmrg{
26075324fb0dSmrg	int i, j;
26085324fb0dSmrg
26095324fb0dSmrg	i = 0;
26105324fb0dSmrg
26115324fb0dSmrg	/* Writes shader state to HW */
26125324fb0dSmrg	/* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */
26135324fb0dSmrg	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
26145324fb0dSmrg	ptr[i++] = 0x20c;
26155324fb0dSmrg	ptr[i++] = (shader_addr >> 8);
26165324fb0dSmrg	ptr[i++] = (shader_addr >> 40);
26175324fb0dSmrg	/* write sh regs*/
26185324fb0dSmrg	for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) {
26195324fb0dSmrg		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
26205324fb0dSmrg		/* - Gfx9ShRegBase */
26215324fb0dSmrg		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00;
26225324fb0dSmrg		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1];
26235324fb0dSmrg	}
26245324fb0dSmrg
26250ed5401bSmrg	if (version == 10) {
26260ed5401bSmrg		/* mmCOMPUTE_PGM_RSRC3 */
26270ed5401bSmrg		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
26280ed5401bSmrg		ptr[i++] = 0x228;
26290ed5401bSmrg		ptr[i++] = 0;
26300ed5401bSmrg	}
26310ed5401bSmrg
26325324fb0dSmrg	return i;
26335324fb0dSmrg}
26345324fb0dSmrg
26355324fb0dSmrgstatic void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
26365324fb0dSmrg					 uint32_t ip_type,
26370ed5401bSmrg					 uint32_t ring,
26380ed5401bSmrg					 uint32_t version)
26395324fb0dSmrg{
26405324fb0dSmrg	amdgpu_context_handle context_handle;
26415324fb0dSmrg	amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3];
26425324fb0dSmrg	volatile unsigned char *ptr_dst;
26435324fb0dSmrg	void *ptr_shader;
26445324fb0dSmrg	uint32_t *ptr_cmd;
26455324fb0dSmrg	uint64_t mc_address_dst, mc_address_shader, mc_address_cmd;
26465324fb0dSmrg	amdgpu_va_handle va_dst, va_shader, va_cmd;
26475324fb0dSmrg	int i, r;
26485324fb0dSmrg	int bo_dst_size = 16384;
26495324fb0dSmrg	int bo_shader_size = 4096;
26505324fb0dSmrg	int bo_cmd_size = 4096;
26515324fb0dSmrg	struct amdgpu_cs_request ibs_request = {0};
26525324fb0dSmrg	struct amdgpu_cs_ib_info ib_info= {0};
26535324fb0dSmrg	amdgpu_bo_list_handle bo_list;
26545324fb0dSmrg	struct amdgpu_cs_fence fence_status = {0};
26555324fb0dSmrg	uint32_t expired;
26565324fb0dSmrg
26575324fb0dSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
26585324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
26595324fb0dSmrg
26605324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
26615324fb0dSmrg					AMDGPU_GEM_DOMAIN_GTT, 0,
26625324fb0dSmrg					&bo_cmd, (void **)&ptr_cmd,
26635324fb0dSmrg					&mc_address_cmd, &va_cmd);
26645324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
26655324fb0dSmrg	memset(ptr_cmd, 0, bo_cmd_size);
26665324fb0dSmrg
26675324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
26685324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
26695324fb0dSmrg					&bo_shader, &ptr_shader,
26705324fb0dSmrg					&mc_address_shader, &va_shader);
26715324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
267288f8a8d2Smrg	memset(ptr_shader, 0, bo_shader_size);
26735324fb0dSmrg
26740ed5401bSmrg	r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR, version);
26755324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
26765324fb0dSmrg
26775324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
26785324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
26795324fb0dSmrg					&bo_dst, (void **)&ptr_dst,
26805324fb0dSmrg					&mc_address_dst, &va_dst);
26815324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
26825324fb0dSmrg
26835324fb0dSmrg	i = 0;
26840ed5401bSmrg	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type, version);
26855324fb0dSmrg
26865324fb0dSmrg	/*  Issue commands to set cu mask used in current dispatch */
26870ed5401bSmrg	i += amdgpu_dispatch_write_cumask(ptr_cmd + i, version);
26885324fb0dSmrg
26895324fb0dSmrg	/* Writes shader state to HW */
26900ed5401bSmrg	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader, version);
26915324fb0dSmrg
26925324fb0dSmrg	/* Write constant data */
26935324fb0dSmrg	/* Writes the UAV constant data to the SGPRs. */
26945324fb0dSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
26955324fb0dSmrg	ptr_cmd[i++] = 0x240;
26965324fb0dSmrg	ptr_cmd[i++] = mc_address_dst;
26975324fb0dSmrg	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
26985324fb0dSmrg	ptr_cmd[i++] = 0x400;
26990ed5401bSmrg	if (version == 9)
27000ed5401bSmrg		ptr_cmd[i++] = 0x74fac;
27010ed5401bSmrg	else if (version == 10)
27020ed5401bSmrg		ptr_cmd[i++] = 0x1104bfac;
27035324fb0dSmrg
27045324fb0dSmrg	/* Sets a range of pixel shader constants */
27055324fb0dSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
27065324fb0dSmrg	ptr_cmd[i++] = 0x244;
27075324fb0dSmrg	ptr_cmd[i++] = 0x22222222;
27085324fb0dSmrg	ptr_cmd[i++] = 0x22222222;
27095324fb0dSmrg	ptr_cmd[i++] = 0x22222222;
27105324fb0dSmrg	ptr_cmd[i++] = 0x22222222;
27115324fb0dSmrg
271288f8a8d2Smrg	/* clear mmCOMPUTE_RESOURCE_LIMITS */
271388f8a8d2Smrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
271488f8a8d2Smrg	ptr_cmd[i++] = 0x215;
271588f8a8d2Smrg	ptr_cmd[i++] = 0;
271688f8a8d2Smrg
27175324fb0dSmrg	/* dispatch direct command */
27185324fb0dSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
27195324fb0dSmrg	ptr_cmd[i++] = 0x10;
27205324fb0dSmrg	ptr_cmd[i++] = 1;
27215324fb0dSmrg	ptr_cmd[i++] = 1;
27225324fb0dSmrg	ptr_cmd[i++] = 1;
27235324fb0dSmrg
27245324fb0dSmrg	while (i & 7)
27255324fb0dSmrg		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
27265324fb0dSmrg
27275324fb0dSmrg	resources[0] = bo_dst;
27285324fb0dSmrg	resources[1] = bo_shader;
27295324fb0dSmrg	resources[2] = bo_cmd;
27305324fb0dSmrg	r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list);
27315324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
27325324fb0dSmrg
27335324fb0dSmrg	ib_info.ib_mc_address = mc_address_cmd;
27345324fb0dSmrg	ib_info.size = i;
27355324fb0dSmrg	ibs_request.ip_type = ip_type;
27365324fb0dSmrg	ibs_request.ring = ring;
27375324fb0dSmrg	ibs_request.resources = bo_list;
27385324fb0dSmrg	ibs_request.number_of_ibs = 1;
27395324fb0dSmrg	ibs_request.ibs = &ib_info;
27405324fb0dSmrg	ibs_request.fence_info.handle = NULL;
27415324fb0dSmrg
27425324fb0dSmrg	/* submit CS */
27435324fb0dSmrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
27445324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
27455324fb0dSmrg
27465324fb0dSmrg	r = amdgpu_bo_list_destroy(bo_list);
27475324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
27485324fb0dSmrg
27495324fb0dSmrg	fence_status.ip_type = ip_type;
27505324fb0dSmrg	fence_status.ip_instance = 0;
27515324fb0dSmrg	fence_status.ring = ring;
27525324fb0dSmrg	fence_status.context = context_handle;
27535324fb0dSmrg	fence_status.fence = ibs_request.seq_no;
27545324fb0dSmrg
27555324fb0dSmrg	/* wait for IB accomplished */
27565324fb0dSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
27575324fb0dSmrg					 AMDGPU_TIMEOUT_INFINITE,
27585324fb0dSmrg					 0, &expired);
27595324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
27605324fb0dSmrg	CU_ASSERT_EQUAL(expired, true);
27615324fb0dSmrg
27625324fb0dSmrg	/* verify if memset test result meets with expected */
27635324fb0dSmrg	i = 0;
27645324fb0dSmrg	while(i < bo_dst_size) {
27655324fb0dSmrg		CU_ASSERT_EQUAL(ptr_dst[i++], 0x22);
27665324fb0dSmrg	}
27675324fb0dSmrg
27685324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
27695324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
27705324fb0dSmrg
27715324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
27725324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
27735324fb0dSmrg
27745324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
27755324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
27765324fb0dSmrg
27775324fb0dSmrg	r = amdgpu_cs_ctx_free(context_handle);
27785324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
27795324fb0dSmrg}
27805324fb0dSmrg
27815324fb0dSmrgstatic void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
27825324fb0dSmrg					uint32_t ip_type,
27839bd392adSmrg					uint32_t ring,
27840ed5401bSmrg					uint32_t version,
27859bd392adSmrg					int hang)
27865324fb0dSmrg{
27875324fb0dSmrg	amdgpu_context_handle context_handle;
27885324fb0dSmrg	amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
27895324fb0dSmrg	volatile unsigned char *ptr_dst;
27905324fb0dSmrg	void *ptr_shader;
27915324fb0dSmrg	unsigned char *ptr_src;
27925324fb0dSmrg	uint32_t *ptr_cmd;
27935324fb0dSmrg	uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
27945324fb0dSmrg	amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
27955324fb0dSmrg	int i, r;
27965324fb0dSmrg	int bo_dst_size = 16384;
27975324fb0dSmrg	int bo_shader_size = 4096;
27985324fb0dSmrg	int bo_cmd_size = 4096;
27995324fb0dSmrg	struct amdgpu_cs_request ibs_request = {0};
28005324fb0dSmrg	struct amdgpu_cs_ib_info ib_info= {0};
28019bd392adSmrg	uint32_t expired, hang_state, hangs;
28029bd392adSmrg	enum cs_type cs_type;
28035324fb0dSmrg	amdgpu_bo_list_handle bo_list;
28045324fb0dSmrg	struct amdgpu_cs_fence fence_status = {0};
28055324fb0dSmrg
28065324fb0dSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
28075324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
28085324fb0dSmrg
28095324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
28105324fb0dSmrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
28115324fb0dSmrg				    &bo_cmd, (void **)&ptr_cmd,
28125324fb0dSmrg				    &mc_address_cmd, &va_cmd);
28135324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
28145324fb0dSmrg	memset(ptr_cmd, 0, bo_cmd_size);
28155324fb0dSmrg
28165324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
28175324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
28185324fb0dSmrg					&bo_shader, &ptr_shader,
28195324fb0dSmrg					&mc_address_shader, &va_shader);
28205324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
282188f8a8d2Smrg	memset(ptr_shader, 0, bo_shader_size);
28225324fb0dSmrg
28239bd392adSmrg	cs_type = hang ? CS_HANG : CS_BUFFERCOPY;
28240ed5401bSmrg	r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type, version);
28255324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
28265324fb0dSmrg
28275324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
28285324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
28295324fb0dSmrg					&bo_src, (void **)&ptr_src,
28305324fb0dSmrg					&mc_address_src, &va_src);
28315324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
28325324fb0dSmrg
28335324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
28345324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
28355324fb0dSmrg					&bo_dst, (void **)&ptr_dst,
28365324fb0dSmrg					&mc_address_dst, &va_dst);
28375324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
28385324fb0dSmrg
28395324fb0dSmrg	memset(ptr_src, 0x55, bo_dst_size);
28405324fb0dSmrg
28415324fb0dSmrg	i = 0;
28420ed5401bSmrg	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type, version);
28435324fb0dSmrg
28445324fb0dSmrg	/*  Issue commands to set cu mask used in current dispatch */
28450ed5401bSmrg	i += amdgpu_dispatch_write_cumask(ptr_cmd + i, version);
28465324fb0dSmrg
28475324fb0dSmrg	/* Writes shader state to HW */
28480ed5401bSmrg	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader, version);
28495324fb0dSmrg
28505324fb0dSmrg	/* Write constant data */
28515324fb0dSmrg	/* Writes the texture resource constants data to the SGPRs */
28525324fb0dSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
28535324fb0dSmrg	ptr_cmd[i++] = 0x240;
28545324fb0dSmrg	ptr_cmd[i++] = mc_address_src;
28555324fb0dSmrg	ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
28565324fb0dSmrg	ptr_cmd[i++] = 0x400;
28570ed5401bSmrg	if (version == 9)
28580ed5401bSmrg		ptr_cmd[i++] = 0x74fac;
28590ed5401bSmrg	else if (version == 10)
28600ed5401bSmrg		ptr_cmd[i++] = 0x1104bfac;
28615324fb0dSmrg
28625324fb0dSmrg	/* Writes the UAV constant data to the SGPRs. */
28635324fb0dSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
28645324fb0dSmrg	ptr_cmd[i++] = 0x244;
28655324fb0dSmrg	ptr_cmd[i++] = mc_address_dst;
28665324fb0dSmrg	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
28675324fb0dSmrg	ptr_cmd[i++] = 0x400;
28680ed5401bSmrg	if (version == 9)
28690ed5401bSmrg		ptr_cmd[i++] = 0x74fac;
28700ed5401bSmrg	else if (version == 10)
28710ed5401bSmrg		ptr_cmd[i++] = 0x1104bfac;
28725324fb0dSmrg
287388f8a8d2Smrg	/* clear mmCOMPUTE_RESOURCE_LIMITS */
287488f8a8d2Smrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
287588f8a8d2Smrg	ptr_cmd[i++] = 0x215;
287688f8a8d2Smrg	ptr_cmd[i++] = 0;
287788f8a8d2Smrg
28785324fb0dSmrg	/* dispatch direct command */
28795324fb0dSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
28805324fb0dSmrg	ptr_cmd[i++] = 0x10;
28815324fb0dSmrg	ptr_cmd[i++] = 1;
28825324fb0dSmrg	ptr_cmd[i++] = 1;
28835324fb0dSmrg	ptr_cmd[i++] = 1;
28845324fb0dSmrg
28855324fb0dSmrg	while (i & 7)
28865324fb0dSmrg		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
28875324fb0dSmrg
28885324fb0dSmrg	resources[0] = bo_shader;
28895324fb0dSmrg	resources[1] = bo_src;
28905324fb0dSmrg	resources[2] = bo_dst;
28915324fb0dSmrg	resources[3] = bo_cmd;
28925324fb0dSmrg	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
28935324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
28945324fb0dSmrg
28955324fb0dSmrg	ib_info.ib_mc_address = mc_address_cmd;
28965324fb0dSmrg	ib_info.size = i;
28975324fb0dSmrg	ibs_request.ip_type = ip_type;
28985324fb0dSmrg	ibs_request.ring = ring;
28995324fb0dSmrg	ibs_request.resources = bo_list;
29005324fb0dSmrg	ibs_request.number_of_ibs = 1;
29015324fb0dSmrg	ibs_request.ibs = &ib_info;
29025324fb0dSmrg	ibs_request.fence_info.handle = NULL;
29035324fb0dSmrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
29045324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
29055324fb0dSmrg
29065324fb0dSmrg	fence_status.ip_type = ip_type;
29075324fb0dSmrg	fence_status.ip_instance = 0;
29085324fb0dSmrg	fence_status.ring = ring;
29095324fb0dSmrg	fence_status.context = context_handle;
29105324fb0dSmrg	fence_status.fence = ibs_request.seq_no;
29115324fb0dSmrg
29125324fb0dSmrg	/* wait for IB accomplished */
29135324fb0dSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
29145324fb0dSmrg					 AMDGPU_TIMEOUT_INFINITE,
29155324fb0dSmrg					 0, &expired);
29165324fb0dSmrg
29179bd392adSmrg	if (!hang) {
29189bd392adSmrg		CU_ASSERT_EQUAL(r, 0);
29199bd392adSmrg		CU_ASSERT_EQUAL(expired, true);
29209bd392adSmrg
29219bd392adSmrg		/* verify if memcpy test result meets with expected */
29229bd392adSmrg		i = 0;
29239bd392adSmrg		while(i < bo_dst_size) {
29249bd392adSmrg			CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
29259bd392adSmrg			i++;
29269bd392adSmrg		}
29279bd392adSmrg	} else {
29289bd392adSmrg		r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
29299bd392adSmrg		CU_ASSERT_EQUAL(r, 0);
29309bd392adSmrg		CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
29315324fb0dSmrg	}
29325324fb0dSmrg
29335324fb0dSmrg	r = amdgpu_bo_list_destroy(bo_list);
29345324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
29355324fb0dSmrg
29365324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
29375324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
29385324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
29395324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
29405324fb0dSmrg
29415324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
29425324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
29435324fb0dSmrg
29445324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
29455324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
29465324fb0dSmrg
29475324fb0dSmrg	r = amdgpu_cs_ctx_free(context_handle);
29485324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
29495324fb0dSmrg}
295088f8a8d2Smrg
295188f8a8d2Smrgstatic void amdgpu_compute_dispatch_test(void)
29525324fb0dSmrg{
29535324fb0dSmrg	int r;
29545324fb0dSmrg	struct drm_amdgpu_info_hw_ip info;
29550ed5401bSmrg	uint32_t ring_id, version;
29565324fb0dSmrg
29575324fb0dSmrg	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
29585324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
295988f8a8d2Smrg	if (!info.available_rings)
296088f8a8d2Smrg		printf("SKIP ... as there's no compute ring\n");
29615324fb0dSmrg
29620ed5401bSmrg	version = info.hw_ip_version_major;
29630ed5401bSmrg	if (version != 9 && version != 10) {
29640ed5401bSmrg		printf("SKIP ... unsupported gfx version %d\n", version);
29650ed5401bSmrg		return;
29660ed5401bSmrg	}
29670ed5401bSmrg
29685324fb0dSmrg	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
29690ed5401bSmrg		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, version);
29700ed5401bSmrg		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, version, 0);
29715324fb0dSmrg	}
297288f8a8d2Smrg}
297388f8a8d2Smrg
297488f8a8d2Smrgstatic void amdgpu_gfx_dispatch_test(void)
297588f8a8d2Smrg{
297688f8a8d2Smrg	int r;
297788f8a8d2Smrg	struct drm_amdgpu_info_hw_ip info;
29780ed5401bSmrg	uint32_t ring_id, version;
29795324fb0dSmrg
29805324fb0dSmrg	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
29815324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
298288f8a8d2Smrg	if (!info.available_rings)
298388f8a8d2Smrg		printf("SKIP ... as there's no graphics ring\n");
29845324fb0dSmrg
29850ed5401bSmrg	version = info.hw_ip_version_major;
29860ed5401bSmrg	if (version != 9 && version != 10) {
29870ed5401bSmrg		printf("SKIP ... unsupported gfx version %d\n", version);
29880ed5401bSmrg		return;
29890ed5401bSmrg	}
29900ed5401bSmrg
29915324fb0dSmrg	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
29920ed5401bSmrg		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, version);
29930ed5401bSmrg		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, version, 0);
29949bd392adSmrg	}
29959bd392adSmrg}
29969bd392adSmrg
29979bd392adSmrgvoid amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
29989bd392adSmrg{
29999bd392adSmrg	int r;
30009bd392adSmrg	struct drm_amdgpu_info_hw_ip info;
30010ed5401bSmrg	uint32_t ring_id, version;
30029bd392adSmrg
30039bd392adSmrg	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
30049bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
30059bd392adSmrg	if (!info.available_rings)
30069bd392adSmrg		printf("SKIP ... as there's no ring for ip %d\n", ip_type);
30079bd392adSmrg
30080ed5401bSmrg	version = info.hw_ip_version_major;
30090ed5401bSmrg	if (version != 9 && version != 10) {
30100ed5401bSmrg		printf("SKIP ... unsupported gfx version %d\n", version);
30110ed5401bSmrg		return;
30120ed5401bSmrg	}
30130ed5401bSmrg
30149bd392adSmrg	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
30150ed5401bSmrg		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 0);
30160ed5401bSmrg		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 1);
30170ed5401bSmrg		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 0);
30189bd392adSmrg	}
30199bd392adSmrg}
30209bd392adSmrg
30219bd392adSmrgstatic void amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_handle,
30220ed5401bSmrg						  uint32_t ip_type, uint32_t ring, int version)
30239bd392adSmrg{
30249bd392adSmrg	amdgpu_context_handle context_handle;
30259bd392adSmrg	amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
30269bd392adSmrg	volatile unsigned char *ptr_dst;
30279bd392adSmrg	void *ptr_shader;
30289bd392adSmrg	unsigned char *ptr_src;
30299bd392adSmrg	uint32_t *ptr_cmd;
30309bd392adSmrg	uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
30319bd392adSmrg	amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
30329bd392adSmrg	int i, r;
30339bd392adSmrg	int bo_dst_size = 0x4000000;
30349bd392adSmrg	int bo_shader_size = 0x400000;
30359bd392adSmrg	int bo_cmd_size = 4096;
30369bd392adSmrg	struct amdgpu_cs_request ibs_request = {0};
30379bd392adSmrg	struct amdgpu_cs_ib_info ib_info= {0};
30389bd392adSmrg	uint32_t hang_state, hangs, expired;
30399bd392adSmrg	struct amdgpu_gpu_info gpu_info = {0};
30409bd392adSmrg	amdgpu_bo_list_handle bo_list;
30419bd392adSmrg	struct amdgpu_cs_fence fence_status = {0};
30429bd392adSmrg
30439bd392adSmrg	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
30449bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
30459bd392adSmrg
30469bd392adSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
30479bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
30489bd392adSmrg
30499bd392adSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
30509bd392adSmrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
30519bd392adSmrg				    &bo_cmd, (void **)&ptr_cmd,
30529bd392adSmrg				    &mc_address_cmd, &va_cmd);
30539bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
30549bd392adSmrg	memset(ptr_cmd, 0, bo_cmd_size);
30559bd392adSmrg
30569bd392adSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
30579bd392adSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
30589bd392adSmrg					&bo_shader, &ptr_shader,
30599bd392adSmrg					&mc_address_shader, &va_shader);
30609bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
30619bd392adSmrg	memset(ptr_shader, 0, bo_shader_size);
30629bd392adSmrg
30639bd392adSmrg	r = amdgpu_dispatch_load_cs_shader_hang_slow(ptr_shader, gpu_info.family_id);
30649bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
30659bd392adSmrg
30669bd392adSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
30679bd392adSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
30689bd392adSmrg					&bo_src, (void **)&ptr_src,
30699bd392adSmrg					&mc_address_src, &va_src);
30709bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
30719bd392adSmrg
30729bd392adSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
30739bd392adSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
30749bd392adSmrg					&bo_dst, (void **)&ptr_dst,
30759bd392adSmrg					&mc_address_dst, &va_dst);
30769bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
30779bd392adSmrg
30789bd392adSmrg	memset(ptr_src, 0x55, bo_dst_size);
30799bd392adSmrg
30809bd392adSmrg	i = 0;
30810ed5401bSmrg	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type, version);
30829bd392adSmrg
30839bd392adSmrg	/*  Issue commands to set cu mask used in current dispatch */
30840ed5401bSmrg	i += amdgpu_dispatch_write_cumask(ptr_cmd + i, version);
30859bd392adSmrg
30869bd392adSmrg	/* Writes shader state to HW */
30870ed5401bSmrg	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader, version);
30889bd392adSmrg
30899bd392adSmrg	/* Write constant data */
30909bd392adSmrg	/* Writes the texture resource constants data to the SGPRs */
30919bd392adSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
30929bd392adSmrg	ptr_cmd[i++] = 0x240;
30939bd392adSmrg	ptr_cmd[i++] = mc_address_src;
30949bd392adSmrg	ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
30959bd392adSmrg	ptr_cmd[i++] = 0x400000;
30960ed5401bSmrg	if (version == 9)
30970ed5401bSmrg		ptr_cmd[i++] = 0x74fac;
30980ed5401bSmrg	else if (version == 10)
30990ed5401bSmrg		ptr_cmd[i++] = 0x1104bfac;
31009bd392adSmrg
31019bd392adSmrg	/* Writes the UAV constant data to the SGPRs. */
31029bd392adSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
31039bd392adSmrg	ptr_cmd[i++] = 0x244;
31049bd392adSmrg	ptr_cmd[i++] = mc_address_dst;
31059bd392adSmrg	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
31069bd392adSmrg	ptr_cmd[i++] = 0x400000;
31070ed5401bSmrg	if (version == 9)
31080ed5401bSmrg		ptr_cmd[i++] = 0x74fac;
31090ed5401bSmrg	else if (version == 10)
31100ed5401bSmrg		ptr_cmd[i++] = 0x1104bfac;
31119bd392adSmrg
31129bd392adSmrg	/* clear mmCOMPUTE_RESOURCE_LIMITS */
31139bd392adSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
31149bd392adSmrg	ptr_cmd[i++] = 0x215;
31159bd392adSmrg	ptr_cmd[i++] = 0;
31169bd392adSmrg
31179bd392adSmrg	/* dispatch direct command */
31189bd392adSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
31199bd392adSmrg	ptr_cmd[i++] = 0x10000;
31209bd392adSmrg	ptr_cmd[i++] = 1;
31219bd392adSmrg	ptr_cmd[i++] = 1;
31229bd392adSmrg	ptr_cmd[i++] = 1;
31239bd392adSmrg
31249bd392adSmrg	while (i & 7)
31259bd392adSmrg		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
31269bd392adSmrg
31279bd392adSmrg	resources[0] = bo_shader;
31289bd392adSmrg	resources[1] = bo_src;
31299bd392adSmrg	resources[2] = bo_dst;
31309bd392adSmrg	resources[3] = bo_cmd;
31319bd392adSmrg	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
31329bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
31339bd392adSmrg
31349bd392adSmrg	ib_info.ib_mc_address = mc_address_cmd;
31359bd392adSmrg	ib_info.size = i;
31369bd392adSmrg	ibs_request.ip_type = ip_type;
31379bd392adSmrg	ibs_request.ring = ring;
31389bd392adSmrg	ibs_request.resources = bo_list;
31399bd392adSmrg	ibs_request.number_of_ibs = 1;
31409bd392adSmrg	ibs_request.ibs = &ib_info;
31419bd392adSmrg	ibs_request.fence_info.handle = NULL;
31429bd392adSmrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
31439bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
31449bd392adSmrg
31459bd392adSmrg	fence_status.ip_type = ip_type;
31469bd392adSmrg	fence_status.ip_instance = 0;
31479bd392adSmrg	fence_status.ring = ring;
31489bd392adSmrg	fence_status.context = context_handle;
31499bd392adSmrg	fence_status.fence = ibs_request.seq_no;
31509bd392adSmrg
31519bd392adSmrg	/* wait for IB accomplished */
31529bd392adSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
31539bd392adSmrg					 AMDGPU_TIMEOUT_INFINITE,
31549bd392adSmrg					 0, &expired);
31559bd392adSmrg
31569bd392adSmrg	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
31579bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
31589bd392adSmrg	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
31599bd392adSmrg
31609bd392adSmrg	r = amdgpu_bo_list_destroy(bo_list);
31619bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
31629bd392adSmrg
31639bd392adSmrg	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
31649bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
31659bd392adSmrg	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
31669bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
31679bd392adSmrg
31689bd392adSmrg	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
31699bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
31709bd392adSmrg
31719bd392adSmrg	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
31729bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
31739bd392adSmrg
31749bd392adSmrg	r = amdgpu_cs_ctx_free(context_handle);
31759bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
31769bd392adSmrg}
31779bd392adSmrg
31789bd392adSmrgvoid amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
31799bd392adSmrg{
31809bd392adSmrg	int r;
31819bd392adSmrg	struct drm_amdgpu_info_hw_ip info;
31820ed5401bSmrg	uint32_t ring_id, version;
31839bd392adSmrg
31849bd392adSmrg	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
31859bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
31869bd392adSmrg	if (!info.available_rings)
31879bd392adSmrg		printf("SKIP ... as there's no ring for ip %d\n", ip_type);
31889bd392adSmrg
31890ed5401bSmrg	version = info.hw_ip_version_major;
31900ed5401bSmrg	if (version != 9 && version != 10) {
31910ed5401bSmrg		printf("SKIP ... unsupported gfx version %d\n", version);
31920ed5401bSmrg		return;
31930ed5401bSmrg	}
31940ed5401bSmrg
31959bd392adSmrg	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
31960ed5401bSmrg		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 0);
31970ed5401bSmrg		amdgpu_memcpy_dispatch_hang_slow_test(device_handle, ip_type, ring_id, version);
31980ed5401bSmrg		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 0);
31999bd392adSmrg	}
32009bd392adSmrg}
32019bd392adSmrg
32029bd392adSmrgstatic int amdgpu_draw_load_ps_shader_hang_slow(uint32_t *ptr, int family)
32039bd392adSmrg{
32049bd392adSmrg	struct amdgpu_test_shader *shader;
32059bd392adSmrg	int i, loop = 0x40000;
32069bd392adSmrg
32079bd392adSmrg	switch (family) {
32089bd392adSmrg		case AMDGPU_FAMILY_AI:
32099bd392adSmrg		case AMDGPU_FAMILY_RV:
32109bd392adSmrg			shader = &memcpy_ps_hang_slow_ai;
32119bd392adSmrg			break;
32129bd392adSmrg		default:
32139bd392adSmrg			return -1;
32149bd392adSmrg			break;
32155324fb0dSmrg	}
32169bd392adSmrg
32179bd392adSmrg	memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
32189bd392adSmrg
32199bd392adSmrg	for (i = 0; i < loop; i++)
32209bd392adSmrg		memcpy(ptr + shader->header_length + shader->body_length * i,
32219bd392adSmrg			shader->shader + shader->header_length,
32229bd392adSmrg			shader->body_length * sizeof(uint32_t));
32239bd392adSmrg
32249bd392adSmrg	memcpy(ptr + shader->header_length + shader->body_length * loop,
32259bd392adSmrg		shader->shader + shader->header_length + shader->body_length,
32269bd392adSmrg		shader->foot_length * sizeof(uint32_t));
32279bd392adSmrg
32289bd392adSmrg	return 0;
32295324fb0dSmrg}
32305324fb0dSmrg
32310ed5401bSmrgstatic int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type, uint32_t version)
32325324fb0dSmrg{
32335324fb0dSmrg	int i;
32345324fb0dSmrg	uint32_t shader_offset= 256;
32355324fb0dSmrg	uint32_t mem_offset, patch_code_offset;
32365324fb0dSmrg	uint32_t shader_size, patchinfo_code_size;
32375324fb0dSmrg	const uint32_t *shader;
32385324fb0dSmrg	const uint32_t *patchinfo_code;
32395324fb0dSmrg	const uint32_t *patchcode_offset;
32405324fb0dSmrg
32415324fb0dSmrg	switch (ps_type) {
32425324fb0dSmrg		case PS_CONST:
32430ed5401bSmrg			if (version == 9) {
32440ed5401bSmrg				shader = ps_const_shader_gfx9;
32450ed5401bSmrg				shader_size = sizeof(ps_const_shader_gfx9);
32460ed5401bSmrg				patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9;
32470ed5401bSmrg				patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9;
32480ed5401bSmrg				patchcode_offset = ps_const_shader_patchinfo_offset_gfx9;
32490ed5401bSmrg			} else if (version == 10){
32500ed5401bSmrg				shader = ps_const_shader_gfx10;
32510ed5401bSmrg				shader_size = sizeof(ps_const_shader_gfx10);
32520ed5401bSmrg				patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx10;
32530ed5401bSmrg				patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx10;
32540ed5401bSmrg				patchcode_offset = ps_const_shader_patchinfo_offset_gfx10;
32550ed5401bSmrg			}
32565324fb0dSmrg			break;
32575324fb0dSmrg		case PS_TEX:
32580ed5401bSmrg			if (version == 9) {
32590ed5401bSmrg				shader = ps_tex_shader_gfx9;
32600ed5401bSmrg				shader_size = sizeof(ps_tex_shader_gfx9);
32610ed5401bSmrg				patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx9;
32620ed5401bSmrg				patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9;
32630ed5401bSmrg				patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9;
32640ed5401bSmrg			} else if (version == 10) {
32650ed5401bSmrg				shader = ps_tex_shader_gfx10;
32660ed5401bSmrg				shader_size = sizeof(ps_tex_shader_gfx10);
32670ed5401bSmrg				patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx10;
32680ed5401bSmrg				patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx10;
32690ed5401bSmrg				patchcode_offset = ps_tex_shader_patchinfo_offset_gfx10;
32700ed5401bSmrg			}
32715324fb0dSmrg			break;
32729bd392adSmrg		case PS_HANG:
32739bd392adSmrg			shader = memcpy_ps_hang;
32749bd392adSmrg			shader_size = sizeof(memcpy_ps_hang);
32759bd392adSmrg
32769bd392adSmrg			memcpy(ptr, shader, shader_size);
32779bd392adSmrg			return 0;
32785324fb0dSmrg		default:
32795324fb0dSmrg			return -1;
32805324fb0dSmrg			break;
32815324fb0dSmrg	}
32825324fb0dSmrg
32835324fb0dSmrg	/* write main shader program */
32845324fb0dSmrg	for (i = 0 ; i < 10; i++) {
32855324fb0dSmrg		mem_offset = i * shader_offset;
32865324fb0dSmrg		memcpy(ptr + mem_offset, shader, shader_size);
32875324fb0dSmrg	}
32885324fb0dSmrg
32895324fb0dSmrg	/* overwrite patch codes */
32905324fb0dSmrg	for (i = 0 ; i < 10; i++) {
32915324fb0dSmrg		mem_offset = i * shader_offset + patchcode_offset[0] * sizeof(uint32_t);
32925324fb0dSmrg		patch_code_offset = i * patchinfo_code_size;
32935324fb0dSmrg		memcpy(ptr + mem_offset,
32945324fb0dSmrg			patchinfo_code + patch_code_offset,
32955324fb0dSmrg			patchinfo_code_size * sizeof(uint32_t));
32965324fb0dSmrg	}
32975324fb0dSmrg
32985324fb0dSmrg	return 0;
32995324fb0dSmrg}
33005324fb0dSmrg
33015324fb0dSmrg/* load RectPosTexFast_VS */
33020ed5401bSmrgstatic int amdgpu_draw_load_vs_shader(uint8_t *ptr, uint32_t version)
33035324fb0dSmrg{
33045324fb0dSmrg	const uint32_t *shader;
33055324fb0dSmrg	uint32_t shader_size;
33065324fb0dSmrg
33070ed5401bSmrg	if (version == 9) {
33080ed5401bSmrg		shader = vs_RectPosTexFast_shader_gfx9;
33090ed5401bSmrg		shader_size = sizeof(vs_RectPosTexFast_shader_gfx9);
33100ed5401bSmrg	} else if (version == 10) {
33110ed5401bSmrg		shader = vs_RectPosTexFast_shader_gfx10;
33120ed5401bSmrg		shader_size = sizeof(vs_RectPosTexFast_shader_gfx10);
33130ed5401bSmrg	}
33145324fb0dSmrg
33155324fb0dSmrg	memcpy(ptr, shader, shader_size);
33165324fb0dSmrg
33175324fb0dSmrg	return 0;
33185324fb0dSmrg}
33195324fb0dSmrg
33200ed5401bSmrgstatic int amdgpu_draw_init(uint32_t *ptr, uint32_t version)
33215324fb0dSmrg{
33225324fb0dSmrg	int i = 0;
33235324fb0dSmrg	const uint32_t *preamblecache_ptr;
33245324fb0dSmrg	uint32_t preamblecache_size;
33255324fb0dSmrg
33265324fb0dSmrg	/* Write context control and load shadowing register if necessary */
33275324fb0dSmrg	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
33285324fb0dSmrg	ptr[i++] = 0x80000000;
33295324fb0dSmrg	ptr[i++] = 0x80000000;
33305324fb0dSmrg
33310ed5401bSmrg	if (version == 9) {
33320ed5401bSmrg		preamblecache_ptr = preamblecache_gfx9;
33330ed5401bSmrg		preamblecache_size = sizeof(preamblecache_gfx9);
33340ed5401bSmrg	} else if (version == 10) {
33350ed5401bSmrg		preamblecache_ptr = preamblecache_gfx10;
33360ed5401bSmrg		preamblecache_size = sizeof(preamblecache_gfx10);
33370ed5401bSmrg	}
33385324fb0dSmrg
33395324fb0dSmrg	memcpy(ptr + i, preamblecache_ptr, preamblecache_size);
33405324fb0dSmrg	return i + preamblecache_size/sizeof(uint32_t);
33415324fb0dSmrg}
33425324fb0dSmrg
33435324fb0dSmrgstatic int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr,
33449bd392adSmrg							 uint64_t dst_addr,
33450ed5401bSmrg							 uint32_t version,
33469bd392adSmrg							 int hang_slow)
33475324fb0dSmrg{
33485324fb0dSmrg	int i = 0;
33495324fb0dSmrg
33505324fb0dSmrg	/* setup color buffer */
33510ed5401bSmrg	if (version == 9) {
33520ed5401bSmrg		/* offset   reg
33530ed5401bSmrg		   0xA318   CB_COLOR0_BASE
33540ed5401bSmrg		   0xA319   CB_COLOR0_BASE_EXT
33550ed5401bSmrg		   0xA31A   CB_COLOR0_ATTRIB2
33560ed5401bSmrg		   0xA31B   CB_COLOR0_VIEW
33570ed5401bSmrg		   0xA31C   CB_COLOR0_INFO
33580ed5401bSmrg		   0xA31D   CB_COLOR0_ATTRIB
33590ed5401bSmrg		   0xA31E   CB_COLOR0_DCC_CONTROL
33600ed5401bSmrg		   0xA31F   CB_COLOR0_CMASK
33610ed5401bSmrg		   0xA320   CB_COLOR0_CMASK_BASE_EXT
33620ed5401bSmrg		   0xA321   CB_COLOR0_FMASK
33630ed5401bSmrg		   0xA322   CB_COLOR0_FMASK_BASE_EXT
33640ed5401bSmrg		   0xA323   CB_COLOR0_CLEAR_WORD0
33650ed5401bSmrg		   0xA324   CB_COLOR0_CLEAR_WORD1
33660ed5401bSmrg		   0xA325   CB_COLOR0_DCC_BASE
33670ed5401bSmrg		   0xA326   CB_COLOR0_DCC_BASE_EXT */
33680ed5401bSmrg		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15);
33690ed5401bSmrg		ptr[i++] = 0x318;
33700ed5401bSmrg		ptr[i++] = dst_addr >> 8;
33710ed5401bSmrg		ptr[i++] = dst_addr >> 40;
33720ed5401bSmrg		ptr[i++] = hang_slow ? 0x3ffc7ff : 0x7c01f;
33730ed5401bSmrg		ptr[i++] = 0;
33740ed5401bSmrg		ptr[i++] = 0x50438;
33750ed5401bSmrg		ptr[i++] = 0x10140000;
33760ed5401bSmrg		i += 9;
33770ed5401bSmrg
33780ed5401bSmrg		/* mmCB_MRT0_EPITCH */
33790ed5401bSmrg		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
33800ed5401bSmrg		ptr[i++] = 0x1e8;
33810ed5401bSmrg		ptr[i++] = hang_slow ? 0xfff : 0x1f;
33820ed5401bSmrg	} else if (version == 10) {
33830ed5401bSmrg		/* 0xA318   CB_COLOR0_BASE
33840ed5401bSmrg		   0xA319   CB_COLOR0_PITCH
33850ed5401bSmrg		   0xA31A   CB_COLOR0_SLICE
33860ed5401bSmrg		   0xA31B   CB_COLOR0_VIEW
33870ed5401bSmrg		   0xA31C   CB_COLOR0_INFO
33880ed5401bSmrg		   0xA31D   CB_COLOR0_ATTRIB
33890ed5401bSmrg		   0xA31E   CB_COLOR0_DCC_CONTROL
33900ed5401bSmrg		   0xA31F   CB_COLOR0_CMASK
33910ed5401bSmrg		   0xA320   CB_COLOR0_CMASK_SLICE
33920ed5401bSmrg		   0xA321   CB_COLOR0_FMASK
33930ed5401bSmrg		   0xA322   CB_COLOR0_FMASK_SLICE
33940ed5401bSmrg		   0xA323   CB_COLOR0_CLEAR_WORD0
33950ed5401bSmrg		   0xA324   CB_COLOR0_CLEAR_WORD1
33960ed5401bSmrg		   0xA325   CB_COLOR0_DCC_BASE */
33970ed5401bSmrg		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 14);
33980ed5401bSmrg		ptr[i++] = 0x318;
33990ed5401bSmrg		ptr[i++] = dst_addr >> 8;
34000ed5401bSmrg		i += 3;
34010ed5401bSmrg		ptr[i++] = 0x50438;
34020ed5401bSmrg		i += 9;
34030ed5401bSmrg
34040ed5401bSmrg		/* 0xA390   CB_COLOR0_BASE_EXT */
34050ed5401bSmrg		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
34060ed5401bSmrg		ptr[i++] = 0x390;
34070ed5401bSmrg		ptr[i++] = dst_addr >> 40;
34080ed5401bSmrg
34090ed5401bSmrg		/* 0xA398   CB_COLOR0_CMASK_BASE_EXT */
34100ed5401bSmrg		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
34110ed5401bSmrg		ptr[i++] = 0x398;
34120ed5401bSmrg		ptr[i++] = 0;
34135324fb0dSmrg
34140ed5401bSmrg		/* 0xA3A0   CB_COLOR0_FMASK_BASE_EXT */
34150ed5401bSmrg		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
34160ed5401bSmrg		ptr[i++] = 0x3a0;
34170ed5401bSmrg		ptr[i++] = 0;
34180ed5401bSmrg
34190ed5401bSmrg		/* 0xA3A8   CB_COLOR0_DCC_BASE_EXT */
34200ed5401bSmrg		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
34210ed5401bSmrg		ptr[i++] = 0x3a8;
34220ed5401bSmrg		ptr[i++] = 0;
34230ed5401bSmrg
34240ed5401bSmrg		/* 0xA3B0   CB_COLOR0_ATTRIB2 */
34250ed5401bSmrg		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
34260ed5401bSmrg		ptr[i++] = 0x3b0;
34270ed5401bSmrg		ptr[i++] = hang_slow ? 0x3ffc7ff : 0x7c01f;
34280ed5401bSmrg
34290ed5401bSmrg		/* 0xA3B8   CB_COLOR0_ATTRIB3 */
34300ed5401bSmrg		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
34310ed5401bSmrg		ptr[i++] = 0x3b8;
34320ed5401bSmrg		ptr[i++] = 0x9014000;
34330ed5401bSmrg	}
34345324fb0dSmrg
34355324fb0dSmrg	/* 0xA32B   CB_COLOR1_BASE */
34365324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
34375324fb0dSmrg	ptr[i++] = 0x32b;
34385324fb0dSmrg	ptr[i++] = 0;
34395324fb0dSmrg
34405324fb0dSmrg	/* 0xA33A   CB_COLOR1_BASE */
34415324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
34425324fb0dSmrg	ptr[i++] = 0x33a;
34435324fb0dSmrg	ptr[i++] = 0;
34445324fb0dSmrg
34455324fb0dSmrg	/* SPI_SHADER_COL_FORMAT */
34465324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
34475324fb0dSmrg	ptr[i++] = 0x1c5;
34485324fb0dSmrg	ptr[i++] = 9;
34495324fb0dSmrg
34505324fb0dSmrg	/* Setup depth buffer */
34510ed5401bSmrg	if (version == 9) {
34520ed5401bSmrg		/* mmDB_Z_INFO */
34530ed5401bSmrg		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
34540ed5401bSmrg		ptr[i++] = 0xe;
34550ed5401bSmrg		i += 2;
34560ed5401bSmrg	} else if (version == 10) {
34570ed5401bSmrg		/* mmDB_Z_INFO */
34580ed5401bSmrg		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
34590ed5401bSmrg		ptr[i++] = 0x10;
34600ed5401bSmrg		i += 2;
34610ed5401bSmrg	}
34625324fb0dSmrg
34635324fb0dSmrg	return i;
34645324fb0dSmrg}
34655324fb0dSmrg
34660ed5401bSmrgstatic int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr,
34670ed5401bSmrg						     uint32_t version,
34680ed5401bSmrg						     int hang_slow)
34695324fb0dSmrg{
34705324fb0dSmrg	int i = 0;
34715324fb0dSmrg	const uint32_t *cached_cmd_ptr;
34725324fb0dSmrg	uint32_t cached_cmd_size;
34735324fb0dSmrg
34745324fb0dSmrg	/* mmPA_SC_TILE_STEERING_OVERRIDE */
34755324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
34765324fb0dSmrg	ptr[i++] = 0xd7;
34775324fb0dSmrg	ptr[i++] = 0;
34785324fb0dSmrg
34795324fb0dSmrg	ptr[i++] = 0xffff1000;
34805324fb0dSmrg	ptr[i++] = 0xc0021000;
34815324fb0dSmrg
34825324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
34835324fb0dSmrg	ptr[i++] = 0xd7;
34840ed5401bSmrg	if (version == 9)
34850ed5401bSmrg		ptr[i++] = 1;
34860ed5401bSmrg	else if (version == 10)
34870ed5401bSmrg		ptr[i++] = 0;
34885324fb0dSmrg
34895324fb0dSmrg	/* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
34905324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16);
34915324fb0dSmrg	ptr[i++] = 0x2fe;
34925324fb0dSmrg	i += 16;
34935324fb0dSmrg
34945324fb0dSmrg	/* mmPA_SC_CENTROID_PRIORITY_0 */
34955324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
34965324fb0dSmrg	ptr[i++] = 0x2f5;
34975324fb0dSmrg	i += 2;
34985324fb0dSmrg
34990ed5401bSmrg	if (version == 9) {
35000ed5401bSmrg		cached_cmd_ptr = cached_cmd_gfx9;
35010ed5401bSmrg		cached_cmd_size = sizeof(cached_cmd_gfx9);
35020ed5401bSmrg	} else if (version == 10) {
35030ed5401bSmrg		cached_cmd_ptr = cached_cmd_gfx10;
35040ed5401bSmrg		cached_cmd_size = sizeof(cached_cmd_gfx10);
35050ed5401bSmrg	}
35065324fb0dSmrg
35075324fb0dSmrg	memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size);
35089bd392adSmrg	if (hang_slow)
35099bd392adSmrg		*(ptr + i + 12) = 0x8000800;
35105324fb0dSmrg	i += cached_cmd_size/sizeof(uint32_t);
35115324fb0dSmrg
35120ed5401bSmrg	if (version == 10) {
35130ed5401bSmrg		/* mmCB_RMI_GL2_CACHE_CONTROL */
35140ed5401bSmrg		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
35150ed5401bSmrg		ptr[i++] = 0x104;
35160ed5401bSmrg		ptr[i++] = 0x40aa0055;
35170ed5401bSmrg		/* mmDB_RMI_L2_CACHE_CONTROL */
35180ed5401bSmrg		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
35190ed5401bSmrg		ptr[i++] = 0x1f;
35200ed5401bSmrg		ptr[i++] = 0x2a0055;
35210ed5401bSmrg	}
35220ed5401bSmrg
35235324fb0dSmrg	return i;
35245324fb0dSmrg}
35255324fb0dSmrg
35265324fb0dSmrgstatic int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr,
35275324fb0dSmrg						  int ps_type,
35289bd392adSmrg						  uint64_t shader_addr,
35290ed5401bSmrg						  uint32_t version,
35309bd392adSmrg						  int hang_slow)
35315324fb0dSmrg{
35325324fb0dSmrg	int i = 0;
35335324fb0dSmrg
35345324fb0dSmrg	/* mmPA_CL_VS_OUT_CNTL */
35355324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
35365324fb0dSmrg	ptr[i++] = 0x207;
35375324fb0dSmrg	ptr[i++] = 0;
35385324fb0dSmrg
35390ed5401bSmrg	if (version == 9) {
35400ed5401bSmrg		/* mmSPI_SHADER_PGM_RSRC3_VS */
35410ed5401bSmrg		ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
35420ed5401bSmrg		ptr[i++] = 0x46;
35430ed5401bSmrg		ptr[i++] = 0xffff;
35440ed5401bSmrg	} else if (version == 10) {
35450ed5401bSmrg		/* mmSPI_SHADER_PGM_RSRC3_VS */
35460ed5401bSmrg		ptr[i++] = PACKET3(PKT3_SET_SH_REG_INDEX, 1);
35470ed5401bSmrg		ptr[i++] = 0x30000046;
35480ed5401bSmrg		ptr[i++] = 0xffff;
35490ed5401bSmrg		/* mmSPI_SHADER_PGM_RSRC4_VS */
35500ed5401bSmrg		ptr[i++] = PACKET3(PKT3_SET_SH_REG_INDEX, 1);
35510ed5401bSmrg		ptr[i++] = 0x30000041;
35520ed5401bSmrg		ptr[i++] = 0xffff;
35530ed5401bSmrg	}
35545324fb0dSmrg
35555324fb0dSmrg	/* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */
35565324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
35575324fb0dSmrg	ptr[i++] = 0x48;
35585324fb0dSmrg	ptr[i++] = shader_addr >> 8;
35595324fb0dSmrg	ptr[i++] = shader_addr >> 40;
35605324fb0dSmrg
35615324fb0dSmrg	/* mmSPI_SHADER_PGM_RSRC1_VS */
35625324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
35635324fb0dSmrg	ptr[i++] = 0x4a;
35640ed5401bSmrg	if (version == 9)
35650ed5401bSmrg		ptr[i++] = 0xc0081;
35660ed5401bSmrg	else if (version == 10)
35670ed5401bSmrg		ptr[i++] = 0xc0041;
35685324fb0dSmrg	/* mmSPI_SHADER_PGM_RSRC2_VS */
35695324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
35705324fb0dSmrg	ptr[i++] = 0x4b;
35715324fb0dSmrg	ptr[i++] = 0x18;
35725324fb0dSmrg
35735324fb0dSmrg	/* mmSPI_VS_OUT_CONFIG */
35745324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
35755324fb0dSmrg	ptr[i++] = 0x1b1;
35765324fb0dSmrg	ptr[i++] = 2;
35775324fb0dSmrg
35785324fb0dSmrg	/* mmSPI_SHADER_POS_FORMAT */
35795324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
35805324fb0dSmrg	ptr[i++] = 0x1c3;
35815324fb0dSmrg	ptr[i++] = 4;
35825324fb0dSmrg
35835324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
35845324fb0dSmrg	ptr[i++] = 0x4c;
35855324fb0dSmrg	i += 2;
35869bd392adSmrg	ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;
35879bd392adSmrg	ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;
35885324fb0dSmrg
35895324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
35905324fb0dSmrg	ptr[i++] = 0x50;
35915324fb0dSmrg	i += 2;
35925324fb0dSmrg	if (ps_type == PS_CONST) {
35935324fb0dSmrg		i += 2;
35945324fb0dSmrg	} else if (ps_type == PS_TEX) {
35955324fb0dSmrg		ptr[i++] = 0x3f800000;
35965324fb0dSmrg		ptr[i++] = 0x3f800000;
35975324fb0dSmrg	}
35985324fb0dSmrg
35995324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
36005324fb0dSmrg	ptr[i++] = 0x54;
36015324fb0dSmrg	i += 4;
36025324fb0dSmrg
36035324fb0dSmrg	return i;
36045324fb0dSmrg}
36055324fb0dSmrg
36065324fb0dSmrgstatic int amdgpu_draw_ps_write2hw(uint32_t *ptr,
36075324fb0dSmrg				   int ps_type,
36080ed5401bSmrg				   uint64_t shader_addr,
36090ed5401bSmrg				   uint32_t version)
36105324fb0dSmrg{
36115324fb0dSmrg	int i, j;
36125324fb0dSmrg	const uint32_t *sh_registers;
36135324fb0dSmrg	const uint32_t *context_registers;
36145324fb0dSmrg	uint32_t num_sh_reg, num_context_reg;
36155324fb0dSmrg
36165324fb0dSmrg	if (ps_type == PS_CONST) {
36170ed5401bSmrg		if (version == 9) {
36180ed5401bSmrg			sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9;
36190ed5401bSmrg			num_sh_reg = ps_num_sh_registers_gfx9;
36200ed5401bSmrg		} else if (version == 10) {
36210ed5401bSmrg			sh_registers = (const uint32_t *)ps_const_sh_registers_gfx10;
36220ed5401bSmrg			num_sh_reg = ps_num_sh_registers_gfx10;
36230ed5401bSmrg		}
36245324fb0dSmrg		context_registers = (const uint32_t *)ps_const_context_reg_gfx9;
36255324fb0dSmrg		num_context_reg = ps_num_context_registers_gfx9;
36265324fb0dSmrg	} else if (ps_type == PS_TEX) {
36275324fb0dSmrg		sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9;
36285324fb0dSmrg		context_registers = (const uint32_t *)ps_tex_context_reg_gfx9;
36295324fb0dSmrg		num_sh_reg = ps_num_sh_registers_gfx9;
36305324fb0dSmrg		num_context_reg = ps_num_context_registers_gfx9;
36315324fb0dSmrg	}
36325324fb0dSmrg
36335324fb0dSmrg	i = 0;
36345324fb0dSmrg
36350ed5401bSmrg	if (version == 9) {
36360ed5401bSmrg		/* 0x2c07   SPI_SHADER_PGM_RSRC3_PS
36370ed5401bSmrg		   0x2c08   SPI_SHADER_PGM_LO_PS
36380ed5401bSmrg		   0x2c09   SPI_SHADER_PGM_HI_PS */
36390ed5401bSmrg		/* multiplicator 9 is from  SPI_SHADER_COL_FORMAT */
36400ed5401bSmrg		shader_addr += 256 * 9;
36410ed5401bSmrg		ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
36420ed5401bSmrg		ptr[i++] = 0x7;
36430ed5401bSmrg		ptr[i++] = 0xffff;
36440ed5401bSmrg		ptr[i++] = shader_addr >> 8;
36450ed5401bSmrg		ptr[i++] = shader_addr >> 40;
36460ed5401bSmrg	} else if (version == 10) {
36470ed5401bSmrg		shader_addr += 256 * 9;
36480ed5401bSmrg		/* 0x2c08	 SPI_SHADER_PGM_LO_PS
36490ed5401bSmrg		     0x2c09	 SPI_SHADER_PGM_HI_PS */
36500ed5401bSmrg		ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
36510ed5401bSmrg		ptr[i++] = 0x8;
36520ed5401bSmrg		ptr[i++] = shader_addr >> 8;
36530ed5401bSmrg		ptr[i++] = shader_addr >> 40;
36540ed5401bSmrg
36550ed5401bSmrg		/* mmSPI_SHADER_PGM_RSRC3_PS */
36560ed5401bSmrg		ptr[i++] = PACKET3(PKT3_SET_SH_REG_INDEX, 1);
36570ed5401bSmrg		ptr[i++] = 0x30000007;
36580ed5401bSmrg		ptr[i++] = 0xffff;
36590ed5401bSmrg		/* mmSPI_SHADER_PGM_RSRC4_PS */
36600ed5401bSmrg		ptr[i++] = PACKET3(PKT3_SET_SH_REG_INDEX, 1);
36610ed5401bSmrg		ptr[i++] = 0x30000001;
36620ed5401bSmrg		ptr[i++] = 0xffff;
36630ed5401bSmrg	}
36645324fb0dSmrg
36655324fb0dSmrg	for (j = 0; j < num_sh_reg; j++) {
36665324fb0dSmrg		ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
36675324fb0dSmrg		ptr[i++] = sh_registers[j * 2] - 0x2c00;
36685324fb0dSmrg		ptr[i++] = sh_registers[j * 2 + 1];
36695324fb0dSmrg	}
36705324fb0dSmrg
36715324fb0dSmrg	for (j = 0; j < num_context_reg; j++) {
36725324fb0dSmrg		if (context_registers[j * 2] != 0xA1C5) {
36735324fb0dSmrg			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
36745324fb0dSmrg			ptr[i++] = context_registers[j * 2] - 0xa000;
36755324fb0dSmrg			ptr[i++] = context_registers[j * 2 + 1];
36765324fb0dSmrg		}
36775324fb0dSmrg
36785324fb0dSmrg		if (context_registers[j * 2] == 0xA1B4) {
36795324fb0dSmrg			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
36805324fb0dSmrg			ptr[i++] = 0x1b3;
36815324fb0dSmrg			ptr[i++] = 2;
36825324fb0dSmrg		}
36835324fb0dSmrg	}
36845324fb0dSmrg
36855324fb0dSmrg	return i;
36865324fb0dSmrg}
36875324fb0dSmrg
36880ed5401bSmrgstatic int amdgpu_draw_draw(uint32_t *ptr, uint32_t version)
36895324fb0dSmrg{
36905324fb0dSmrg	int i = 0;
36915324fb0dSmrg
36920ed5401bSmrg	if (version == 9) {
36930ed5401bSmrg		/* mmIA_MULTI_VGT_PARAM */
36940ed5401bSmrg		ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
36950ed5401bSmrg		ptr[i++] = 0x40000258;
36960ed5401bSmrg		ptr[i++] = 0xd00ff;
36970ed5401bSmrg		/* mmVGT_PRIMITIVE_TYPE */
36980ed5401bSmrg		ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
36990ed5401bSmrg		ptr[i++] = 0x10000242;
37000ed5401bSmrg		ptr[i++] = 0x11;
37010ed5401bSmrg	} else if (version == 10) {
37020ed5401bSmrg		/* mmGE_CNTL */
37030ed5401bSmrg		ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
37040ed5401bSmrg		ptr[i++] = 0x25b;
37050ed5401bSmrg		ptr[i++] = 0xff;
37060ed5401bSmrg		/* mmVGT_PRIMITIVE_TYPE */
37070ed5401bSmrg		ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
37080ed5401bSmrg		ptr[i++] = 0x242;
37090ed5401bSmrg		ptr[i++] = 0x11;
37100ed5401bSmrg	}
37115324fb0dSmrg
37125324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1);
37135324fb0dSmrg	ptr[i++] = 3;
37145324fb0dSmrg	ptr[i++] = 2;
37155324fb0dSmrg
37165324fb0dSmrg	return i;
37175324fb0dSmrg}
37185324fb0dSmrg
37195324fb0dSmrgvoid amdgpu_memset_draw(amdgpu_device_handle device_handle,
37205324fb0dSmrg			amdgpu_bo_handle bo_shader_ps,
37215324fb0dSmrg			amdgpu_bo_handle bo_shader_vs,
37225324fb0dSmrg			uint64_t mc_address_shader_ps,
37235324fb0dSmrg			uint64_t mc_address_shader_vs,
37240ed5401bSmrg			uint32_t ring_id, uint32_t version)
37255324fb0dSmrg{
37265324fb0dSmrg	amdgpu_context_handle context_handle;
37275324fb0dSmrg	amdgpu_bo_handle bo_dst, bo_cmd, resources[4];
37285324fb0dSmrg	volatile unsigned char *ptr_dst;
37295324fb0dSmrg	uint32_t *ptr_cmd;
37305324fb0dSmrg	uint64_t mc_address_dst, mc_address_cmd;
37315324fb0dSmrg	amdgpu_va_handle va_dst, va_cmd;
37325324fb0dSmrg	int i, r;
37335324fb0dSmrg	int bo_dst_size = 16384;
37345324fb0dSmrg	int bo_cmd_size = 4096;
37355324fb0dSmrg	struct amdgpu_cs_request ibs_request = {0};
37365324fb0dSmrg	struct amdgpu_cs_ib_info ib_info = {0};
37375324fb0dSmrg	struct amdgpu_cs_fence fence_status = {0};
37385324fb0dSmrg	uint32_t expired;
37395324fb0dSmrg	amdgpu_bo_list_handle bo_list;
37405324fb0dSmrg
37415324fb0dSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
37425324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
37435324fb0dSmrg
37445324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
37455324fb0dSmrg					AMDGPU_GEM_DOMAIN_GTT, 0,
37465324fb0dSmrg					&bo_cmd, (void **)&ptr_cmd,
37475324fb0dSmrg					&mc_address_cmd, &va_cmd);
37485324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
37495324fb0dSmrg	memset(ptr_cmd, 0, bo_cmd_size);
37505324fb0dSmrg
37515324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
37525324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
37535324fb0dSmrg					&bo_dst, (void **)&ptr_dst,
37545324fb0dSmrg					&mc_address_dst, &va_dst);
37555324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
37565324fb0dSmrg
37575324fb0dSmrg	i = 0;
37580ed5401bSmrg	i += amdgpu_draw_init(ptr_cmd + i, version);
37595324fb0dSmrg
37600ed5401bSmrg	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, version, 0);
37615324fb0dSmrg
37620ed5401bSmrg	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, version, 0);
37635324fb0dSmrg
37640ed5401bSmrg	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs,
37650ed5401bSmrg						    version, 0);
37665324fb0dSmrg
37670ed5401bSmrg	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps, version);
37685324fb0dSmrg
37695324fb0dSmrg	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
37705324fb0dSmrg	ptr_cmd[i++] = 0xc;
37715324fb0dSmrg	ptr_cmd[i++] = 0x33333333;
37725324fb0dSmrg	ptr_cmd[i++] = 0x33333333;
37735324fb0dSmrg	ptr_cmd[i++] = 0x33333333;
37745324fb0dSmrg	ptr_cmd[i++] = 0x33333333;
37755324fb0dSmrg
37760ed5401bSmrg	i += amdgpu_draw_draw(ptr_cmd + i, version);
37775324fb0dSmrg
37785324fb0dSmrg	while (i & 7)
37795324fb0dSmrg		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
37805324fb0dSmrg
37815324fb0dSmrg	resources[0] = bo_dst;
37825324fb0dSmrg	resources[1] = bo_shader_ps;
37835324fb0dSmrg	resources[2] = bo_shader_vs;
37845324fb0dSmrg	resources[3] = bo_cmd;
37859bd392adSmrg	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
37865324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
37875324fb0dSmrg
37885324fb0dSmrg	ib_info.ib_mc_address = mc_address_cmd;
37895324fb0dSmrg	ib_info.size = i;
37905324fb0dSmrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
37915324fb0dSmrg	ibs_request.ring = ring_id;
37925324fb0dSmrg	ibs_request.resources = bo_list;
37935324fb0dSmrg	ibs_request.number_of_ibs = 1;
37945324fb0dSmrg	ibs_request.ibs = &ib_info;
37955324fb0dSmrg	ibs_request.fence_info.handle = NULL;
37965324fb0dSmrg
37975324fb0dSmrg	/* submit CS */
37985324fb0dSmrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
37995324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
38005324fb0dSmrg
38015324fb0dSmrg	r = amdgpu_bo_list_destroy(bo_list);
38025324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
38035324fb0dSmrg
38045324fb0dSmrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
38055324fb0dSmrg	fence_status.ip_instance = 0;
38065324fb0dSmrg	fence_status.ring = ring_id;
38075324fb0dSmrg	fence_status.context = context_handle;
38085324fb0dSmrg	fence_status.fence = ibs_request.seq_no;
38095324fb0dSmrg
38105324fb0dSmrg	/* wait for IB accomplished */
38115324fb0dSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
38125324fb0dSmrg					 AMDGPU_TIMEOUT_INFINITE,
38135324fb0dSmrg					 0, &expired);
38145324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
38155324fb0dSmrg	CU_ASSERT_EQUAL(expired, true);
38165324fb0dSmrg
38175324fb0dSmrg	/* verify if memset test result meets with expected */
38185324fb0dSmrg	i = 0;
38195324fb0dSmrg	while(i < bo_dst_size) {
38205324fb0dSmrg		CU_ASSERT_EQUAL(ptr_dst[i++], 0x33);
38215324fb0dSmrg	}
38225324fb0dSmrg
38235324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
38245324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
38255324fb0dSmrg
38265324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
38275324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
38285324fb0dSmrg
38295324fb0dSmrg	r = amdgpu_cs_ctx_free(context_handle);
38305324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
38315324fb0dSmrg}
38325324fb0dSmrg
38335324fb0dSmrgstatic void amdgpu_memset_draw_test(amdgpu_device_handle device_handle,
38340ed5401bSmrg				    uint32_t ring, int version)
38355324fb0dSmrg{
38365324fb0dSmrg	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
38375324fb0dSmrg	void *ptr_shader_ps;
38385324fb0dSmrg	void *ptr_shader_vs;
38395324fb0dSmrg	uint64_t mc_address_shader_ps, mc_address_shader_vs;
38405324fb0dSmrg	amdgpu_va_handle va_shader_ps, va_shader_vs;
38415324fb0dSmrg	int r;
38425324fb0dSmrg	int bo_shader_size = 4096;
38435324fb0dSmrg
38445324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
38455324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
38465324fb0dSmrg					&bo_shader_ps, &ptr_shader_ps,
38475324fb0dSmrg					&mc_address_shader_ps, &va_shader_ps);
38485324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
384988f8a8d2Smrg	memset(ptr_shader_ps, 0, bo_shader_size);
38505324fb0dSmrg
38515324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
38525324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
38535324fb0dSmrg					&bo_shader_vs, &ptr_shader_vs,
38545324fb0dSmrg					&mc_address_shader_vs, &va_shader_vs);
38555324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
385688f8a8d2Smrg	memset(ptr_shader_vs, 0, bo_shader_size);
38575324fb0dSmrg
38580ed5401bSmrg	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_CONST, version);
38595324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
38605324fb0dSmrg
38610ed5401bSmrg	r = amdgpu_draw_load_vs_shader(ptr_shader_vs, version);
38625324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
38635324fb0dSmrg
38645324fb0dSmrg	amdgpu_memset_draw(device_handle, bo_shader_ps, bo_shader_vs,
38650ed5401bSmrg			mc_address_shader_ps, mc_address_shader_vs,
38660ed5401bSmrg			ring, version);
38675324fb0dSmrg
38685324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
38695324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
38705324fb0dSmrg
38715324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
38725324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
38735324fb0dSmrg}
38745324fb0dSmrg
38755324fb0dSmrgstatic void amdgpu_memcpy_draw(amdgpu_device_handle device_handle,
38765324fb0dSmrg			       amdgpu_bo_handle bo_shader_ps,
38775324fb0dSmrg			       amdgpu_bo_handle bo_shader_vs,
38785324fb0dSmrg			       uint64_t mc_address_shader_ps,
38795324fb0dSmrg			       uint64_t mc_address_shader_vs,
38800ed5401bSmrg			       uint32_t ring, int version, int hang)
38815324fb0dSmrg{
38825324fb0dSmrg	amdgpu_context_handle context_handle;
38835324fb0dSmrg	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
38845324fb0dSmrg	volatile unsigned char *ptr_dst;
38855324fb0dSmrg	unsigned char *ptr_src;
38865324fb0dSmrg	uint32_t *ptr_cmd;
38875324fb0dSmrg	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
38885324fb0dSmrg	amdgpu_va_handle va_dst, va_src, va_cmd;
38895324fb0dSmrg	int i, r;
38905324fb0dSmrg	int bo_size = 16384;
38915324fb0dSmrg	int bo_cmd_size = 4096;
38925324fb0dSmrg	struct amdgpu_cs_request ibs_request = {0};
38935324fb0dSmrg	struct amdgpu_cs_ib_info ib_info= {0};
38949bd392adSmrg	uint32_t hang_state, hangs;
38959bd392adSmrg	uint32_t expired;
38965324fb0dSmrg	amdgpu_bo_list_handle bo_list;
38975324fb0dSmrg	struct amdgpu_cs_fence fence_status = {0};
38985324fb0dSmrg
38995324fb0dSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
39005324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
39015324fb0dSmrg
39025324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
39035324fb0dSmrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
39045324fb0dSmrg				    &bo_cmd, (void **)&ptr_cmd,
39055324fb0dSmrg				    &mc_address_cmd, &va_cmd);
39065324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
39075324fb0dSmrg	memset(ptr_cmd, 0, bo_cmd_size);
39085324fb0dSmrg
39095324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
39105324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
39115324fb0dSmrg					&bo_src, (void **)&ptr_src,
39125324fb0dSmrg					&mc_address_src, &va_src);
39135324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
39145324fb0dSmrg
39155324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
39165324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
39175324fb0dSmrg					&bo_dst, (void **)&ptr_dst,
39185324fb0dSmrg					&mc_address_dst, &va_dst);
39195324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
39205324fb0dSmrg
39215324fb0dSmrg	memset(ptr_src, 0x55, bo_size);
39225324fb0dSmrg
39235324fb0dSmrg	i = 0;
39240ed5401bSmrg	i += amdgpu_draw_init(ptr_cmd + i, version);
39255324fb0dSmrg
39260ed5401bSmrg	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, version, 0);
39275324fb0dSmrg
39280ed5401bSmrg	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, version, 0);
39295324fb0dSmrg
39300ed5401bSmrg	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs,
39310ed5401bSmrg						    version, 0);
39325324fb0dSmrg
39330ed5401bSmrg	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps, version);
39345324fb0dSmrg
39355324fb0dSmrg	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
39360ed5401bSmrg	if (version == 9) {
39370ed5401bSmrg		ptr_cmd[i++] = 0xc;
39380ed5401bSmrg		ptr_cmd[i++] = mc_address_src >> 8;
39390ed5401bSmrg		ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
39400ed5401bSmrg		ptr_cmd[i++] = 0x7c01f;
39410ed5401bSmrg		ptr_cmd[i++] = 0x90500fac;
39420ed5401bSmrg		ptr_cmd[i++] = 0x3e000;
39430ed5401bSmrg		i += 3;
39440ed5401bSmrg	} else if (version == 10) {
39450ed5401bSmrg		ptr_cmd[i++] = 0xc;
39460ed5401bSmrg		ptr_cmd[i++] = mc_address_src >> 8;
39470ed5401bSmrg		ptr_cmd[i++] = mc_address_src >> 40 | 0xc4b00000;
39480ed5401bSmrg		ptr_cmd[i++] = 0x8007c007;
39490ed5401bSmrg		ptr_cmd[i++] = 0x90500fac;
39500ed5401bSmrg		i += 2;
39510ed5401bSmrg		ptr_cmd[i++] = 0x400;
39520ed5401bSmrg		i++;
39530ed5401bSmrg	}
39545324fb0dSmrg
39555324fb0dSmrg	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
39565324fb0dSmrg	ptr_cmd[i++] = 0x14;
39575324fb0dSmrg	ptr_cmd[i++] = 0x92;
39585324fb0dSmrg	i += 3;
39595324fb0dSmrg
396088f8a8d2Smrg	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
39615324fb0dSmrg	ptr_cmd[i++] = 0x191;
39625324fb0dSmrg	ptr_cmd[i++] = 0;
39635324fb0dSmrg
39640ed5401bSmrg	i += amdgpu_draw_draw(ptr_cmd + i, version);
39655324fb0dSmrg
39665324fb0dSmrg	while (i & 7)
39675324fb0dSmrg		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
39685324fb0dSmrg
39695324fb0dSmrg	resources[0] = bo_dst;
39705324fb0dSmrg	resources[1] = bo_src;
39715324fb0dSmrg	resources[2] = bo_shader_ps;
39725324fb0dSmrg	resources[3] = bo_shader_vs;
39735324fb0dSmrg	resources[4] = bo_cmd;
39745324fb0dSmrg	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
39755324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
39765324fb0dSmrg
39775324fb0dSmrg	ib_info.ib_mc_address = mc_address_cmd;
39785324fb0dSmrg	ib_info.size = i;
39795324fb0dSmrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
39805324fb0dSmrg	ibs_request.ring = ring;
39815324fb0dSmrg	ibs_request.resources = bo_list;
39825324fb0dSmrg	ibs_request.number_of_ibs = 1;
39835324fb0dSmrg	ibs_request.ibs = &ib_info;
39845324fb0dSmrg	ibs_request.fence_info.handle = NULL;
39855324fb0dSmrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
39865324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
39875324fb0dSmrg
39885324fb0dSmrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
39895324fb0dSmrg	fence_status.ip_instance = 0;
39905324fb0dSmrg	fence_status.ring = ring;
39915324fb0dSmrg	fence_status.context = context_handle;
39925324fb0dSmrg	fence_status.fence = ibs_request.seq_no;
39935324fb0dSmrg
39945324fb0dSmrg	/* wait for IB accomplished */
39955324fb0dSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
39965324fb0dSmrg					 AMDGPU_TIMEOUT_INFINITE,
39975324fb0dSmrg					 0, &expired);
39989bd392adSmrg	if (!hang) {
39999bd392adSmrg		CU_ASSERT_EQUAL(r, 0);
40009bd392adSmrg		CU_ASSERT_EQUAL(expired, true);
40015324fb0dSmrg
40029bd392adSmrg		/* verify if memcpy test result meets with expected */
40039bd392adSmrg		i = 0;
40049bd392adSmrg		while(i < bo_size) {
40059bd392adSmrg			CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
40069bd392adSmrg			i++;
40079bd392adSmrg		}
40089bd392adSmrg	} else {
40099bd392adSmrg		r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
40109bd392adSmrg		CU_ASSERT_EQUAL(r, 0);
40119bd392adSmrg		CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
40125324fb0dSmrg	}
40135324fb0dSmrg
40145324fb0dSmrg	r = amdgpu_bo_list_destroy(bo_list);
40155324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
40165324fb0dSmrg
40175324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
40185324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
40195324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
40205324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
40215324fb0dSmrg
40225324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
40235324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
40245324fb0dSmrg
40255324fb0dSmrg	r = amdgpu_cs_ctx_free(context_handle);
40265324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
40275324fb0dSmrg}
40285324fb0dSmrg
40299bd392adSmrgvoid amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring,
40300ed5401bSmrg			     int version, int hang)
40315324fb0dSmrg{
40325324fb0dSmrg	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
40335324fb0dSmrg	void *ptr_shader_ps;
40345324fb0dSmrg	void *ptr_shader_vs;
40355324fb0dSmrg	uint64_t mc_address_shader_ps, mc_address_shader_vs;
40365324fb0dSmrg	amdgpu_va_handle va_shader_ps, va_shader_vs;
40375324fb0dSmrg	int bo_shader_size = 4096;
40389bd392adSmrg	enum ps_type ps_type = hang ? PS_HANG : PS_TEX;
40395324fb0dSmrg	int r;
40405324fb0dSmrg
40415324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
40425324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
40435324fb0dSmrg					&bo_shader_ps, &ptr_shader_ps,
40445324fb0dSmrg					&mc_address_shader_ps, &va_shader_ps);
40455324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
404688f8a8d2Smrg	memset(ptr_shader_ps, 0, bo_shader_size);
40475324fb0dSmrg
40485324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
40495324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
40505324fb0dSmrg					&bo_shader_vs, &ptr_shader_vs,
40515324fb0dSmrg					&mc_address_shader_vs, &va_shader_vs);
40525324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
405388f8a8d2Smrg	memset(ptr_shader_vs, 0, bo_shader_size);
40545324fb0dSmrg
40550ed5401bSmrg	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, ps_type, version);
40565324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
40575324fb0dSmrg
40580ed5401bSmrg	r = amdgpu_draw_load_vs_shader(ptr_shader_vs, version);
40595324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
40605324fb0dSmrg
40615324fb0dSmrg	amdgpu_memcpy_draw(device_handle, bo_shader_ps, bo_shader_vs,
40620ed5401bSmrg			mc_address_shader_ps, mc_address_shader_vs,
40630ed5401bSmrg			ring, version, hang);
40645324fb0dSmrg
40655324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
40665324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
40675324fb0dSmrg
40685324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
40695324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
40705324fb0dSmrg}
40715324fb0dSmrg
40725324fb0dSmrgstatic void amdgpu_draw_test(void)
40735324fb0dSmrg{
40745324fb0dSmrg	int r;
40755324fb0dSmrg	struct drm_amdgpu_info_hw_ip info;
40760ed5401bSmrg	uint32_t ring_id, version;
40775324fb0dSmrg
40785324fb0dSmrg	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
40795324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
408088f8a8d2Smrg	if (!info.available_rings)
408188f8a8d2Smrg		printf("SKIP ... as there's no graphics ring\n");
40825324fb0dSmrg
40830ed5401bSmrg	version = info.hw_ip_version_major;
40840ed5401bSmrg	if (version != 9 && version != 10) {
40850ed5401bSmrg		printf("SKIP ... unsupported gfx version %d\n", version);
40860ed5401bSmrg		return;
40870ed5401bSmrg	}
40880ed5401bSmrg
40895324fb0dSmrg	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
40900ed5401bSmrg		amdgpu_memset_draw_test(device_handle, ring_id, version);
40910ed5401bSmrg		amdgpu_memcpy_draw_test(device_handle, ring_id, version, 0);
40925324fb0dSmrg	}
40935324fb0dSmrg}
409488f8a8d2Smrg
40950ed5401bSmrgvoid amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint32_t ring, int version)
40969bd392adSmrg{
40979bd392adSmrg	amdgpu_context_handle context_handle;
40989bd392adSmrg	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
40999bd392adSmrg	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
41009bd392adSmrg	void *ptr_shader_ps;
41019bd392adSmrg	void *ptr_shader_vs;
41029bd392adSmrg	volatile unsigned char *ptr_dst;
41039bd392adSmrg	unsigned char *ptr_src;
41049bd392adSmrg	uint32_t *ptr_cmd;
41059bd392adSmrg	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
41069bd392adSmrg	uint64_t mc_address_shader_ps, mc_address_shader_vs;
41079bd392adSmrg	amdgpu_va_handle va_shader_ps, va_shader_vs;
41089bd392adSmrg	amdgpu_va_handle va_dst, va_src, va_cmd;
41099bd392adSmrg	struct amdgpu_gpu_info gpu_info = {0};
41109bd392adSmrg	int i, r;
41119bd392adSmrg	int bo_size = 0x4000000;
41129bd392adSmrg	int bo_shader_ps_size = 0x400000;
41139bd392adSmrg	int bo_shader_vs_size = 4096;
41149bd392adSmrg	int bo_cmd_size = 4096;
41159bd392adSmrg	struct amdgpu_cs_request ibs_request = {0};
41169bd392adSmrg	struct amdgpu_cs_ib_info ib_info= {0};
41179bd392adSmrg	uint32_t hang_state, hangs, expired;
41189bd392adSmrg	amdgpu_bo_list_handle bo_list;
41199bd392adSmrg	struct amdgpu_cs_fence fence_status = {0};
41209bd392adSmrg
41219bd392adSmrg	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
41229bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
41239bd392adSmrg
41249bd392adSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
41259bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
41269bd392adSmrg
41279bd392adSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
41289bd392adSmrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
41299bd392adSmrg				    &bo_cmd, (void **)&ptr_cmd,
41309bd392adSmrg				    &mc_address_cmd, &va_cmd);
41319bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
41329bd392adSmrg	memset(ptr_cmd, 0, bo_cmd_size);
41339bd392adSmrg
41349bd392adSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_ps_size, 4096,
41359bd392adSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
41369bd392adSmrg					&bo_shader_ps, &ptr_shader_ps,
41379bd392adSmrg					&mc_address_shader_ps, &va_shader_ps);
41389bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
41399bd392adSmrg	memset(ptr_shader_ps, 0, bo_shader_ps_size);
41409bd392adSmrg
41419bd392adSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_vs_size, 4096,
41429bd392adSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
41439bd392adSmrg					&bo_shader_vs, &ptr_shader_vs,
41449bd392adSmrg					&mc_address_shader_vs, &va_shader_vs);
41459bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
41469bd392adSmrg	memset(ptr_shader_vs, 0, bo_shader_vs_size);
41479bd392adSmrg
41489bd392adSmrg	r = amdgpu_draw_load_ps_shader_hang_slow(ptr_shader_ps, gpu_info.family_id);
41499bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
41509bd392adSmrg
41510ed5401bSmrg	r = amdgpu_draw_load_vs_shader(ptr_shader_vs, version);
41529bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
41539bd392adSmrg
41549bd392adSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
41559bd392adSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
41569bd392adSmrg					&bo_src, (void **)&ptr_src,
41579bd392adSmrg					&mc_address_src, &va_src);
41589bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
41599bd392adSmrg
41609bd392adSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
41619bd392adSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
41629bd392adSmrg					&bo_dst, (void **)&ptr_dst,
41639bd392adSmrg					&mc_address_dst, &va_dst);
41649bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
41659bd392adSmrg
41669bd392adSmrg	memset(ptr_src, 0x55, bo_size);
41679bd392adSmrg
41689bd392adSmrg	i = 0;
41690ed5401bSmrg	i += amdgpu_draw_init(ptr_cmd + i, version);
41709bd392adSmrg
41710ed5401bSmrg	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, version, 1);
41729bd392adSmrg
41730ed5401bSmrg	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, version, 1);
41749bd392adSmrg
41759bd392adSmrg	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX,
41760ed5401bSmrg							mc_address_shader_vs, version, 1);
41779bd392adSmrg
41780ed5401bSmrg	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps, version);
41799bd392adSmrg
41809bd392adSmrg	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
41810ed5401bSmrg
41820ed5401bSmrg	if (version == 9) {
41830ed5401bSmrg		ptr_cmd[i++] = 0xc;
41840ed5401bSmrg		ptr_cmd[i++] = mc_address_src >> 8;
41850ed5401bSmrg		ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
41860ed5401bSmrg		ptr_cmd[i++] = 0x1ffcfff;
41870ed5401bSmrg		ptr_cmd[i++] = 0x90500fac;
41880ed5401bSmrg		ptr_cmd[i++] = 0x1ffe000;
41890ed5401bSmrg		i += 3;
41900ed5401bSmrg	} else if (version == 10) {
41910ed5401bSmrg		ptr_cmd[i++] = 0xc;
41920ed5401bSmrg		ptr_cmd[i++] = mc_address_src >> 8;
41930ed5401bSmrg		ptr_cmd[i++] = mc_address_src >> 40 | 0xc4b00000;
41940ed5401bSmrg		ptr_cmd[i++] = 0x81ffc1ff;
41950ed5401bSmrg		ptr_cmd[i++] = 0x90500fac;
41960ed5401bSmrg		i += 4;
41970ed5401bSmrg	}
41989bd392adSmrg
41999bd392adSmrg	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
42009bd392adSmrg	ptr_cmd[i++] = 0x14;
42019bd392adSmrg	ptr_cmd[i++] = 0x92;
42029bd392adSmrg	i += 3;
42039bd392adSmrg
42049bd392adSmrg	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
42059bd392adSmrg	ptr_cmd[i++] = 0x191;
42069bd392adSmrg	ptr_cmd[i++] = 0;
42079bd392adSmrg
42080ed5401bSmrg	i += amdgpu_draw_draw(ptr_cmd + i, version);
42099bd392adSmrg
42109bd392adSmrg	while (i & 7)
42119bd392adSmrg		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
42129bd392adSmrg
42139bd392adSmrg	resources[0] = bo_dst;
42149bd392adSmrg	resources[1] = bo_src;
42159bd392adSmrg	resources[2] = bo_shader_ps;
42169bd392adSmrg	resources[3] = bo_shader_vs;
42179bd392adSmrg	resources[4] = bo_cmd;
42189bd392adSmrg	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
42199bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
42209bd392adSmrg
42219bd392adSmrg	ib_info.ib_mc_address = mc_address_cmd;
42229bd392adSmrg	ib_info.size = i;
42239bd392adSmrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
42249bd392adSmrg	ibs_request.ring = ring;
42259bd392adSmrg	ibs_request.resources = bo_list;
42269bd392adSmrg	ibs_request.number_of_ibs = 1;
42279bd392adSmrg	ibs_request.ibs = &ib_info;
42289bd392adSmrg	ibs_request.fence_info.handle = NULL;
42299bd392adSmrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
42309bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
42319bd392adSmrg
42329bd392adSmrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
42339bd392adSmrg	fence_status.ip_instance = 0;
42349bd392adSmrg	fence_status.ring = ring;
42359bd392adSmrg	fence_status.context = context_handle;
42369bd392adSmrg	fence_status.fence = ibs_request.seq_no;
42379bd392adSmrg
42389bd392adSmrg	/* wait for IB accomplished */
42399bd392adSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
42409bd392adSmrg					 AMDGPU_TIMEOUT_INFINITE,
42419bd392adSmrg					 0, &expired);
42429bd392adSmrg
42439bd392adSmrg	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
42449bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
42459bd392adSmrg	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
42469bd392adSmrg
42479bd392adSmrg	r = amdgpu_bo_list_destroy(bo_list);
42489bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
42499bd392adSmrg
42509bd392adSmrg	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
42519bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
42529bd392adSmrg	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
42539bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
42549bd392adSmrg
42559bd392adSmrg	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
42569bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
42579bd392adSmrg
42589bd392adSmrg	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_ps_size);
42599bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
42609bd392adSmrg	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_vs_size);
42619bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
42629bd392adSmrg
42639bd392adSmrg	r = amdgpu_cs_ctx_free(context_handle);
42649bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
42659bd392adSmrg}
42669bd392adSmrg
426788f8a8d2Smrgstatic void amdgpu_gpu_reset_test(void)
426888f8a8d2Smrg{
426988f8a8d2Smrg	int r;
427088f8a8d2Smrg	char debugfs_path[256], tmp[10];
427188f8a8d2Smrg	int fd;
427288f8a8d2Smrg	struct stat sbuf;
427388f8a8d2Smrg	amdgpu_context_handle context_handle;
427488f8a8d2Smrg	uint32_t hang_state, hangs;
427588f8a8d2Smrg
427688f8a8d2Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
427788f8a8d2Smrg	CU_ASSERT_EQUAL(r, 0);
427888f8a8d2Smrg
427988f8a8d2Smrg	r = fstat(drm_amdgpu[0], &sbuf);
428088f8a8d2Smrg	CU_ASSERT_EQUAL(r, 0);
428188f8a8d2Smrg
428288f8a8d2Smrg	sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/amdgpu_gpu_recover", minor(sbuf.st_rdev));
428388f8a8d2Smrg	fd = open(debugfs_path, O_RDONLY);
428488f8a8d2Smrg	CU_ASSERT(fd >= 0);
428588f8a8d2Smrg
428688f8a8d2Smrg	r = read(fd, tmp, sizeof(tmp)/sizeof(char));
428788f8a8d2Smrg	CU_ASSERT(r > 0);
428888f8a8d2Smrg
428988f8a8d2Smrg	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
429088f8a8d2Smrg	CU_ASSERT_EQUAL(r, 0);
429188f8a8d2Smrg	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
429288f8a8d2Smrg
429388f8a8d2Smrg	close(fd);
429488f8a8d2Smrg	r = amdgpu_cs_ctx_free(context_handle);
429588f8a8d2Smrg	CU_ASSERT_EQUAL(r, 0);
429688f8a8d2Smrg
429788f8a8d2Smrg	amdgpu_compute_dispatch_test();
429888f8a8d2Smrg	amdgpu_gfx_dispatch_test();
429988f8a8d2Smrg}
43000ed5401bSmrg
43010ed5401bSmrgstatic void amdgpu_stable_pstate_test(void)
43020ed5401bSmrg{
43030ed5401bSmrg	int r;
43040ed5401bSmrg	amdgpu_context_handle context_handle;
43050ed5401bSmrg	uint32_t current_pstate = 0, new_pstate = 0;
43060ed5401bSmrg
43070ed5401bSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
43080ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
43090ed5401bSmrg
43100ed5401bSmrg	r = amdgpu_cs_ctx_stable_pstate(context_handle,
43110ed5401bSmrg					AMDGPU_CTX_OP_GET_STABLE_PSTATE,
43120ed5401bSmrg					0, &current_pstate);
43130ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
43140ed5401bSmrg	CU_ASSERT_EQUAL(new_pstate, AMDGPU_CTX_STABLE_PSTATE_NONE);
43150ed5401bSmrg
43160ed5401bSmrg	r = amdgpu_cs_ctx_stable_pstate(context_handle,
43170ed5401bSmrg					AMDGPU_CTX_OP_SET_STABLE_PSTATE,
43180ed5401bSmrg					AMDGPU_CTX_STABLE_PSTATE_PEAK, NULL);
43190ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
43200ed5401bSmrg
43210ed5401bSmrg	r = amdgpu_cs_ctx_stable_pstate(context_handle,
43220ed5401bSmrg					AMDGPU_CTX_OP_GET_STABLE_PSTATE,
43230ed5401bSmrg					0, &new_pstate);
43240ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
43250ed5401bSmrg	CU_ASSERT_EQUAL(new_pstate, AMDGPU_CTX_STABLE_PSTATE_PEAK);
43260ed5401bSmrg
43270ed5401bSmrg	r = amdgpu_cs_ctx_free(context_handle);
43280ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
43290ed5401bSmrg}
4330