13f012e29Smrg/*
23f012e29Smrg * Copyright 2014 Advanced Micro Devices, Inc.
33f012e29Smrg *
43f012e29Smrg * Permission is hereby granted, free of charge, to any person obtaining a
53f012e29Smrg * copy of this software and associated documentation files (the "Software"),
63f012e29Smrg * to deal in the Software without restriction, including without limitation
73f012e29Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
83f012e29Smrg * and/or sell copies of the Software, and to permit persons to whom the
93f012e29Smrg * Software is furnished to do so, subject to the following conditions:
103f012e29Smrg *
113f012e29Smrg * The above copyright notice and this permission notice shall be included in
123f012e29Smrg * all copies or substantial portions of the Software.
133f012e29Smrg *
143f012e29Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
153f012e29Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
163f012e29Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
173f012e29Smrg * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
183f012e29Smrg * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
193f012e29Smrg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
203f012e29Smrg * OTHER DEALINGS IN THE SOFTWARE.
213f012e29Smrg *
223f012e29Smrg*/
233f012e29Smrg
243f012e29Smrg#include <stdio.h>
253f012e29Smrg#include <stdlib.h>
263f012e29Smrg#include <unistd.h>
2788f8a8d2Smrg#include <sys/types.h>
2888f8a8d2Smrg#ifdef MAJOR_IN_SYSMACROS
2988f8a8d2Smrg#include <sys/sysmacros.h>
3088f8a8d2Smrg#endif
3188f8a8d2Smrg#include <sys/stat.h>
3288f8a8d2Smrg#include <fcntl.h>
339bd392adSmrg#if HAVE_ALLOCA_H
343f012e29Smrg# include <alloca.h>
353f012e29Smrg#endif
3600a23bdaSmrg#include <sys/wait.h>
373f012e29Smrg
383f012e29Smrg#include "CUnit/Basic.h"
393f012e29Smrg
403f012e29Smrg#include "amdgpu_test.h"
413f012e29Smrg#include "amdgpu_drm.h"
4241687f09Smrg#include "amdgpu_internal.h"
437cdc0497Smrg#include "util_math.h"
443f012e29Smrg
453f012e29Smrgstatic  amdgpu_device_handle device_handle;
463f012e29Smrgstatic  uint32_t  major_version;
473f012e29Smrgstatic  uint32_t  minor_version;
48d8807b2fSmrgstatic  uint32_t  family_id;
494babd585Smrgstatic  uint32_t  chip_id;
504babd585Smrgstatic  uint32_t  chip_rev;
513f012e29Smrg
/*
 * Forward declarations of the test cases, listed in the order in which
 * basic_tests[] registers them.
 */
static void amdgpu_query_info_test(void);
static void amdgpu_userptr_test(void);
static void amdgpu_bo_eviction_test(void);
static void amdgpu_command_submission_gfx(void);
static void amdgpu_command_submission_compute(void);
static void amdgpu_command_submission_multi_fence(void);
static void amdgpu_command_submission_sdma(void);
static void amdgpu_semaphore_test(void);
static void amdgpu_sync_dependency_test(void);
static void amdgpu_compute_dispatch_test(void);
static void amdgpu_gfx_dispatch_test(void);
static void amdgpu_draw_test(void);
static void amdgpu_gpu_reset_test(void);
static void amdgpu_stable_pstate_test(void);
663f012e29Smrg
673f012e29Smrgstatic void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
683f012e29Smrgstatic void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
693f012e29Smrgstatic void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
7000a23bdaSmrgstatic void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
7100a23bdaSmrg				       unsigned ip_type,
7200a23bdaSmrg				       int instance, int pm4_dw, uint32_t *pm4_src,
7300a23bdaSmrg				       int res_cnt, amdgpu_bo_handle *resources,
7400a23bdaSmrg				       struct amdgpu_cs_ib_info *ib_info,
7500a23bdaSmrg				       struct amdgpu_cs_request *ibs_request);
7641687f09Smrg
773f012e29SmrgCU_TestInfo basic_tests[] = {
783f012e29Smrg	{ "Query Info Test",  amdgpu_query_info_test },
793f012e29Smrg	{ "Userptr Test",  amdgpu_userptr_test },
8000a23bdaSmrg	{ "bo eviction Test",  amdgpu_bo_eviction_test },
813f012e29Smrg	{ "Command submission Test (GFX)",  amdgpu_command_submission_gfx },
823f012e29Smrg	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
83d8807b2fSmrg	{ "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
843f012e29Smrg	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
853f012e29Smrg	{ "SW semaphore Test",  amdgpu_semaphore_test },
8600a23bdaSmrg	{ "Sync dependency Test",  amdgpu_sync_dependency_test },
8788f8a8d2Smrg	{ "Dispatch Test (Compute)",  amdgpu_compute_dispatch_test },
8888f8a8d2Smrg	{ "Dispatch Test (GFX)",  amdgpu_gfx_dispatch_test },
895324fb0dSmrg	{ "Draw Test",  amdgpu_draw_test },
9088f8a8d2Smrg	{ "GPU reset Test", amdgpu_gpu_reset_test },
910ed5401bSmrg	{ "Stable pstate Test", amdgpu_stable_pstate_test },
923f012e29Smrg	CU_TEST_INFO_NULL,
933f012e29Smrg};
/*
 * Expands to MAX2(8 KiB, page size).  MAX2 is provided by util_math.h
 * (presumably the larger of its two arguments - confirm there).
 */
#define BUFFER_SIZE (MAX2(8 * 1024, getpagesize()))

/* SDMA packet header: the opcode field is the low byte of the header. */
#define SDMA_PKT_HEADER_op_offset 0
#define SDMA_PKT_HEADER_op_mask   0x000000FF
#define SDMA_PKT_HEADER_op_shift  0
#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)

#define SDMA_OPCODE_CONSTANT_FILL  11
#       define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)           ((x) << 14)
	/* 0 = byte fill
	 * 2 = DW fill
	 */

/*
 * Build an SDMA header dword: op in bits [7:0], sub_op in bits [15:8] and
 * e in bits [31:16].  The masks are unsigned so that an e value with its
 * top bit set (e.g. SDMA_CONSTANT_FILL_EXTRA_SIZE(2)) is shifted as an
 * unsigned quantity instead of overflowing a signed int.
 */
#define SDMA_PACKET(op, sub_op, e)	((((e) & 0xFFFFu) << 16) |	\
					(((sub_op) & 0xFFu) << 8) |	\
					(((op) & 0xFFu) << 0))

#define	SDMA_OPCODE_WRITE				  2
#       define SDMA_WRITE_SUB_OPCODE_LINEAR               0
	/* Historical misspelling, kept so existing users keep compiling. */
#       define SDMA_WRTIE_SUB_OPCODE_TILED                1
	/* Correctly spelled alias of the name above. */
#       define SDMA_WRITE_SUB_OPCODE_TILED                SDMA_WRTIE_SUB_OPCODE_TILED

#define	SDMA_OPCODE_COPY				  1
#       define SDMA_COPY_SUB_OPCODE_LINEAR                0

/* SDMA atomic packet: opcode and the header field builders. */
#define	SDMA_OPCODE_ATOMIC	10
	/* Bit 0:
	 * 0 - single_pass_atomic.
	 * 1 - loop_until_compare_satisfied.
	 */
#	define SDMA_ATOMIC_LOOP(x)	((x) << 0)
	/* Bit 2:
	 * 0 - non-TMZ.
	 * 1 - TMZ.
	 */
#	define SDMA_ATOMIC_TMZ(x)	((x) << 2)
	/* Starts at bit 9; takes the same operation codes as Packet 3,
	 * e.g. TC_OP_ATOMIC_CMPSWAP_RTN_32 0x00000008.
	 */
#	define SDMA_ATOMIC_OPCODE(x)	((x) << 9)

/*
 * NOP dwords.  GFX_COMPUTE_NOP has the same value as
 * PACKET3(PACKET3_NOP, 0x3FFF) built with the PM4 macros of this file.
 */
#define GFX_COMPUTE_NOP	0xffff1000
#define SDMA_NOP	0x0
1303f012e29Smrg
/* PM4 */
#define	PACKET_TYPE0	0
#define	PACKET_TYPE1	1
#define	PACKET_TYPE2	2
#define	PACKET_TYPE3	3

/* Field extractors for a PM4 header dword h. */
#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)

/* Type-0 header: reg in bits [15:0], n in bits [29:16]. */
#define PACKET0(reg, n)	((PACKET_TYPE0 << 30) |				\
			 ((reg) & 0xFFFF) |			\
			 ((n) & 0x3FFF) << 16)
#define CP_PACKET2			0x80000000
#define		PACKET2_PAD_SHIFT		0
#define		PACKET2_PAD_MASK		(0x3fffffff << 0)

/* NOTE(review): REG_SET is not defined in the visible part of this file. */
#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))

/*
 * Type-3 header: opcode in bits [15:8], n in bits [29:16].  The packet
 * type is shifted as unsigned because 3 << 30 does not fit in a signed
 * int.
 */
#define PACKET3(op, n)	(((unsigned)PACKET_TYPE3 << 30) |		\
			 (((op) & 0xFF) << 8) |				\
			 (((n) & 0x3FFF) << 16))
/*
 * PACKET3 header with bit 1 additionally set.  Fully parenthesized so the
 * expansion stays one operand when combined with other operators.
 */
#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | (1 << 1))
1543f012e29Smrg
/* Packet 3 types */
#define	PACKET3_NOP					0x10

/* WRITE_DATA opcode and the builders for its control fields. */
#define	PACKET3_WRITE_DATA				0x37
#define		WRITE_DATA_DST_SEL(x)                   ((x) << 8)
		/* 0 - register
		 * 1 - memory (sync - via GRBM)
		 * 2 - gl2
		 * 3 - gds
		 * 4 - reserved
		 * 5 - memory (async - direct)
		 */
#define		WR_ONE_ADDR                             (1 << 16)
#define		WR_CONFIRM                              (1 << 20)
#define		WRITE_DATA_CACHE_POLICY(x)              ((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 */
		/* Shifted as unsigned: engine 2 (ce) lands on bit 31, which
		 * would overflow a signed int.
		 */
#define		WRITE_DATA_ENGINE_SEL(x)                ((unsigned)(x) << 30)
		/* 0 - me
		 * 1 - pfp
		 * 2 - ce
		 */
/* ATOMIC_MEM opcode, an operation code and the control field builders. */
#define	PACKET3_ATOMIC_MEM				0x1E
#define     TC_OP_ATOMIC_CMPSWAP_RTN_32          0x00000008
#define     ATOMIC_MEM_COMMAND(x)               ((x) << 8)
            /* 0 - single_pass_atomic.
             * 1 - loop_until_compare_satisfied.
             */
            /* Historical misspelling, kept so existing users keep compiling. */
#define     ATOMIC_MEM_CACHEPOLICAY(x)          ((x) << 25)
            /* 0 - lru.
             * 1 - stream.
             */
            /* Correctly spelled alias of ATOMIC_MEM_CACHEPOLICAY. */
#define     ATOMIC_MEM_CACHEPOLICY(x)           ATOMIC_MEM_CACHEPOLICAY(x)
#define     ATOMIC_MEM_ENGINESEL(x)             ((x) << 30)
            /* 0 - micro_engine.
             */
19241687f09Smrg
#define	PACKET3_DMA_DATA				0x50
/* 1. header
 * 2. CONTROL
 * 3. SRC_ADDR_LO or DATA [31:0]
 * 4. SRC_ADDR_HI [31:0]
 * 5. DST_ADDR_LO [31:0]
 * 6. DST_ADDR_HI [7:0]
 * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
 */
/* CONTROL */
#              define PACKET3_DMA_DATA_ENGINE(x)     ((x) << 0)
		/* 0 - ME
		 * 1 - PFP
		 */
#              define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#              define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15)
#              define PACKET3_DMA_DATA_DST_SEL(x)  ((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#              define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27)
#              define PACKET3_DMA_DATA_SRC_SEL(x)  ((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
		/* Bit 31: written as 1u because 1 << 31 overflows a signed int. */
#              define PACKET3_DMA_DATA_CP_SYNC     (1u << 31)
/* COMMAND */
#              define PACKET3_DMA_DATA_DIS_WC      (1 << 21)
#              define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#              define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#              define PACKET3_DMA_DATA_CMD_SAS     (1 << 26)
		/* 0 - memory
		 * 1 - register
		 */
#              define PACKET3_DMA_DATA_CMD_DAS     (1 << 27)
		/* 0 - memory
		 * 1 - register
		 */
#              define PACKET3_DMA_DATA_CMD_SAIC    (1 << 28)
#              define PACKET3_DMA_DATA_CMD_DAIC    (1 << 29)
#              define PACKET3_DMA_DATA_CMD_RAW_WAIT  (1 << 30)
2563f012e29Smrg
/*
 * Build an SI-style SDMA header dword: op in bits [31:28], b at bit 26,
 * t at bit 23, s at bit 22 and cnt in bits [19:0].  The masks are
 * unsigned so that opcodes with their top bit set (SDMA_NOP_SI,
 * SDMA_OPCODE_CONSTANT_FILL_SI) do not overflow a signed int when shifted
 * into bit 31.
 */
#define SDMA_PACKET_SI(op, b, t, s, cnt)	((((op) & 0xFu) << 28) |	\
						(((b) & 0x1u) << 26) |		\
						(((t) & 0x1u) << 23) |		\
						(((s) & 0x1u) << 22) |		\
						(((cnt) & 0xFFFFFu) << 0))
#define	SDMA_OPCODE_COPY_SI	3
#define SDMA_OPCODE_CONSTANT_FILL_SI	13
#define SDMA_NOP_SI  0xf
#define GFX_COMPUTE_NOP_SI 0x80000000
#define	PACKET3_DMA_DATA_SI	0x41
#              define PACKET3_DMA_DATA_SI_ENGINE(x)     ((x) << 27)
		/* 0 - ME
		 * 1 - PFP
		 */
#              define PACKET3_DMA_DATA_SI_DST_SEL(x)  ((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_SI_SRC_SEL(x)  ((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
		/* Bit 31: written as 1u because 1 << 31 overflows a signed int. */
#              define PACKET3_DMA_DATA_SI_CP_SYNC     (1u << 31)
283d8807b2fSmrg
28400a23bdaSmrg
/*
 * CONTEXT_CONTROL opcode and single-bit field builders.  LOAD_ENABLE and
 * SHADOW_ENABLE both map to bit 31; presumably they go into different
 * dwords of the packet (confirm against the users of these macros).
 */
#define PKT3_CONTEXT_CONTROL			0x28
#define     CONTEXT_CONTROL_LOAD_ENABLE(x)	(((unsigned)(x) & 0x1) << 31)
#define     CONTEXT_CONTROL_LOAD_CE_RAM(x)	(((unsigned)(x) & 0x1) << 28)
#define     CONTEXT_CONTROL_SHADOW_ENABLE(x)	(((unsigned)(x) & 0x1) << 31)

#define PKT3_CLEAR_STATE			0x12

#define PKT3_SET_SH_REG				0x76
#define     PACKET3_SET_SH_REG_START		0x00002c00

#define PKT3_SET_SH_REG_INDEX			0x9B

/* Further type-3 opcodes used with PACKET3(). */
#define PACKET3_DISPATCH_DIRECT			0x15
#define PACKET3_EVENT_WRITE			0x46
#define PACKET3_ACQUIRE_MEM			0x58
#define PACKET3_SET_CONTEXT_REG			0x69
#define PACKET3_SET_UCONFIG_REG			0x79
#define PACKET3_DRAW_INDEX_AUTO			0x2D
/* gfx 8 compute register offsets, sorted by offset. */
#define mmCOMPUTE_NUM_THREAD_X		0x2e07
#define mmCOMPUTE_PGM_LO		0x2e0c
#define mmCOMPUTE_PGM_RSRC1		0x2e12
#define mmCOMPUTE_RESOURCE_LIMITS	0x2e15
#define mmCOMPUTE_TMPRING_SIZE		0x2e18
#define mmCOMPUTE_USER_DATA_0		0x2e40
#define mmCOMPUTE_USER_DATA_1		0x2e41
31100a23bdaSmrg
31200a23bdaSmrg
31300a23bdaSmrg
/*
 * Reverse the byte order of a 32-bit value.
 *
 * The argument is parenthesized, so an expression such as (a | b) is
 * swapped as a whole, and it is converted to uint32_t before shifting so
 * that moving the low byte into bits [31:24] never overflows a signed
 * int.  The result is an integer constant expression when num is one.
 * num is evaluated four times; do not pass expressions with side effects.
 */
#define SWAP_32(num) ((((uint32_t)(num) & 0xff000000u) >> 24) | \
		      (((uint32_t)(num) & 0x0000ff00u) << 8) | \
		      (((uint32_t)(num) & 0x00ff0000u) >> 8) | \
		      (((uint32_t)(num) & 0x000000ffu) << 24))
31800a23bdaSmrg
31900a23bdaSmrg
32000a23bdaSmrg/* Shader code
32100a23bdaSmrg * void main()
32200a23bdaSmrg{
32300a23bdaSmrg
32400a23bdaSmrg	float x = some_input;
32500a23bdaSmrg		for (unsigned i = 0; i < 1000000; i++)
32600a23bdaSmrg  	x = sin(x);
32700a23bdaSmrg
32800a23bdaSmrg	u[0] = 42u;
32900a23bdaSmrg}
33000a23bdaSmrg*/
33100a23bdaSmrg
33200a23bdaSmrgstatic  uint32_t shader_bin[] = {
33300a23bdaSmrg	SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
33400a23bdaSmrg	SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
33500a23bdaSmrg	SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
33600a23bdaSmrg	SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
33700a23bdaSmrg};
33800a23bdaSmrg
/*
 * Offsets named for code and data.  Their base and unit are decided by
 * the users of these macros, which are outside the visible part of the
 * file.
 */
#define CODE_OFFSET	512
#define DATA_OFFSET	1024
34100a23bdaSmrg
/* Compute-shader variants; the values are the implicit ones, spelled out. */
enum cs_type {
	CS_BUFFERCLEAR = 0,
	CS_BUFFERCOPY  = 1,
	CS_HANG        = 2,
	CS_HANG_SLOW   = 3
};
3485324fb0dSmrg
/*
 * Buffer-clear compute shader binaries as raw dwords, one per GFX
 * generation (13 dwords for gfx9, 9 for gfx10).  The names pair them with
 * CS_BUFFERCLEAR; the code that uploads them is outside this view.
 */
static const uint32_t bufferclear_cs_shader_gfx9[] = {
	0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08,
	0x7e020280, 0x7e040204, 0x7e060205, 0x7e080206,
	0x7e0a0207, 0xe01c2000, 0x80000200, 0xbf8c0000,
	0xbf810000,
};

static const uint32_t bufferclear_cs_shader_gfx10[] = {
	0xD7460004, 0x04010C08, 0x7E000204, 0x7E020205,
	0x7E040206, 0x7E060207, 0xE01C2000, 0x80000004,
	0xBF810000,
};
3610ed5401bSmrg
/*
 * { register offset, value } pairs that accompany the gfx9 buffer-clear
 * compute shader.  The register names come from the original annotations.
 */
static const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = {
	{ 0x2e12, 0x000C0041 },	/* mmCOMPUTE_PGM_RSRC1 */
	{ 0x2e13, 0x00000090 },	/* mmCOMPUTE_PGM_RSRC2 */
	{ 0x2e07, 0x00000040 },	/* mmCOMPUTE_NUM_THREAD_X */
	{ 0x2e08, 0x00000001 },	/* mmCOMPUTE_NUM_THREAD_Y */
	{ 0x2e09, 0x00000001 },	/* mmCOMPUTE_NUM_THREAD_Z */
};

/*
 * Number of pairs in the table above.  Derived from the array so the two
 * cannot drift apart when an entry is added or removed.
 */
static const uint32_t bufferclear_cs_shader_registers_num_gfx9 =
	sizeof(bufferclear_cs_shader_registers_gfx9) /
	sizeof(bufferclear_cs_shader_registers_gfx9[0]);
3715324fb0dSmrg
/*
 * Buffer-copy compute shader binaries as raw dwords, one per GFX
 * generation (11 dwords for gfx9, 8 for gfx10).  The names pair them with
 * CS_BUFFERCOPY; the code that uploads them is outside this view.
 */
static const uint32_t buffercopy_cs_shader_gfx9[] = {
	0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08,
	0x7e020280, 0xe00c2000, 0x80000200, 0xbf8c0f70,
	0xe01c2000, 0x80010200, 0xbf810000,
};

static const uint32_t buffercopy_cs_shader_gfx10[] = {
	0xD7460001, 0x04010C08, 0xE00C2000, 0x80000201,
	0xBF8C3F70, 0xE01C2000, 0x80010201, 0xBF810000,
};
3820ed5401bSmrg
/*
 * Prebuilt gfx9 PM4 command stream (156 dwords), laid out one packet per
 * line.  Each line starts with a type-3 header in the PACKET3(op, n)
 * layout and is followed by n + 1 payload dwords.  Headers 0xc0NN6900
 * carry opcode 0x69 (PACKET3_SET_CONTEXT_REG) and 0xc0NN7900 opcode 0x79
 * (PACKET3_SET_UCONFIG_REG); opcode 0x2f has no name in this file's
 * visible part.
 */
static const uint32_t preamblecache_gfx9[] = {
	0xc0026900, 0x81, 0x80000000, 0x40004000,
	0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
	0xc0026900, 0x90, 0x80000000, 0x40004000,
	0xc0026900, 0x94, 0x80000000, 0x40004000,
	0xc0026900, 0xb4, 0x0, 0x3f800000,
	0xc0016900, 0x103, 0x0,
	0xc0016900, 0x208, 0x0,
	0xc0016900, 0x290, 0x0,
	0xc0016900, 0x2a1, 0x0,
	0xc0026900, 0x2ad, 0x0, 0x0,
	0xc0016900, 0x2d5, 0x10000,
	0xc0016900, 0x2dc, 0x0,
	0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0026900, 0x2e5, 0x0, 0x0,
	0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
	0xc0036900, 0x311, 0x3, 0, 0x100000,
	0xc0026900, 0x316, 0x1e, 0x20,
	0xc0016900, 0x349, 0x0,
	0xc0016900, 0x358, 0x0,
	0xc0016900, 0x367, 0x0,
	0xc0016900, 0x376, 0x0,
	0xc0016900, 0x385, 0x0,
	0xc0016900, 0x19, 0x0,
	0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0026900, 0x204, 0x90000, 0x4,
	0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
	0xc0016900, 0x2b2, 0x0,
	0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
	0xc0016900, 0x314, 0x0,
	0xc0016900, 0x2a6, 0,
	0xc0016900, 0x210, 0,
	0xc0002f00, 0x1,
	0xc0016900, 0x1, 0x1,
	0xc0016900, 0x18, 0x2,
	0xc0016900, 0x206, 0x300,
	0xc0017900, 0x20000243, 0x0,
	0xc0017900, 0x248, 0xffffffff,
	0xc0017900, 0x249, 0x0,
	0xc0017900, 0x24a, 0x0,
	0xc0017900, 0x24b, 0x0
};
4055324fb0dSmrg
/*
 * Prebuilt gfx10 PM4 command stream (203 dwords), laid out one packet per
 * line.  Each line starts with a type-3 header in the PACKET3(op, n)
 * layout and is followed by n + 1 payload dwords.  Headers 0xc0NN6900
 * carry opcode 0x69 (PACKET3_SET_CONTEXT_REG), 0xc0NN7900 opcode 0x79
 * (PACKET3_SET_UCONFIG_REG) and 0xc0NN7600 opcode 0x76 (PKT3_SET_SH_REG);
 * opcodes 0x2f and 0x7a have no name in this file's visible part.
 */
static const uint32_t preamblecache_gfx10[] = {
	0xc0026900, 0x81, 0x80000000, 0x40004000,
	0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
	0xc0026900, 0x90, 0x80000000, 0x40004000,
	0xc0026900, 0x94, 0x80000000, 0x40004000,
	0xc0026900, 0xb4, 0x0, 0x3f800000,
	0xc0016900, 0x103, 0x0,
	0xc0016900, 0x208, 0x0,
	0xc0016900, 0x290, 0x0,
	0xc0016900, 0x2a1, 0x0,
	0xc0026900, 0x2ad, 0x0, 0x0,
	0xc0016900, 0x2d5, 0x10000,
	0xc0016900, 0x2dc, 0x0,
	0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0026900, 0x2e5, 0x0, 0x0,
	0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
	0xc0046900, 0x310, 0, 0x3, 0, 0x100000,
	0xc0026900, 0x316, 0xe, 0x20,
	0xc0016900, 0x349, 0x0,
	0xc0016900, 0x358, 0x0,
	0xc0016900, 0x367, 0x0,
	0xc0016900, 0x376, 0x0,
	0xc0016900, 0x385, 0x0,
	0xc0016900, 0x6, 0x0,
	0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0026900, 0x204, 0x90000, 0x4,
	0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
	0xc0016900, 0x2b2, 0x0,
	0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
	0xc0016900, 0x314, 0x0,
	0xc0016900, 0x10a, 0,
	0xc0016900, 0x2a6, 0,
	0xc0016900, 0x210, 0,
	0xc0016900, 0x2db, 0,
	0xc0016900, 0x1d4, 0,
	0xc0002f00, 0x1,
	0xc0016900, 0x1, 0x1,
	0xc0016900, 0xe, 0x2,
	0xc0016900, 0x206, 0x300,
	0xc0016900, 0x212, 0x200,
	0xc0017900, 0x7b, 0x20,
	0xc0017a00, 0x20000243, 0x0,
	0xc0017900, 0x249, 0,
	0xc0017900, 0x24a, 0,
	0xc0017900, 0x24b, 0,
	0xc0017900, 0x259, 0xffffffff,
	0xc0017900, 0x25f, 0,
	0xc0017900, 0x260, 0,
	0xc0017900, 0x262, 0,
	0xc0017600, 0x45, 0x0,
	0xc0017600, 0x6, 0x0,
	0xc0067600, 0x70, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0067600, 0x30, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0
};
4310ed5401bSmrg
/* Pixel-shader variants; the values are the implicit ones, spelled out. */
enum ps_type {
	PS_CONST     = 0,
	PS_TEX       = 1,
	PS_HANG      = 2,
	PS_HANG_SLOW = 3
};
4385324fb0dSmrg
/* gfx9 "const" pixel shader binary as raw dwords (11 dwords). */
static const uint32_t ps_const_shader_gfx9[] = {
	0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
	0xD2960000, 0x00020300, 0xD2960001, 0x00020702,
	0xC4001C0F, 0x00000100, 0xBF810000,
};

/* Number of dwords in each row of the patch table below. */
static const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6;

/*
 * Ten alternative 6-dword sequences.  Row 4 is identical to the six
 * dwords of ps_const_shader_gfx9 that start at the patch offset given
 * below; which row gets applied is decided by code outside this view.
 */
static const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = {
	{
		{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
		{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 },
		{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 },
		{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 },
		{ 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 },
		{ 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 },
		{ 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 },
		{ 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 },
		{ 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 },
		{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 },
	},
};

/* Dword index into ps_const_shader_gfx9 at which a row lines up. */
static const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = {
	0x00000004,
};
4645324fb0dSmrg
/* Row count of the pixel-shader SH register tables. */
static const uint32_t ps_num_sh_registers_gfx9 = 2;

/*
 * { register offset, value } pairs for the gfx9 "const" pixel shader.
 * The register names come from the original annotations.
 */
static const uint32_t ps_const_sh_registers_gfx9[][2] = {
	{ 0x2C0A, 0x000C0040 },	/* mmSPI_SHADER_PGM_RSRC1_PS */
	{ 0x2C0B, 0x00000008 },	/* mmSPI_SHADER_PGM_RSRC2_PS */
};

/* Row count of the pixel-shader context register tables. */
static const uint32_t ps_num_context_registers_gfx9 = 7;

static const uint32_t ps_const_context_reg_gfx9[][2] = {
	{ 0xA1B4, 0x00000002 },	/* mmSPI_PS_INPUT_ADDR */
	{ 0xA1B6, 0x00000000 },	/* mmSPI_PS_IN_CONTROL */
	{ 0xA08F, 0x0000000F },	/* mmCB_SHADER_MASK */
	{ 0xA203, 0x00000010 },	/* mmDB_SHADER_CONTROL */
	{ 0xA1C4, 0x00000000 },	/* mmSPI_SHADER_Z_FORMAT */
	{ 0xA1B8, 0x00000000 },	/* mmSPI_BARYC_CNTL, always 0 for now */
	{ 0xA1C5, 0x00000004 },	/* mmSPI_SHADER_COL_FORMAT */
};
4835324fb0dSmrg
/* gfx10 "const" pixel shader binary as raw dwords (11 dwords). */
static const uint32_t ps_const_shader_gfx10[] = {
	0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
	0x5E000300, 0x5E020702, 0xBF800000, 0xBF800000,
	0xF8001C0F, 0x00000100, 0xBF810000,
};

/* Number of dwords in each row of the patch table below. */
static const uint32_t ps_const_shader_patchinfo_code_size_gfx10 = 6;

/*
 * Ten alternative 6-dword sequences.  Row 4 is identical to the six
 * dwords of ps_const_shader_gfx10 that start at the patch offset given
 * below; which row gets applied is decided by code outside this view.
 */
static const uint32_t ps_const_shader_patchinfo_code_gfx10[][10][6] = {
	{
		{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001890, 0x00000000 },
		{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001801, 0x00000000 },
		{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000100 },
		{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000300 },
		{ 0x5E000300, 0x5E020702, 0xBF800000, 0xBF800000, 0xF8001C0F, 0x00000100 },
		{ 0xD7690000, 0x00020300, 0xD7690001, 0x00020702, 0xF8001C0F, 0x00000100 },
		{ 0xD7680000, 0x00020300, 0xD7680001, 0x00020702, 0xF8001C0F, 0x00000100 },
		{ 0xD76A0000, 0x00020300, 0xD76A0001, 0x00020702, 0xF8001C0F, 0x00000100 },
		{ 0xD76B0000, 0x00020300, 0xD76B0001, 0x00020702, 0xF8001C0F, 0x00000100 },
		{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF800180F, 0x03020100 },
	},
};

/* Dword index into ps_const_shader_gfx10 at which a row lines up. */
static const uint32_t ps_const_shader_patchinfo_offset_gfx10[] = {
	0x00000004,
};

/* Row count of the gfx10 pixel-shader SH register table. */
static const uint32_t ps_num_sh_registers_gfx10 = 2;

/*
 * { register offset, value } pairs for the gfx10 "const" pixel shader.
 * The register names come from the original annotations.
 */
static const uint32_t ps_const_sh_registers_gfx10[][2] = {
	{ 0x2C0A, 0x000C0000 },	/* mmSPI_SHADER_PGM_RSRC1_PS */
	{ 0x2C0B, 0x00000008 },	/* mmSPI_SHADER_PGM_RSRC2_PS */
};
5160ed5401bSmrg
/* gfx9 "tex" pixel shader binary as raw dwords (18 dwords). */
static const uint32_t ps_tex_shader_gfx9[] = {
	0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000,
	0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00,
	0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000,
	0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F,
	0x00000100, 0xBF810000,
};

/* Dword index into ps_tex_shader_gfx9 at which a patch row lines up. */
static const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = {
	0x0000000B,
};

/* Number of dwords in each row of the patch table below. */
static const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6;

/*
 * Ten alternative 6-dword sequences.  Row 4 is identical to the six
 * dwords of ps_tex_shader_gfx9 that start at the patch offset above;
 * which row gets applied is decided by code outside this view.
 */
static const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = {
	{
		{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
		{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 },
		{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 },
		{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 },
		{ 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 },
		{ 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 },
		{ 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 },
		{ 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 },
		{ 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 },
		{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 },
	},
};
5445324fb0dSmrg
/*
 * { register offset, value } pairs for the gfx9 "tex" pixel shader.  The
 * register names come from the original annotations.
 */
static const uint32_t ps_tex_sh_registers_gfx9[][2] = {
	{ 0x2C0A, 0x000C0081 },	/* mmSPI_SHADER_PGM_RSRC1_PS */
	{ 0x2C0B, 0x00000018 },	/* mmSPI_SHADER_PGM_RSRC2_PS */
};

static const uint32_t ps_tex_context_reg_gfx9[][2] = {
	{ 0xA1B4, 0x00000002 },	/* mmSPI_PS_INPUT_ADDR */
	{ 0xA1B6, 0x00000001 },	/* mmSPI_PS_IN_CONTROL */
	{ 0xA08F, 0x0000000F },	/* mmCB_SHADER_MASK */
	{ 0xA203, 0x00000010 },	/* mmDB_SHADER_CONTROL */
	{ 0xA1C4, 0x00000000 },	/* mmSPI_SHADER_Z_FORMAT */
	{ 0xA1B8, 0x00000000 },	/* mmSPI_BARYC_CNTL, always 0 for now */
	{ 0xA1C5, 0x00000004 },	/* mmSPI_SHADER_COL_FORMAT */
};
5595324fb0dSmrg
/* gfx10 "tex" pixel shader binary as raw dwords (19 dwords). */
static const uint32_t ps_tex_shader_gfx10[] = {
	0xBEFC030C, 0xBE8E047E, 0xBEFE0A7E, 0xC8080000,
	0xC80C0100, 0xC8090001, 0xC80D0101, 0xF0800F0A,
	0x00400402, 0x00000003, 0xBEFE040E, 0xBF8C0F70,
	0x5E000B04, 0x5E020F06, 0xBF800000, 0xBF800000,
	0xF8001C0F, 0x00000100, 0xBF810000,
};

/* Dword index into ps_tex_shader_gfx10 at which a patch row lines up. */
static const uint32_t ps_tex_shader_patchinfo_offset_gfx10[] = {
	0x0000000C,
};

/* Number of dwords in each row of the patch table below. */
static const uint32_t ps_tex_shader_patchinfo_code_size_gfx10 = 6;

/*
 * Ten alternative 6-dword sequences.  Row 4 is identical to the six
 * dwords of ps_tex_shader_gfx10 that start at the patch offset above;
 * which row gets applied is decided by code outside this view.
 */
static const uint32_t ps_tex_shader_patchinfo_code_gfx10[][10][6] = {
	{
		{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001890, 0x00000000 },
		{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001801, 0x00000004 },
		{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000504 },
		{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000704 },
		{ 0x5E000B04, 0x5E020F06, 0xBF800000, 0xBF800000, 0xF8001C0F, 0x00000100 },
		{ 0xD7690000, 0x00020B04, 0xD7690001, 0x00020F06, 0xF8001C0F, 0x00000100 },
		{ 0xD7680000, 0x00020B04, 0xD7680001, 0x00020F06, 0xF8001C0F, 0x00000100 },
		{ 0xD76A0000, 0x00020B04, 0xD76A0001, 0x00020F06, 0xF8001C0F, 0x00000100 },
		{ 0xD76B0000, 0x00020B04, 0xD76B0001, 0x00020F06, 0xF8001C0F, 0x00000100 },
		{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF800180F, 0x07060504 },
	},
};
5870ed5401bSmrg
/*
 * Raw machine words of the gfx9 "RectPosTexFast" vertex shader (31 dwords).
 * The values must be kept exactly as they are.
 */
static const uint32_t vs_RectPosTexFast_shader_gfx9[] = {
	0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100,
	0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206,
	0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080,
	0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003,
	0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101,
	0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903,
	0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100,
	0xC400020F, 0x05060403, 0xBF810000,
};
5985324fb0dSmrg
/*
 * Raw machine words of the gfx10 "RectPosTexFast" vertex shader (29 dwords).
 * The values must be kept exactly as they are.
 */
static const uint32_t vs_RectPosTexFast_shader_gfx10[] = {
	0x7E000B00, 0x060000F3, 0x7E020202, 0x7E040206,
	0x7C040080, 0x060000F3, 0xD5010001, 0x01AA0200,
	0x7E060203, 0xD5010002, 0x01AA0404, 0x7E080207,
	0x7C040080, 0xD5010000, 0x01A80101, 0xD5010001,
	0x01AA0601, 0x7E060208, 0x7E0A02F2, 0xD5010002,
	0x01A80902, 0xD5010004, 0x01AA0805, 0x7E0C0209,
	0xF80008CF, 0x05030100, 0xF800020F, 0x05060402,
	0xBF810000,
};
6090ed5401bSmrg
/*
 * Canned command dwords for gfx9 (50 dwords), laid out with one
 * 0xc0NN6900-headed group per line.
 * NOTE(review): the groups look like register-write packets (header,
 * register offset, values) -- confirm against the PM4 definitions.
 */
static const uint32_t cached_cmd_gfx9[] = {
	0xc0016900, 0x0, 0x0,
	0xc0026900, 0x3, 0x2a, 0x0,
	0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
	0xc0016900, 0x83, 0xffff,
	0xc0026900, 0x8e, 0xf, 0xf,
	0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x12,
	0xc0026900, 0x10b, 0x0, 0x0,
	0xc0016900, 0x1e0, 0x0,
	0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
	0xc0026900, 0x292, 0x20, 0x60201b8,
	0xc0026900, 0x2b0, 0x0, 0x0,
	0xc0016900, 0x2f8, 0x0
};
62000a23bdaSmrg
/*
 * Canned command dwords for gfx10 (50 dwords), laid out with one
 * 0xc0NN6900-headed group per line.
 * NOTE(review): the groups look like register-write packets (header,
 * register offset, values) -- confirm against the PM4 definitions.
 */
static const uint32_t cached_cmd_gfx10[] = {
	0xc0016900, 0x0, 0x0,
	0xc0026900, 0x3, 0x2a, 0x0,
	0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
	0xc0016900, 0x83, 0xffff,
	0xc0026900, 0x8e, 0xf, 0xf,
	0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x18,
	0xc0026900, 0x10b, 0x0, 0x0,
	0xc0016900, 0x1e0, 0x0,
	0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
	0xc0026900, 0x292, 0x20, 0x6020000,
	0xc0026900, 0x2b0, 0x0, 0x0,
	0xc0016900, 0x2f8, 0x0
};
6310ed5401bSmrg
/*
 * Raw machine words of the "memcpy_ps_hang" pixel shader (19 dwords).
 * NOTE(review): judging by the name this shader is meant to hang the GPU
 * for the hang tests -- confirm against its users.
 */
unsigned int memcpy_ps_hang[] = {
	0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100,
	0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001,
	0xC80D0101, 0xBF8C007F, 0xF0800F00, 0x00010002,
	0xBEFE040C, 0xBF8C0F70, 0xBF800000, 0xBF800000,
	0xF800180F, 0x03020100, 0xBF810000,
};
6399bd392adSmrg
/*
 * Describes a shader binary split into three consecutive parts.
 * NOTE(review): the lengths appear to be counted in dwords and to sum to the
 * size of the array behind `shader` -- confirm against the code that
 * assembles the "slow hang" shaders.
 */
struct amdgpu_test_shader {
	uint32_t *shader;	/* shader machine words */
	uint32_t header_length;	/* length of the leading part */
	uint32_t body_length;	/* length of the middle part */
	uint32_t foot_length;	/* length of the trailing part */
};
6469bd392adSmrg
/* Machine words of the "slow hang" memcpy compute shader, AI variant (8 dwords). */
unsigned int memcpy_cs_hang_slow_ai_codes[] = {
	0xd1fd0000, 0x04010c08, 0xe00c2000, 0x80000100,
	0xbf8c0f70, 0xe01c2000, 0x80010100, 0xbf810000,
};
6519bd392adSmrg
6529bd392adSmrgstruct amdgpu_test_shader memcpy_cs_hang_slow_ai = {
6539bd392adSmrg        memcpy_cs_hang_slow_ai_codes,
6549bd392adSmrg        4,
6559bd392adSmrg        3,
6569bd392adSmrg        1
6579bd392adSmrg};
6589bd392adSmrg
/* Machine words of the "slow hang" memcpy compute shader, RV variant (8 dwords). */
unsigned int memcpy_cs_hang_slow_rv_codes[] = {
	0x8e00860c, 0x32000000, 0xe00c2000, 0x80010100,
	0xbf8c0f70, 0xe01c2000, 0x80020100, 0xbf810000,
};
6639bd392adSmrg
6649bd392adSmrgstruct amdgpu_test_shader memcpy_cs_hang_slow_rv = {
6659bd392adSmrg        memcpy_cs_hang_slow_rv_codes,
6669bd392adSmrg        4,
6679bd392adSmrg        3,
6689bd392adSmrg        1
6699bd392adSmrg};
6709bd392adSmrg
/* Machine words of the "slow hang" memcpy compute shader, NV variant (8 dwords). */
unsigned int memcpy_cs_hang_slow_nv_codes[] = {
	0xd7460000, 0x04010c08, 0xe00c2000, 0x80000100,
	0xbf8c0f70, 0xe01ca000, 0x80010100, 0xbf810000,
};
6750ed5401bSmrg
6760ed5401bSmrgstruct amdgpu_test_shader memcpy_cs_hang_slow_nv = {
6770ed5401bSmrg        memcpy_cs_hang_slow_nv_codes,
6780ed5401bSmrg        4,
6790ed5401bSmrg        3,
6800ed5401bSmrg        1
6810ed5401bSmrg};
6820ed5401bSmrg
/* Machine words of the "slow hang" memcpy pixel shader, AI variant (18 dwords). */
unsigned int memcpy_ps_hang_slow_ai_codes[] = {
	0xbefc000c, 0xbe8e017e, 0xbefe077e, 0xd4080000,
	0xd4090001, 0xd40c0100, 0xd40d0101, 0xf0800f00,
	0x00400002, 0xbefe010e, 0xbf8c0f70, 0xbf800000,
	0xbf800000, 0xbf800000, 0xbf800000, 0xc400180f,
	0x03020100, 0xbf810000,
};
6909bd392adSmrg
6919bd392adSmrgstruct amdgpu_test_shader memcpy_ps_hang_slow_ai = {
6929bd392adSmrg        memcpy_ps_hang_slow_ai_codes,
6939bd392adSmrg        7,
6949bd392adSmrg        2,
6959bd392adSmrg        9
6969bd392adSmrg};
6979bd392adSmrg
6987cdc0497Smrgint amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size,
6997cdc0497Smrg			unsigned alignment, unsigned heap, uint64_t alloc_flags,
7007cdc0497Smrg			uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu,
7017cdc0497Smrg			uint64_t *mc_address,
7027cdc0497Smrg			amdgpu_va_handle *va_handle)
7037cdc0497Smrg{
7047cdc0497Smrg	struct amdgpu_bo_alloc_request request = {};
7057cdc0497Smrg	amdgpu_bo_handle buf_handle;
7067cdc0497Smrg	amdgpu_va_handle handle;
7077cdc0497Smrg	uint64_t vmc_addr;
7087cdc0497Smrg	int r;
7097cdc0497Smrg
7107cdc0497Smrg	request.alloc_size = size;
7117cdc0497Smrg	request.phys_alignment = alignment;
7127cdc0497Smrg	request.preferred_heap = heap;
7137cdc0497Smrg	request.flags = alloc_flags;
7147cdc0497Smrg
7157cdc0497Smrg	r = amdgpu_bo_alloc(dev, &request, &buf_handle);
7167cdc0497Smrg	if (r)
7177cdc0497Smrg		return r;
7187cdc0497Smrg
7197cdc0497Smrg	r = amdgpu_va_range_alloc(dev,
7207cdc0497Smrg				  amdgpu_gpu_va_range_general,
7217cdc0497Smrg				  size, alignment, 0, &vmc_addr,
7227cdc0497Smrg				  &handle, 0);
7237cdc0497Smrg	if (r)
7247cdc0497Smrg		goto error_va_alloc;
7257cdc0497Smrg
7267cdc0497Smrg	r = amdgpu_bo_va_op_raw(dev, buf_handle, 0,  ALIGN(size, getpagesize()), vmc_addr,
7277cdc0497Smrg				   AMDGPU_VM_PAGE_READABLE |
7287cdc0497Smrg				   AMDGPU_VM_PAGE_WRITEABLE |
7297cdc0497Smrg				   AMDGPU_VM_PAGE_EXECUTABLE |
7307cdc0497Smrg				   mapping_flags,
7317cdc0497Smrg				   AMDGPU_VA_OP_MAP);
7327cdc0497Smrg	if (r)
7337cdc0497Smrg		goto error_va_map;
7347cdc0497Smrg
7357cdc0497Smrg	r = amdgpu_bo_cpu_map(buf_handle, cpu);
7367cdc0497Smrg	if (r)
7377cdc0497Smrg		goto error_cpu_map;
7387cdc0497Smrg
7397cdc0497Smrg	*bo = buf_handle;
7407cdc0497Smrg	*mc_address = vmc_addr;
7417cdc0497Smrg	*va_handle = handle;
7427cdc0497Smrg
7437cdc0497Smrg	return 0;
7447cdc0497Smrg
7457cdc0497Smrg error_cpu_map:
7467cdc0497Smrg	amdgpu_bo_cpu_unmap(buf_handle);
7477cdc0497Smrg
7487cdc0497Smrg error_va_map:
7497cdc0497Smrg	amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);
7507cdc0497Smrg
7517cdc0497Smrg error_va_alloc:
7527cdc0497Smrg	amdgpu_bo_free(buf_handle);
7537cdc0497Smrg	return r;
7547cdc0497Smrg}
7557cdc0497Smrg
7567cdc0497Smrg
7577cdc0497Smrg
75841687f09SmrgCU_BOOL suite_basic_tests_enable(void)
75941687f09Smrg{
76041687f09Smrg
76141687f09Smrg	if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
76241687f09Smrg					     &minor_version, &device_handle))
76341687f09Smrg		return CU_FALSE;
76441687f09Smrg
7654babd585Smrg
7664babd585Smrg	family_id = device_handle->info.family_id;
7674babd585Smrg	chip_id = device_handle->info.chip_external_rev;
7684babd585Smrg	chip_rev = device_handle->info.chip_rev;
76941687f09Smrg
77041687f09Smrg	if (amdgpu_device_deinitialize(device_handle))
77141687f09Smrg		return CU_FALSE;
77241687f09Smrg
7734babd585Smrg	/* disable gfx engine basic test cases for some asics have no CPG */
7744babd585Smrg	if (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) {
77541687f09Smrg		if (amdgpu_set_test_active("Basic Tests",
77641687f09Smrg					"Command submission Test (GFX)",
77741687f09Smrg					CU_FALSE))
77841687f09Smrg			fprintf(stderr, "test deactivation failed - %s\n",
77941687f09Smrg				CU_get_error_msg());
78041687f09Smrg
78141687f09Smrg		if (amdgpu_set_test_active("Basic Tests",
78241687f09Smrg					"Command submission Test (Multi-Fence)",
78341687f09Smrg					CU_FALSE))
78441687f09Smrg			fprintf(stderr, "test deactivation failed - %s\n",
78541687f09Smrg				CU_get_error_msg());
78641687f09Smrg
78741687f09Smrg		if (amdgpu_set_test_active("Basic Tests",
78841687f09Smrg					"Sync dependency Test",
78941687f09Smrg					CU_FALSE))
79041687f09Smrg			fprintf(stderr, "test deactivation failed - %s\n",
79141687f09Smrg				CU_get_error_msg());
79241687f09Smrg	}
79341687f09Smrg
79441687f09Smrg	return CU_TRUE;
79541687f09Smrg}
79641687f09Smrg
7973f012e29Smrgint suite_basic_tests_init(void)
7983f012e29Smrg{
799d8807b2fSmrg	struct amdgpu_gpu_info gpu_info = {0};
8003f012e29Smrg	int r;
8013f012e29Smrg
8023f012e29Smrg	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
8033f012e29Smrg				   &minor_version, &device_handle);
8043f012e29Smrg
805d8807b2fSmrg	if (r) {
806037b3c26Smrg		if ((r == -EACCES) && (errno == EACCES))
807037b3c26Smrg			printf("\n\nError:%s. "
808037b3c26Smrg				"Hint:Try to run this test program as root.",
809037b3c26Smrg				strerror(errno));
8103f012e29Smrg		return CUE_SINIT_FAILED;
811037b3c26Smrg	}
812d8807b2fSmrg
813d8807b2fSmrg	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
814d8807b2fSmrg	if (r)
815d8807b2fSmrg		return CUE_SINIT_FAILED;
816d8807b2fSmrg
817d8807b2fSmrg	family_id = gpu_info.family_id;
818d8807b2fSmrg
819d8807b2fSmrg	return CUE_SUCCESS;
8203f012e29Smrg}
8213f012e29Smrg
8223f012e29Smrgint suite_basic_tests_clean(void)
8233f012e29Smrg{
8243f012e29Smrg	int r = amdgpu_device_deinitialize(device_handle);
8253f012e29Smrg
8263f012e29Smrg	if (r == 0)
8273f012e29Smrg		return CUE_SUCCESS;
8283f012e29Smrg	else
8293f012e29Smrg		return CUE_SCLEAN_FAILED;
8303f012e29Smrg}
8313f012e29Smrg
8323f012e29Smrgstatic void amdgpu_query_info_test(void)
8333f012e29Smrg{
8343f012e29Smrg	struct amdgpu_gpu_info gpu_info = {0};
8353f012e29Smrg	uint32_t version, feature;
8363f012e29Smrg	int r;
8373f012e29Smrg
8383f012e29Smrg	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
8393f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
8403f012e29Smrg
8413f012e29Smrg	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
8423f012e29Smrg					  0, &version, &feature);
8433f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
8443f012e29Smrg}
8453f012e29Smrg
8463f012e29Smrgstatic void amdgpu_command_submission_gfx_separate_ibs(void)
8473f012e29Smrg{
8483f012e29Smrg	amdgpu_context_handle context_handle;
8493f012e29Smrg	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
8503f012e29Smrg	void *ib_result_cpu, *ib_result_ce_cpu;
8513f012e29Smrg	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
8523f012e29Smrg	struct amdgpu_cs_request ibs_request = {0};
8533f012e29Smrg	struct amdgpu_cs_ib_info ib_info[2];
8543f012e29Smrg	struct amdgpu_cs_fence fence_status = {0};
8553f012e29Smrg	uint32_t *ptr;
8563f012e29Smrg	uint32_t expired;
8573f012e29Smrg	amdgpu_bo_list_handle bo_list;
8583f012e29Smrg	amdgpu_va_handle va_handle, va_handle_ce;
859d8807b2fSmrg	int r, i = 0;
860b0ab5608Smrg	struct drm_amdgpu_info_hw_ip info;
861b0ab5608Smrg
862b0ab5608Smrg	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
863b0ab5608Smrg	CU_ASSERT_EQUAL(r, 0);
864b0ab5608Smrg
865b0ab5608Smrg	if (info.hw_ip_version_major >= 11)
866b0ab5608Smrg		return;
8673f012e29Smrg
8683f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
8693f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
8703f012e29Smrg
8713f012e29Smrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
8723f012e29Smrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
8733f012e29Smrg				    &ib_result_handle, &ib_result_cpu,
8743f012e29Smrg				    &ib_result_mc_address, &va_handle);
8753f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
8763f012e29Smrg
8773f012e29Smrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
8783f012e29Smrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
8793f012e29Smrg				    &ib_result_ce_handle, &ib_result_ce_cpu,
8803f012e29Smrg				    &ib_result_ce_mc_address, &va_handle_ce);
8813f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
8823f012e29Smrg
8833f012e29Smrg	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
8843f012e29Smrg			       ib_result_ce_handle, &bo_list);
8853f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
8863f012e29Smrg
8873f012e29Smrg	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
8883f012e29Smrg
8893f012e29Smrg	/* IT_SET_CE_DE_COUNTERS */
8903f012e29Smrg	ptr = ib_result_ce_cpu;
891d8807b2fSmrg	if (family_id != AMDGPU_FAMILY_SI) {
892d8807b2fSmrg		ptr[i++] = 0xc0008900;
893d8807b2fSmrg		ptr[i++] = 0;
894d8807b2fSmrg	}
895d8807b2fSmrg	ptr[i++] = 0xc0008400;
896d8807b2fSmrg	ptr[i++] = 1;
8973f012e29Smrg	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
898d8807b2fSmrg	ib_info[0].size = i;
8993f012e29Smrg	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
9003f012e29Smrg
9013f012e29Smrg	/* IT_WAIT_ON_CE_COUNTER */
9023f012e29Smrg	ptr = ib_result_cpu;
9033f012e29Smrg	ptr[0] = 0xc0008600;
9043f012e29Smrg	ptr[1] = 0x00000001;
9053f012e29Smrg	ib_info[1].ib_mc_address = ib_result_mc_address;
9063f012e29Smrg	ib_info[1].size = 2;
9073f012e29Smrg
9083f012e29Smrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
9093f012e29Smrg	ibs_request.number_of_ibs = 2;
9103f012e29Smrg	ibs_request.ibs = ib_info;
9113f012e29Smrg	ibs_request.resources = bo_list;
9123f012e29Smrg	ibs_request.fence_info.handle = NULL;
9133f012e29Smrg
9143f012e29Smrg	r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
9153f012e29Smrg
9163f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
9173f012e29Smrg
9183f012e29Smrg	fence_status.context = context_handle;
9193f012e29Smrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
9203f012e29Smrg	fence_status.ip_instance = 0;
9213f012e29Smrg	fence_status.fence = ibs_request.seq_no;
9223f012e29Smrg
9233f012e29Smrg	r = amdgpu_cs_query_fence_status(&fence_status,
9243f012e29Smrg					 AMDGPU_TIMEOUT_INFINITE,
9253f012e29Smrg					 0, &expired);
9263f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
9273f012e29Smrg
9283f012e29Smrg	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
9293f012e29Smrg				     ib_result_mc_address, 4096);
9303f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
9313f012e29Smrg
9323f012e29Smrg	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
9333f012e29Smrg				     ib_result_ce_mc_address, 4096);
9343f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
9353f012e29Smrg
9363f012e29Smrg	r = amdgpu_bo_list_destroy(bo_list);
9373f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
9383f012e29Smrg
9393f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
9403f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
9413f012e29Smrg
9423f012e29Smrg}
9433f012e29Smrg
9443f012e29Smrgstatic void amdgpu_command_submission_gfx_shared_ib(void)
9453f012e29Smrg{
9463f012e29Smrg	amdgpu_context_handle context_handle;
9473f012e29Smrg	amdgpu_bo_handle ib_result_handle;
9483f012e29Smrg	void *ib_result_cpu;
9493f012e29Smrg	uint64_t ib_result_mc_address;
9503f012e29Smrg	struct amdgpu_cs_request ibs_request = {0};
9513f012e29Smrg	struct amdgpu_cs_ib_info ib_info[2];
9523f012e29Smrg	struct amdgpu_cs_fence fence_status = {0};
9533f012e29Smrg	uint32_t *ptr;
9543f012e29Smrg	uint32_t expired;
9553f012e29Smrg	amdgpu_bo_list_handle bo_list;
9563f012e29Smrg	amdgpu_va_handle va_handle;
957d8807b2fSmrg	int r, i = 0;
958b0ab5608Smrg	struct drm_amdgpu_info_hw_ip info;
959b0ab5608Smrg
960b0ab5608Smrg	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
961b0ab5608Smrg	CU_ASSERT_EQUAL(r, 0);
962b0ab5608Smrg
963b0ab5608Smrg	if (info.hw_ip_version_major >= 11)
964b0ab5608Smrg		return;
9653f012e29Smrg
9663f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
9673f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
9683f012e29Smrg
9693f012e29Smrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
9703f012e29Smrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
9713f012e29Smrg				    &ib_result_handle, &ib_result_cpu,
9723f012e29Smrg				    &ib_result_mc_address, &va_handle);
9733f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
9743f012e29Smrg
9753f012e29Smrg	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
9763f012e29Smrg			       &bo_list);
9773f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
9783f012e29Smrg
9793f012e29Smrg	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
9803f012e29Smrg
9813f012e29Smrg	/* IT_SET_CE_DE_COUNTERS */
9823f012e29Smrg	ptr = ib_result_cpu;
983d8807b2fSmrg	if (family_id != AMDGPU_FAMILY_SI) {
984d8807b2fSmrg		ptr[i++] = 0xc0008900;
985d8807b2fSmrg		ptr[i++] = 0;
986d8807b2fSmrg	}
987d8807b2fSmrg	ptr[i++] = 0xc0008400;
988d8807b2fSmrg	ptr[i++] = 1;
9893f012e29Smrg	ib_info[0].ib_mc_address = ib_result_mc_address;
990d8807b2fSmrg	ib_info[0].size = i;
9913f012e29Smrg	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
9923f012e29Smrg
9933f012e29Smrg	ptr = (uint32_t *)ib_result_cpu + 4;
9943f012e29Smrg	ptr[0] = 0xc0008600;
9953f012e29Smrg	ptr[1] = 0x00000001;
9963f012e29Smrg	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
9973f012e29Smrg	ib_info[1].size = 2;
9983f012e29Smrg
9993f012e29Smrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
10003f012e29Smrg	ibs_request.number_of_ibs = 2;
10013f012e29Smrg	ibs_request.ibs = ib_info;
10023f012e29Smrg	ibs_request.resources = bo_list;
10033f012e29Smrg	ibs_request.fence_info.handle = NULL;
10043f012e29Smrg
10053f012e29Smrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
10063f012e29Smrg
10073f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
10083f012e29Smrg
10093f012e29Smrg	fence_status.context = context_handle;
10103f012e29Smrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
10113f012e29Smrg	fence_status.ip_instance = 0;
10123f012e29Smrg	fence_status.fence = ibs_request.seq_no;
10133f012e29Smrg
10143f012e29Smrg	r = amdgpu_cs_query_fence_status(&fence_status,
10153f012e29Smrg					 AMDGPU_TIMEOUT_INFINITE,
10163f012e29Smrg					 0, &expired);
10173f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
10183f012e29Smrg
10193f012e29Smrg	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
10203f012e29Smrg				     ib_result_mc_address, 4096);
10213f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
10223f012e29Smrg
10233f012e29Smrg	r = amdgpu_bo_list_destroy(bo_list);
10243f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
10253f012e29Smrg
10263f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
10273f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
10283f012e29Smrg}
10293f012e29Smrg
10303f012e29Smrgstatic void amdgpu_command_submission_gfx_cp_write_data(void)
10313f012e29Smrg{
10323f012e29Smrg	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
10333f012e29Smrg}
10343f012e29Smrg
10353f012e29Smrgstatic void amdgpu_command_submission_gfx_cp_const_fill(void)
10363f012e29Smrg{
10373f012e29Smrg	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
10383f012e29Smrg}
10393f012e29Smrg
10403f012e29Smrgstatic void amdgpu_command_submission_gfx_cp_copy_data(void)
10413f012e29Smrg{
10423f012e29Smrg	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
10433f012e29Smrg}
10443f012e29Smrg
104500a23bdaSmrgstatic void amdgpu_bo_eviction_test(void)
104600a23bdaSmrg{
104700a23bdaSmrg	const int sdma_write_length = 1024;
104800a23bdaSmrg	const int pm4_dw = 256;
104900a23bdaSmrg	amdgpu_context_handle context_handle;
105000a23bdaSmrg	amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
105100a23bdaSmrg	amdgpu_bo_handle *resources;
105200a23bdaSmrg	uint32_t *pm4;
105300a23bdaSmrg	struct amdgpu_cs_ib_info *ib_info;
105400a23bdaSmrg	struct amdgpu_cs_request *ibs_request;
105500a23bdaSmrg	uint64_t bo1_mc, bo2_mc;
105600a23bdaSmrg	volatile unsigned char *bo1_cpu, *bo2_cpu;
105700a23bdaSmrg	int i, j, r, loop1, loop2;
105800a23bdaSmrg	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
105900a23bdaSmrg	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
106000a23bdaSmrg	struct amdgpu_heap_info vram_info, gtt_info;
106100a23bdaSmrg
106200a23bdaSmrg	pm4 = calloc(pm4_dw, sizeof(*pm4));
106300a23bdaSmrg	CU_ASSERT_NOT_EQUAL(pm4, NULL);
106400a23bdaSmrg
106500a23bdaSmrg	ib_info = calloc(1, sizeof(*ib_info));
106600a23bdaSmrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
106700a23bdaSmrg
106800a23bdaSmrg	ibs_request = calloc(1, sizeof(*ibs_request));
106900a23bdaSmrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
107000a23bdaSmrg
107100a23bdaSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
107200a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
107300a23bdaSmrg
107400a23bdaSmrg	/* prepare resource */
107500a23bdaSmrg	resources = calloc(4, sizeof(amdgpu_bo_handle));
107600a23bdaSmrg	CU_ASSERT_NOT_EQUAL(resources, NULL);
107700a23bdaSmrg
107800a23bdaSmrg	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
107900a23bdaSmrg				   0, &vram_info);
108000a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
108100a23bdaSmrg
108200a23bdaSmrg	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
108300a23bdaSmrg				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
108400a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
108500a23bdaSmrg	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
108600a23bdaSmrg				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
108700a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
108800a23bdaSmrg
10894babd585Smrg	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
10904babd585Smrg				   0, &gtt_info);
10914babd585Smrg	CU_ASSERT_EQUAL(r, 0);
10924babd585Smrg
109300a23bdaSmrg	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
109400a23bdaSmrg				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
109500a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
109600a23bdaSmrg	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
109700a23bdaSmrg				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
109800a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
109900a23bdaSmrg
110000a23bdaSmrg
110100a23bdaSmrg
110200a23bdaSmrg	loop1 = loop2 = 0;
110300a23bdaSmrg	/* run 9 circle to test all mapping combination */
110400a23bdaSmrg	while(loop1 < 2) {
110500a23bdaSmrg		while(loop2 < 2) {
110600a23bdaSmrg			/* allocate UC bo1for sDMA use */
110700a23bdaSmrg			r = amdgpu_bo_alloc_and_map(device_handle,
110800a23bdaSmrg						    sdma_write_length, 4096,
110900a23bdaSmrg						    AMDGPU_GEM_DOMAIN_GTT,
111000a23bdaSmrg						    gtt_flags[loop1], &bo1,
111100a23bdaSmrg						    (void**)&bo1_cpu, &bo1_mc,
111200a23bdaSmrg						    &bo1_va_handle);
111300a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
111400a23bdaSmrg
111500a23bdaSmrg			/* set bo1 */
111600a23bdaSmrg			memset((void*)bo1_cpu, 0xaa, sdma_write_length);
111700a23bdaSmrg
111800a23bdaSmrg			/* allocate UC bo2 for sDMA use */
111900a23bdaSmrg			r = amdgpu_bo_alloc_and_map(device_handle,
112000a23bdaSmrg						    sdma_write_length, 4096,
112100a23bdaSmrg						    AMDGPU_GEM_DOMAIN_GTT,
112200a23bdaSmrg						    gtt_flags[loop2], &bo2,
112300a23bdaSmrg						    (void**)&bo2_cpu, &bo2_mc,
112400a23bdaSmrg						    &bo2_va_handle);
112500a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
112600a23bdaSmrg
112700a23bdaSmrg			/* clear bo2 */
112800a23bdaSmrg			memset((void*)bo2_cpu, 0, sdma_write_length);
112900a23bdaSmrg
113000a23bdaSmrg			resources[0] = bo1;
113100a23bdaSmrg			resources[1] = bo2;
113200a23bdaSmrg			resources[2] = vram_max[loop2];
113300a23bdaSmrg			resources[3] = gtt_max[loop2];
113400a23bdaSmrg
113500a23bdaSmrg			/* fulfill PM4: test DMA copy linear */
113600a23bdaSmrg			i = j = 0;
113700a23bdaSmrg			if (family_id == AMDGPU_FAMILY_SI) {
113800a23bdaSmrg				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
113900a23bdaSmrg							  sdma_write_length);
114000a23bdaSmrg				pm4[i++] = 0xffffffff & bo2_mc;
114100a23bdaSmrg				pm4[i++] = 0xffffffff & bo1_mc;
114200a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
114300a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
114400a23bdaSmrg			} else {
114500a23bdaSmrg				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
114600a23bdaSmrg				if (family_id >= AMDGPU_FAMILY_AI)
114700a23bdaSmrg					pm4[i++] = sdma_write_length - 1;
114800a23bdaSmrg				else
114900a23bdaSmrg					pm4[i++] = sdma_write_length;
115000a23bdaSmrg				pm4[i++] = 0;
115100a23bdaSmrg				pm4[i++] = 0xffffffff & bo1_mc;
115200a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
115300a23bdaSmrg				pm4[i++] = 0xffffffff & bo2_mc;
115400a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
115500a23bdaSmrg			}
115600a23bdaSmrg
115700a23bdaSmrg			amdgpu_test_exec_cs_helper(context_handle,
115800a23bdaSmrg						   AMDGPU_HW_IP_DMA, 0,
115900a23bdaSmrg						   i, pm4,
116000a23bdaSmrg						   4, resources,
116100a23bdaSmrg						   ib_info, ibs_request);
116200a23bdaSmrg
116300a23bdaSmrg			/* verify if SDMA test result meets with expected */
116400a23bdaSmrg			i = 0;
116500a23bdaSmrg			while(i < sdma_write_length) {
116600a23bdaSmrg				CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
116700a23bdaSmrg			}
116800a23bdaSmrg			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
116900a23bdaSmrg						     sdma_write_length);
117000a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
117100a23bdaSmrg			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
117200a23bdaSmrg						     sdma_write_length);
117300a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
117400a23bdaSmrg			loop2++;
117500a23bdaSmrg		}
117600a23bdaSmrg		loop2 = 0;
117700a23bdaSmrg		loop1++;
117800a23bdaSmrg	}
117900a23bdaSmrg	amdgpu_bo_free(vram_max[0]);
118000a23bdaSmrg	amdgpu_bo_free(vram_max[1]);
118100a23bdaSmrg	amdgpu_bo_free(gtt_max[0]);
118200a23bdaSmrg	amdgpu_bo_free(gtt_max[1]);
118300a23bdaSmrg	/* clean resources */
118400a23bdaSmrg	free(resources);
118500a23bdaSmrg	free(ibs_request);
118600a23bdaSmrg	free(ib_info);
118700a23bdaSmrg	free(pm4);
118800a23bdaSmrg
118900a23bdaSmrg	/* end of test */
119000a23bdaSmrg	r = amdgpu_cs_ctx_free(context_handle);
119100a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
119200a23bdaSmrg}
119300a23bdaSmrg
119400a23bdaSmrg
11953f012e29Smrgstatic void amdgpu_command_submission_gfx(void)
11963f012e29Smrg{
11973f012e29Smrg	/* write data using the CP */
11983f012e29Smrg	amdgpu_command_submission_gfx_cp_write_data();
11993f012e29Smrg	/* const fill using the CP */
12003f012e29Smrg	amdgpu_command_submission_gfx_cp_const_fill();
12013f012e29Smrg	/* copy data using the CP */
12023f012e29Smrg	amdgpu_command_submission_gfx_cp_copy_data();
12033f012e29Smrg	/* separate IB buffers for multi-IB submission */
12043f012e29Smrg	amdgpu_command_submission_gfx_separate_ibs();
12053f012e29Smrg	/* shared IB buffer for multi-IB submission */
12063f012e29Smrg	amdgpu_command_submission_gfx_shared_ib();
12073f012e29Smrg}
12083f012e29Smrg
12093f012e29Smrgstatic void amdgpu_semaphore_test(void)
12103f012e29Smrg{
12113f012e29Smrg	amdgpu_context_handle context_handle[2];
12123f012e29Smrg	amdgpu_semaphore_handle sem;
12133f012e29Smrg	amdgpu_bo_handle ib_result_handle[2];
12143f012e29Smrg	void *ib_result_cpu[2];
12153f012e29Smrg	uint64_t ib_result_mc_address[2];
12163f012e29Smrg	struct amdgpu_cs_request ibs_request[2] = {0};
12173f012e29Smrg	struct amdgpu_cs_ib_info ib_info[2] = {0};
12183f012e29Smrg	struct amdgpu_cs_fence fence_status = {0};
12193f012e29Smrg	uint32_t *ptr;
12203f012e29Smrg	uint32_t expired;
1221d8807b2fSmrg	uint32_t sdma_nop, gfx_nop;
12223f012e29Smrg	amdgpu_bo_list_handle bo_list[2];
12233f012e29Smrg	amdgpu_va_handle va_handle[2];
12243f012e29Smrg	int r, i;
12254babd585Smrg	struct amdgpu_gpu_info gpu_info = {0};
12264babd585Smrg	unsigned gc_ip_type;
12274babd585Smrg
12284babd585Smrg	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
12294babd585Smrg	CU_ASSERT_EQUAL(r, 0);
12304babd585Smrg
12314babd585Smrg	gc_ip_type = (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) ?
12324babd585Smrg			AMDGPU_HW_IP_COMPUTE : AMDGPU_HW_IP_GFX;
12333f012e29Smrg
1234d8807b2fSmrg	if (family_id == AMDGPU_FAMILY_SI) {
1235d8807b2fSmrg		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
1236d8807b2fSmrg		gfx_nop = GFX_COMPUTE_NOP_SI;
1237d8807b2fSmrg	} else {
1238d8807b2fSmrg		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
1239d8807b2fSmrg		gfx_nop = GFX_COMPUTE_NOP;
1240d8807b2fSmrg	}
1241d8807b2fSmrg
12423f012e29Smrg	r = amdgpu_cs_create_semaphore(&sem);
12433f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
12443f012e29Smrg	for (i = 0; i < 2; i++) {
12453f012e29Smrg		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
12463f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
12473f012e29Smrg
12483f012e29Smrg		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
12493f012e29Smrg					    AMDGPU_GEM_DOMAIN_GTT, 0,
12503f012e29Smrg					    &ib_result_handle[i], &ib_result_cpu[i],
12513f012e29Smrg					    &ib_result_mc_address[i], &va_handle[i]);
12523f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
12533f012e29Smrg
12543f012e29Smrg		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
12553f012e29Smrg				       NULL, &bo_list[i]);
12563f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
12573f012e29Smrg	}
12583f012e29Smrg
12593f012e29Smrg	/* 1. same context different engine */
12603f012e29Smrg	ptr = ib_result_cpu[0];
1261d8807b2fSmrg	ptr[0] = sdma_nop;
12623f012e29Smrg	ib_info[0].ib_mc_address = ib_result_mc_address[0];
12633f012e29Smrg	ib_info[0].size = 1;
12643f012e29Smrg
12653f012e29Smrg	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
12663f012e29Smrg	ibs_request[0].number_of_ibs = 1;
12673f012e29Smrg	ibs_request[0].ibs = &ib_info[0];
12683f012e29Smrg	ibs_request[0].resources = bo_list[0];
12693f012e29Smrg	ibs_request[0].fence_info.handle = NULL;
12703f012e29Smrg	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
12713f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
12723f012e29Smrg	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
12733f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
12743f012e29Smrg
12754babd585Smrg	r = amdgpu_cs_wait_semaphore(context_handle[0], gc_ip_type, 0, 0, sem);
12763f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
12773f012e29Smrg	ptr = ib_result_cpu[1];
1278d8807b2fSmrg	ptr[0] = gfx_nop;
12793f012e29Smrg	ib_info[1].ib_mc_address = ib_result_mc_address[1];
12803f012e29Smrg	ib_info[1].size = 1;
12813f012e29Smrg
12824babd585Smrg	ibs_request[1].ip_type = gc_ip_type;
12833f012e29Smrg	ibs_request[1].number_of_ibs = 1;
12843f012e29Smrg	ibs_request[1].ibs = &ib_info[1];
12853f012e29Smrg	ibs_request[1].resources = bo_list[1];
12863f012e29Smrg	ibs_request[1].fence_info.handle = NULL;
12873f012e29Smrg
12883f012e29Smrg	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[1], 1);
12893f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
12903f012e29Smrg
12913f012e29Smrg	fence_status.context = context_handle[0];
12924babd585Smrg	fence_status.ip_type = gc_ip_type;
12933f012e29Smrg	fence_status.ip_instance = 0;
12943f012e29Smrg	fence_status.fence = ibs_request[1].seq_no;
12953f012e29Smrg	r = amdgpu_cs_query_fence_status(&fence_status,
1296bbff01ceSmrg					 AMDGPU_TIMEOUT_INFINITE, 0, &expired);
12973f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
12983f012e29Smrg	CU_ASSERT_EQUAL(expired, true);
12993f012e29Smrg
13003f012e29Smrg	/* 2. same engine different context */
13013f012e29Smrg	ptr = ib_result_cpu[0];
1302d8807b2fSmrg	ptr[0] = gfx_nop;
13033f012e29Smrg	ib_info[0].ib_mc_address = ib_result_mc_address[0];
13043f012e29Smrg	ib_info[0].size = 1;
13053f012e29Smrg
13064babd585Smrg	ibs_request[0].ip_type = gc_ip_type;
13073f012e29Smrg	ibs_request[0].number_of_ibs = 1;
13083f012e29Smrg	ibs_request[0].ibs = &ib_info[0];
13093f012e29Smrg	ibs_request[0].resources = bo_list[0];
13103f012e29Smrg	ibs_request[0].fence_info.handle = NULL;
13113f012e29Smrg	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
13123f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
13134babd585Smrg	r = amdgpu_cs_signal_semaphore(context_handle[0], gc_ip_type, 0, 0, sem);
13143f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
13153f012e29Smrg
13164babd585Smrg	r = amdgpu_cs_wait_semaphore(context_handle[1], gc_ip_type, 0, 0, sem);
13173f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
13183f012e29Smrg	ptr = ib_result_cpu[1];
1319d8807b2fSmrg	ptr[0] = gfx_nop;
13203f012e29Smrg	ib_info[1].ib_mc_address = ib_result_mc_address[1];
13213f012e29Smrg	ib_info[1].size = 1;
13223f012e29Smrg
13234babd585Smrg	ibs_request[1].ip_type = gc_ip_type;
13243f012e29Smrg	ibs_request[1].number_of_ibs = 1;
13253f012e29Smrg	ibs_request[1].ibs = &ib_info[1];
13263f012e29Smrg	ibs_request[1].resources = bo_list[1];
13273f012e29Smrg	ibs_request[1].fence_info.handle = NULL;
13283f012e29Smrg	r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request[1], 1);
13293f012e29Smrg
13303f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
13313f012e29Smrg
13323f012e29Smrg	fence_status.context = context_handle[1];
13334babd585Smrg	fence_status.ip_type = gc_ip_type;
13343f012e29Smrg	fence_status.ip_instance = 0;
13353f012e29Smrg	fence_status.fence = ibs_request[1].seq_no;
13363f012e29Smrg	r = amdgpu_cs_query_fence_status(&fence_status,
1337bbff01ceSmrg					 AMDGPU_TIMEOUT_INFINITE, 0, &expired);
13383f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
13393f012e29Smrg	CU_ASSERT_EQUAL(expired, true);
1340d8807b2fSmrg
13413f012e29Smrg	for (i = 0; i < 2; i++) {
13423f012e29Smrg		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
13433f012e29Smrg					     ib_result_mc_address[i], 4096);
13443f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
13453f012e29Smrg
13463f012e29Smrg		r = amdgpu_bo_list_destroy(bo_list[i]);
13473f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
13483f012e29Smrg
13493f012e29Smrg		r = amdgpu_cs_ctx_free(context_handle[i]);
13503f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
13513f012e29Smrg	}
13523f012e29Smrg
13533f012e29Smrg	r = amdgpu_cs_destroy_semaphore(sem);
13543f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
13553f012e29Smrg}
13563f012e29Smrg
13573f012e29Smrgstatic void amdgpu_command_submission_compute_nop(void)
13583f012e29Smrg{
13593f012e29Smrg	amdgpu_context_handle context_handle;
13603f012e29Smrg	amdgpu_bo_handle ib_result_handle;
13613f012e29Smrg	void *ib_result_cpu;
13623f012e29Smrg	uint64_t ib_result_mc_address;
13633f012e29Smrg	struct amdgpu_cs_request ibs_request;
13643f012e29Smrg	struct amdgpu_cs_ib_info ib_info;
13653f012e29Smrg	struct amdgpu_cs_fence fence_status;
13663f012e29Smrg	uint32_t *ptr;
13673f012e29Smrg	uint32_t expired;
136800a23bdaSmrg	int r, instance;
13693f012e29Smrg	amdgpu_bo_list_handle bo_list;
13703f012e29Smrg	amdgpu_va_handle va_handle;
1371d8807b2fSmrg	struct drm_amdgpu_info_hw_ip info;
1372d8807b2fSmrg
1373d8807b2fSmrg	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
1374d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
13753f012e29Smrg
13763f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
13773f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
13783f012e29Smrg
1379d8807b2fSmrg	for (instance = 0; (1 << instance) & info.available_rings; instance++) {
13803f012e29Smrg		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
13813f012e29Smrg					    AMDGPU_GEM_DOMAIN_GTT, 0,
13823f012e29Smrg					    &ib_result_handle, &ib_result_cpu,
13833f012e29Smrg					    &ib_result_mc_address, &va_handle);
13843f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
13853f012e29Smrg
13863f012e29Smrg		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
13873f012e29Smrg				       &bo_list);
13883f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
13893f012e29Smrg
13903f012e29Smrg		ptr = ib_result_cpu;
1391d8807b2fSmrg		memset(ptr, 0, 16);
1392d8807b2fSmrg		ptr[0]=PACKET3(PACKET3_NOP, 14);
13933f012e29Smrg
13943f012e29Smrg		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
13953f012e29Smrg		ib_info.ib_mc_address = ib_result_mc_address;
13963f012e29Smrg		ib_info.size = 16;
13973f012e29Smrg
13983f012e29Smrg		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
13993f012e29Smrg		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
14003f012e29Smrg		ibs_request.ring = instance;
14013f012e29Smrg		ibs_request.number_of_ibs = 1;
14023f012e29Smrg		ibs_request.ibs = &ib_info;
14033f012e29Smrg		ibs_request.resources = bo_list;
14043f012e29Smrg		ibs_request.fence_info.handle = NULL;
14053f012e29Smrg
14063f012e29Smrg		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
14073f012e29Smrg		r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
14083f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
14093f012e29Smrg
14103f012e29Smrg		fence_status.context = context_handle;
14113f012e29Smrg		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
14123f012e29Smrg		fence_status.ip_instance = 0;
14133f012e29Smrg		fence_status.ring = instance;
14143f012e29Smrg		fence_status.fence = ibs_request.seq_no;
14153f012e29Smrg
14163f012e29Smrg		r = amdgpu_cs_query_fence_status(&fence_status,
14173f012e29Smrg						 AMDGPU_TIMEOUT_INFINITE,
14183f012e29Smrg						 0, &expired);
14193f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
14203f012e29Smrg
14213f012e29Smrg		r = amdgpu_bo_list_destroy(bo_list);
14223f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
14233f012e29Smrg
14243f012e29Smrg		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
14253f012e29Smrg					     ib_result_mc_address, 4096);
14263f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
14273f012e29Smrg	}
14283f012e29Smrg
14293f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
14303f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
14313f012e29Smrg}
14323f012e29Smrg
14333f012e29Smrgstatic void amdgpu_command_submission_compute_cp_write_data(void)
14343f012e29Smrg{
14353f012e29Smrg	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
14363f012e29Smrg}
14373f012e29Smrg
14383f012e29Smrgstatic void amdgpu_command_submission_compute_cp_const_fill(void)
14393f012e29Smrg{
14403f012e29Smrg	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
14413f012e29Smrg}
14423f012e29Smrg
14433f012e29Smrgstatic void amdgpu_command_submission_compute_cp_copy_data(void)
14443f012e29Smrg{
14453f012e29Smrg	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
14463f012e29Smrg}
14473f012e29Smrg
14483f012e29Smrgstatic void amdgpu_command_submission_compute(void)
14493f012e29Smrg{
14503f012e29Smrg	/* write data using the CP */
14513f012e29Smrg	amdgpu_command_submission_compute_cp_write_data();
14523f012e29Smrg	/* const fill using the CP */
14533f012e29Smrg	amdgpu_command_submission_compute_cp_const_fill();
14543f012e29Smrg	/* copy data using the CP */
14553f012e29Smrg	amdgpu_command_submission_compute_cp_copy_data();
14563f012e29Smrg	/* nop test */
14573f012e29Smrg	amdgpu_command_submission_compute_nop();
14583f012e29Smrg}
14593f012e29Smrg
14603f012e29Smrg/*
14613f012e29Smrg * caller need create/release:
14623f012e29Smrg * pm4_src, resources, ib_info, and ibs_request
14633f012e29Smrg * submit command stream described in ibs_request and wait for this IB accomplished
14643f012e29Smrg */
146541687f09Smrgvoid
146641687f09Smrgamdgpu_test_exec_cs_helper_raw(amdgpu_device_handle device_handle,
146741687f09Smrg			       amdgpu_context_handle context_handle,
146841687f09Smrg			       unsigned ip_type, int instance, int pm4_dw,
146941687f09Smrg			       uint32_t *pm4_src, int res_cnt,
147041687f09Smrg			       amdgpu_bo_handle *resources,
147141687f09Smrg			       struct amdgpu_cs_ib_info *ib_info,
147241687f09Smrg			       struct amdgpu_cs_request *ibs_request,
147341687f09Smrg			       bool secure)
14743f012e29Smrg{
14753f012e29Smrg	int r;
14763f012e29Smrg	uint32_t expired;
14773f012e29Smrg	uint32_t *ring_ptr;
14783f012e29Smrg	amdgpu_bo_handle ib_result_handle;
14793f012e29Smrg	void *ib_result_cpu;
14803f012e29Smrg	uint64_t ib_result_mc_address;
14813f012e29Smrg	struct amdgpu_cs_fence fence_status = {0};
14823f012e29Smrg	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
14833f012e29Smrg	amdgpu_va_handle va_handle;
14843f012e29Smrg
14853f012e29Smrg	/* prepare CS */
14863f012e29Smrg	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
14873f012e29Smrg	CU_ASSERT_NOT_EQUAL(resources, NULL);
14883f012e29Smrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
14893f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
14903f012e29Smrg	CU_ASSERT_TRUE(pm4_dw <= 1024);
14913f012e29Smrg
14923f012e29Smrg	/* allocate IB */
14933f012e29Smrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
14943f012e29Smrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
14953f012e29Smrg				    &ib_result_handle, &ib_result_cpu,
14963f012e29Smrg				    &ib_result_mc_address, &va_handle);
14973f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
14983f012e29Smrg
14993f012e29Smrg	/* copy PM4 packet to ring from caller */
15003f012e29Smrg	ring_ptr = ib_result_cpu;
15013f012e29Smrg	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));
15023f012e29Smrg
15033f012e29Smrg	ib_info->ib_mc_address = ib_result_mc_address;
15043f012e29Smrg	ib_info->size = pm4_dw;
150541687f09Smrg	if (secure)
150641687f09Smrg		ib_info->flags |= AMDGPU_IB_FLAGS_SECURE;
15073f012e29Smrg
15083f012e29Smrg	ibs_request->ip_type = ip_type;
15093f012e29Smrg	ibs_request->ring = instance;
15103f012e29Smrg	ibs_request->number_of_ibs = 1;
15113f012e29Smrg	ibs_request->ibs = ib_info;
15123f012e29Smrg	ibs_request->fence_info.handle = NULL;
15133f012e29Smrg
15143f012e29Smrg	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
15153f012e29Smrg	all_res[res_cnt] = ib_result_handle;
15163f012e29Smrg
15173f012e29Smrg	r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res,
15183f012e29Smrg				  NULL, &ibs_request->resources);
15193f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
15203f012e29Smrg
15213f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
15223f012e29Smrg
15233f012e29Smrg	/* submit CS */
15243f012e29Smrg	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
15253f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
15263f012e29Smrg
15273f012e29Smrg	r = amdgpu_bo_list_destroy(ibs_request->resources);
15283f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
15293f012e29Smrg
15303f012e29Smrg	fence_status.ip_type = ip_type;
15313f012e29Smrg	fence_status.ip_instance = 0;
15323f012e29Smrg	fence_status.ring = ibs_request->ring;
15333f012e29Smrg	fence_status.context = context_handle;
15343f012e29Smrg	fence_status.fence = ibs_request->seq_no;
15353f012e29Smrg
15363f012e29Smrg	/* wait for IB accomplished */
15373f012e29Smrg	r = amdgpu_cs_query_fence_status(&fence_status,
15383f012e29Smrg					 AMDGPU_TIMEOUT_INFINITE,
15393f012e29Smrg					 0, &expired);
15403f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
15413f012e29Smrg	CU_ASSERT_EQUAL(expired, true);
15423f012e29Smrg
15433f012e29Smrg	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
15443f012e29Smrg				     ib_result_mc_address, 4096);
15453f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
15463f012e29Smrg}
15473f012e29Smrg
154841687f09Smrgstatic void
154941687f09Smrgamdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
155041687f09Smrg			   unsigned ip_type, int instance, int pm4_dw,
155141687f09Smrg			   uint32_t *pm4_src, int res_cnt,
155241687f09Smrg			   amdgpu_bo_handle *resources,
155341687f09Smrg			   struct amdgpu_cs_ib_info *ib_info,
155441687f09Smrg			   struct amdgpu_cs_request *ibs_request)
155541687f09Smrg{
155641687f09Smrg	amdgpu_test_exec_cs_helper_raw(device_handle, context_handle,
155741687f09Smrg				       ip_type, instance, pm4_dw, pm4_src,
155841687f09Smrg				       res_cnt, resources, ib_info,
155941687f09Smrg				       ibs_request, false);
156041687f09Smrg}
156141687f09Smrg
156241687f09Smrgvoid
156341687f09Smrgamdgpu_command_submission_write_linear_helper_with_secure(amdgpu_device_handle
156441687f09Smrg							  device, unsigned
156541687f09Smrg							  ip_type, bool secure)
15663f012e29Smrg{
15673f012e29Smrg	const int sdma_write_length = 128;
15683f012e29Smrg	const int pm4_dw = 256;
15693f012e29Smrg	amdgpu_context_handle context_handle;
15703f012e29Smrg	amdgpu_bo_handle bo;
15713f012e29Smrg	amdgpu_bo_handle *resources;
15723f012e29Smrg	uint32_t *pm4;
15733f012e29Smrg	struct amdgpu_cs_ib_info *ib_info;
15743f012e29Smrg	struct amdgpu_cs_request *ibs_request;
15753f012e29Smrg	uint64_t bo_mc;
15763f012e29Smrg	volatile uint32_t *bo_cpu;
157741687f09Smrg	uint32_t bo_cpu_origin;
157800a23bdaSmrg	int i, j, r, loop, ring_id;
15793f012e29Smrg	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
15803f012e29Smrg	amdgpu_va_handle va_handle;
158100a23bdaSmrg	struct drm_amdgpu_info_hw_ip hw_ip_info;
15823f012e29Smrg
15833f012e29Smrg	pm4 = calloc(pm4_dw, sizeof(*pm4));
15843f012e29Smrg	CU_ASSERT_NOT_EQUAL(pm4, NULL);
15853f012e29Smrg
15863f012e29Smrg	ib_info = calloc(1, sizeof(*ib_info));
15873f012e29Smrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
15883f012e29Smrg
15893f012e29Smrg	ibs_request = calloc(1, sizeof(*ibs_request));
15903f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
15913f012e29Smrg
159241687f09Smrg	r = amdgpu_query_hw_ip_info(device, ip_type, 0, &hw_ip_info);
159300a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
159400a23bdaSmrg
159541687f09Smrg	for (i = 0; secure && (i < 2); i++)
159641687f09Smrg		gtt_flags[i] |= AMDGPU_GEM_CREATE_ENCRYPTED;
159741687f09Smrg
159841687f09Smrg	r = amdgpu_cs_ctx_create(device, &context_handle);
159941687f09Smrg
16003f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
16013f012e29Smrg
16023f012e29Smrg	/* prepare resource */
16033f012e29Smrg	resources = calloc(1, sizeof(amdgpu_bo_handle));
16043f012e29Smrg	CU_ASSERT_NOT_EQUAL(resources, NULL);
16053f012e29Smrg
160600a23bdaSmrg	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
160700a23bdaSmrg		loop = 0;
160800a23bdaSmrg		while(loop < 2) {
160900a23bdaSmrg			/* allocate UC bo for sDMA use */
161041687f09Smrg			r = amdgpu_bo_alloc_and_map(device,
161100a23bdaSmrg						    sdma_write_length * sizeof(uint32_t),
161200a23bdaSmrg						    4096, AMDGPU_GEM_DOMAIN_GTT,
161300a23bdaSmrg						    gtt_flags[loop], &bo, (void**)&bo_cpu,
161400a23bdaSmrg						    &bo_mc, &va_handle);
161500a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
16163f012e29Smrg
161700a23bdaSmrg			/* clear bo */
161800a23bdaSmrg			memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));
16193f012e29Smrg
162000a23bdaSmrg			resources[0] = bo;
16213f012e29Smrg
162200a23bdaSmrg			/* fulfill PM4: test DMA write-linear */
162300a23bdaSmrg			i = j = 0;
162400a23bdaSmrg			if (ip_type == AMDGPU_HW_IP_DMA) {
162500a23bdaSmrg				if (family_id == AMDGPU_FAMILY_SI)
162600a23bdaSmrg					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
162700a23bdaSmrg								  sdma_write_length);
162800a23bdaSmrg				else
162900a23bdaSmrg					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
163041687f09Smrg							       SDMA_WRITE_SUB_OPCODE_LINEAR,
163141687f09Smrg							       secure ? SDMA_ATOMIC_TMZ(1) : 0);
163241687f09Smrg				pm4[i++] = 0xfffffffc & bo_mc;
163300a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
163400a23bdaSmrg				if (family_id >= AMDGPU_FAMILY_AI)
163500a23bdaSmrg					pm4[i++] = sdma_write_length - 1;
163600a23bdaSmrg				else if (family_id != AMDGPU_FAMILY_SI)
163700a23bdaSmrg					pm4[i++] = sdma_write_length;
163800a23bdaSmrg				while(j++ < sdma_write_length)
163900a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
164000a23bdaSmrg			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
164100a23bdaSmrg				    (ip_type == AMDGPU_HW_IP_COMPUTE)) {
164200a23bdaSmrg				pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
164300a23bdaSmrg				pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
164400a23bdaSmrg				pm4[i++] = 0xfffffffc & bo_mc;
164500a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
164600a23bdaSmrg				while(j++ < sdma_write_length)
164700a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
164800a23bdaSmrg			}
16493f012e29Smrg
165041687f09Smrg			amdgpu_test_exec_cs_helper_raw(device, context_handle,
165141687f09Smrg						       ip_type, ring_id, i, pm4,
165241687f09Smrg						       1, resources, ib_info,
165341687f09Smrg						       ibs_request, secure);
16543f012e29Smrg
165500a23bdaSmrg			/* verify if SDMA test result meets with expected */
165600a23bdaSmrg			i = 0;
165741687f09Smrg			if (!secure) {
165841687f09Smrg				while(i < sdma_write_length) {
165941687f09Smrg					CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
166041687f09Smrg				}
166141687f09Smrg			} else if (ip_type == AMDGPU_HW_IP_GFX) {
166241687f09Smrg				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
166341687f09Smrg				pm4[i++] = PACKET3(PACKET3_ATOMIC_MEM, 7);
166441687f09Smrg				/* atomic opcode for 32b w/ RTN and ATOMIC_SWAPCMP_RTN
166541687f09Smrg				 * command, 1-loop_until_compare_satisfied.
166641687f09Smrg				 * single_pass_atomic, 0-lru
166741687f09Smrg				 * engine_sel, 0-micro_engine
166841687f09Smrg				 */
166941687f09Smrg				pm4[i++] = (TC_OP_ATOMIC_CMPSWAP_RTN_32 |
167041687f09Smrg							ATOMIC_MEM_COMMAND(1) |
167141687f09Smrg							ATOMIC_MEM_CACHEPOLICAY(0) |
167241687f09Smrg							ATOMIC_MEM_ENGINESEL(0));
167341687f09Smrg				pm4[i++] = 0xfffffffc & bo_mc;
167441687f09Smrg				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
167541687f09Smrg				pm4[i++] = 0x12345678;
167641687f09Smrg				pm4[i++] = 0x0;
167741687f09Smrg				pm4[i++] = 0xdeadbeaf;
167841687f09Smrg				pm4[i++] = 0x0;
167941687f09Smrg				pm4[i++] = 0x100;
168041687f09Smrg				amdgpu_test_exec_cs_helper_raw(device, context_handle,
168141687f09Smrg							ip_type, ring_id, i, pm4,
168241687f09Smrg							1, resources, ib_info,
168341687f09Smrg							ibs_request, true);
168441687f09Smrg			} else if (ip_type == AMDGPU_HW_IP_DMA) {
168541687f09Smrg				/* restore the bo_cpu to compare */
168641687f09Smrg				bo_cpu_origin = bo_cpu[0];
168741687f09Smrg				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
168841687f09Smrg				/* atomic opcode for 32b w/ RTN and ATOMIC_SWAPCMP_RTN
168941687f09Smrg				 * loop, 1-loop_until_compare_satisfied.
169041687f09Smrg				 * single_pass_atomic, 0-lru
169141687f09Smrg				 */
169241687f09Smrg				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
169341687f09Smrg							       0,
169441687f09Smrg							       SDMA_ATOMIC_LOOP(1) |
169541687f09Smrg							       SDMA_ATOMIC_TMZ(1) |
169641687f09Smrg							       SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
169741687f09Smrg				pm4[i++] = 0xfffffffc & bo_mc;
169841687f09Smrg				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
169941687f09Smrg				pm4[i++] = 0x12345678;
170041687f09Smrg				pm4[i++] = 0x0;
170141687f09Smrg				pm4[i++] = 0xdeadbeaf;
170241687f09Smrg				pm4[i++] = 0x0;
170341687f09Smrg				pm4[i++] = 0x100;
170441687f09Smrg				amdgpu_test_exec_cs_helper_raw(device, context_handle,
170541687f09Smrg							ip_type, ring_id, i, pm4,
170641687f09Smrg							1, resources, ib_info,
170741687f09Smrg							ibs_request, true);
170841687f09Smrg				/* DMA's atomic behavir is unlike GFX
170941687f09Smrg				 * If the comparing data is not equal to destination data,
171041687f09Smrg				 * For GFX, loop again till gfx timeout(system hang).
171141687f09Smrg				 * For DMA, loop again till timer expired and then send interrupt.
171241687f09Smrg				 * So testcase can't use interrupt mechanism.
171341687f09Smrg				 * We take another way to verify. When the comparing data is not
171441687f09Smrg				 * equal to destination data, overwrite the source data to the destination
171541687f09Smrg				 * buffer. Otherwise, original destination data unchanged.
171641687f09Smrg				 * So if the bo_cpu data is overwritten, the result is passed.
171741687f09Smrg				 */
171841687f09Smrg				CU_ASSERT_NOT_EQUAL(bo_cpu[0], bo_cpu_origin);
171941687f09Smrg
172041687f09Smrg				/* compare again for the case of dest_data != cmp_data */
172141687f09Smrg				i = 0;
172241687f09Smrg				/* restore again, here dest_data should be */
172341687f09Smrg				bo_cpu_origin = bo_cpu[0];
172441687f09Smrg				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
172541687f09Smrg				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
172641687f09Smrg							       0,
172741687f09Smrg							       SDMA_ATOMIC_LOOP(1) |
172841687f09Smrg							       SDMA_ATOMIC_TMZ(1) |
172941687f09Smrg							       SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
173041687f09Smrg				pm4[i++] = 0xfffffffc & bo_mc;
173141687f09Smrg				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
173241687f09Smrg				pm4[i++] = 0x87654321;
173341687f09Smrg				pm4[i++] = 0x0;
173441687f09Smrg				pm4[i++] = 0xdeadbeaf;
173541687f09Smrg				pm4[i++] = 0x0;
173641687f09Smrg				pm4[i++] = 0x100;
173741687f09Smrg				amdgpu_test_exec_cs_helper_raw(device, context_handle,
173841687f09Smrg							ip_type, ring_id, i, pm4,
173941687f09Smrg							1, resources, ib_info,
174041687f09Smrg							ibs_request, true);
174141687f09Smrg				/* here bo_cpu[0] should be unchanged, still is 0x12345678, otherwise failed*/
174241687f09Smrg				CU_ASSERT_EQUAL(bo_cpu[0], bo_cpu_origin);
174300a23bdaSmrg			}
17443f012e29Smrg
174500a23bdaSmrg			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
174600a23bdaSmrg						     sdma_write_length * sizeof(uint32_t));
174700a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
174800a23bdaSmrg			loop++;
17493f012e29Smrg		}
17503f012e29Smrg	}
17513f012e29Smrg	/* clean resources */
17523f012e29Smrg	free(resources);
17533f012e29Smrg	free(ibs_request);
17543f012e29Smrg	free(ib_info);
17553f012e29Smrg	free(pm4);
17563f012e29Smrg
17573f012e29Smrg	/* end of test */
17583f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
17593f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
17603f012e29Smrg}
17613f012e29Smrg
176241687f09Smrgstatic void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
176341687f09Smrg{
176441687f09Smrg	amdgpu_command_submission_write_linear_helper_with_secure(device_handle,
176541687f09Smrg								  ip_type,
176641687f09Smrg								  false);
176741687f09Smrg}
176841687f09Smrg
17693f012e29Smrgstatic void amdgpu_command_submission_sdma_write_linear(void)
17703f012e29Smrg{
17713f012e29Smrg	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
17723f012e29Smrg}
17733f012e29Smrg
17743f012e29Smrgstatic void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
17753f012e29Smrg{
17763f012e29Smrg	const int sdma_write_length = 1024 * 1024;
17773f012e29Smrg	const int pm4_dw = 256;
17783f012e29Smrg	amdgpu_context_handle context_handle;
17793f012e29Smrg	amdgpu_bo_handle bo;
17803f012e29Smrg	amdgpu_bo_handle *resources;
17813f012e29Smrg	uint32_t *pm4;
17823f012e29Smrg	struct amdgpu_cs_ib_info *ib_info;
17833f012e29Smrg	struct amdgpu_cs_request *ibs_request;
17843f012e29Smrg	uint64_t bo_mc;
17853f012e29Smrg	volatile uint32_t *bo_cpu;
178600a23bdaSmrg	int i, j, r, loop, ring_id;
17873f012e29Smrg	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
17883f012e29Smrg	amdgpu_va_handle va_handle;
178900a23bdaSmrg	struct drm_amdgpu_info_hw_ip hw_ip_info;
17903f012e29Smrg
17913f012e29Smrg	pm4 = calloc(pm4_dw, sizeof(*pm4));
17923f012e29Smrg	CU_ASSERT_NOT_EQUAL(pm4, NULL);
17933f012e29Smrg
17943f012e29Smrg	ib_info = calloc(1, sizeof(*ib_info));
17953f012e29Smrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
17963f012e29Smrg
17973f012e29Smrg	ibs_request = calloc(1, sizeof(*ibs_request));
17983f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
17993f012e29Smrg
180000a23bdaSmrg	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
180100a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
180200a23bdaSmrg
18033f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
18043f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
18053f012e29Smrg
18063f012e29Smrg	/* prepare resource */
18073f012e29Smrg	resources = calloc(1, sizeof(amdgpu_bo_handle));
18083f012e29Smrg	CU_ASSERT_NOT_EQUAL(resources, NULL);
18093f012e29Smrg
181000a23bdaSmrg	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
181100a23bdaSmrg		loop = 0;
181200a23bdaSmrg		while(loop < 2) {
181300a23bdaSmrg			/* allocate UC bo for sDMA use */
181400a23bdaSmrg			r = amdgpu_bo_alloc_and_map(device_handle,
181500a23bdaSmrg						    sdma_write_length, 4096,
181600a23bdaSmrg						    AMDGPU_GEM_DOMAIN_GTT,
181700a23bdaSmrg						    gtt_flags[loop], &bo, (void**)&bo_cpu,
181800a23bdaSmrg						    &bo_mc, &va_handle);
181900a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
18203f012e29Smrg
182100a23bdaSmrg			/* clear bo */
182200a23bdaSmrg			memset((void*)bo_cpu, 0, sdma_write_length);
18233f012e29Smrg
182400a23bdaSmrg			resources[0] = bo;
18253f012e29Smrg
182600a23bdaSmrg			/* fulfill PM4: test DMA const fill */
182700a23bdaSmrg			i = j = 0;
182800a23bdaSmrg			if (ip_type == AMDGPU_HW_IP_DMA) {
182900a23bdaSmrg				if (family_id == AMDGPU_FAMILY_SI) {
183000a23bdaSmrg					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
183100a23bdaSmrg								  0, 0, 0,
183200a23bdaSmrg								  sdma_write_length / 4);
183300a23bdaSmrg					pm4[i++] = 0xfffffffc & bo_mc;
183400a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
183500a23bdaSmrg					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
183600a23bdaSmrg				} else {
183700a23bdaSmrg					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
183800a23bdaSmrg							       SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
183900a23bdaSmrg					pm4[i++] = 0xffffffff & bo_mc;
184000a23bdaSmrg					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
184100a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
184200a23bdaSmrg					if (family_id >= AMDGPU_FAMILY_AI)
184300a23bdaSmrg						pm4[i++] = sdma_write_length - 1;
184400a23bdaSmrg					else
184500a23bdaSmrg						pm4[i++] = sdma_write_length;
184600a23bdaSmrg				}
184700a23bdaSmrg			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
184800a23bdaSmrg				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
184900a23bdaSmrg				if (family_id == AMDGPU_FAMILY_SI) {
185000a23bdaSmrg					pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
185100a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
185200a23bdaSmrg					pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
185300a23bdaSmrg						   PACKET3_DMA_DATA_SI_DST_SEL(0) |
185400a23bdaSmrg						   PACKET3_DMA_DATA_SI_SRC_SEL(2) |
185500a23bdaSmrg						   PACKET3_DMA_DATA_SI_CP_SYNC;
185600a23bdaSmrg					pm4[i++] = 0xffffffff & bo_mc;
185700a23bdaSmrg					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1858d8807b2fSmrg					pm4[i++] = sdma_write_length;
185900a23bdaSmrg				} else {
186000a23bdaSmrg					pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
186100a23bdaSmrg					pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
186200a23bdaSmrg						   PACKET3_DMA_DATA_DST_SEL(0) |
186300a23bdaSmrg						   PACKET3_DMA_DATA_SRC_SEL(2) |
186400a23bdaSmrg						   PACKET3_DMA_DATA_CP_SYNC;
186500a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
186600a23bdaSmrg					pm4[i++] = 0;
186700a23bdaSmrg					pm4[i++] = 0xfffffffc & bo_mc;
186800a23bdaSmrg					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
186900a23bdaSmrg					pm4[i++] = sdma_write_length;
187000a23bdaSmrg				}
1871d8807b2fSmrg			}
18723f012e29Smrg
187300a23bdaSmrg			amdgpu_test_exec_cs_helper(context_handle,
187400a23bdaSmrg						   ip_type, ring_id,
187500a23bdaSmrg						   i, pm4,
187600a23bdaSmrg						   1, resources,
187700a23bdaSmrg						   ib_info, ibs_request);
18783f012e29Smrg
187900a23bdaSmrg			/* verify if SDMA test result meets with expected */
188000a23bdaSmrg			i = 0;
188100a23bdaSmrg			while(i < (sdma_write_length / 4)) {
188200a23bdaSmrg				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
188300a23bdaSmrg			}
18843f012e29Smrg
188500a23bdaSmrg			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
188600a23bdaSmrg						     sdma_write_length);
188700a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
188800a23bdaSmrg			loop++;
188900a23bdaSmrg		}
18903f012e29Smrg	}
18913f012e29Smrg	/* clean resources */
18923f012e29Smrg	free(resources);
18933f012e29Smrg	free(ibs_request);
18943f012e29Smrg	free(ib_info);
18953f012e29Smrg	free(pm4);
18963f012e29Smrg
18973f012e29Smrg	/* end of test */
18983f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
18993f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
19003f012e29Smrg}
19013f012e29Smrg
19023f012e29Smrgstatic void amdgpu_command_submission_sdma_const_fill(void)
19033f012e29Smrg{
19043f012e29Smrg	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
19053f012e29Smrg}
19063f012e29Smrg
19073f012e29Smrgstatic void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
19083f012e29Smrg{
19093f012e29Smrg	const int sdma_write_length = 1024;
19103f012e29Smrg	const int pm4_dw = 256;
19113f012e29Smrg	amdgpu_context_handle context_handle;
19123f012e29Smrg	amdgpu_bo_handle bo1, bo2;
19133f012e29Smrg	amdgpu_bo_handle *resources;
19143f012e29Smrg	uint32_t *pm4;
19153f012e29Smrg	struct amdgpu_cs_ib_info *ib_info;
19163f012e29Smrg	struct amdgpu_cs_request *ibs_request;
19173f012e29Smrg	uint64_t bo1_mc, bo2_mc;
19183f012e29Smrg	volatile unsigned char *bo1_cpu, *bo2_cpu;
191900a23bdaSmrg	int i, j, r, loop1, loop2, ring_id;
19203f012e29Smrg	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
19213f012e29Smrg	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
192200a23bdaSmrg	struct drm_amdgpu_info_hw_ip hw_ip_info;
19233f012e29Smrg
19243f012e29Smrg	pm4 = calloc(pm4_dw, sizeof(*pm4));
19253f012e29Smrg	CU_ASSERT_NOT_EQUAL(pm4, NULL);
19263f012e29Smrg
19273f012e29Smrg	ib_info = calloc(1, sizeof(*ib_info));
19283f012e29Smrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
19293f012e29Smrg
19303f012e29Smrg	ibs_request = calloc(1, sizeof(*ibs_request));
19313f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
19323f012e29Smrg
193300a23bdaSmrg	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
193400a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
193500a23bdaSmrg
19363f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
19373f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
19383f012e29Smrg
19393f012e29Smrg	/* prepare resource */
19403f012e29Smrg	resources = calloc(2, sizeof(amdgpu_bo_handle));
19413f012e29Smrg	CU_ASSERT_NOT_EQUAL(resources, NULL);
19423f012e29Smrg
194300a23bdaSmrg	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
194400a23bdaSmrg		loop1 = loop2 = 0;
194500a23bdaSmrg		/* run 9 circle to test all mapping combination */
194600a23bdaSmrg		while(loop1 < 2) {
194700a23bdaSmrg			while(loop2 < 2) {
194800a23bdaSmrg				/* allocate UC bo1for sDMA use */
194900a23bdaSmrg				r = amdgpu_bo_alloc_and_map(device_handle,
195000a23bdaSmrg							    sdma_write_length, 4096,
195100a23bdaSmrg							    AMDGPU_GEM_DOMAIN_GTT,
195200a23bdaSmrg							    gtt_flags[loop1], &bo1,
195300a23bdaSmrg							    (void**)&bo1_cpu, &bo1_mc,
195400a23bdaSmrg							    &bo1_va_handle);
195500a23bdaSmrg				CU_ASSERT_EQUAL(r, 0);
195600a23bdaSmrg
195700a23bdaSmrg				/* set bo1 */
195800a23bdaSmrg				memset((void*)bo1_cpu, 0xaa, sdma_write_length);
195900a23bdaSmrg
196000a23bdaSmrg				/* allocate UC bo2 for sDMA use */
196100a23bdaSmrg				r = amdgpu_bo_alloc_and_map(device_handle,
196200a23bdaSmrg							    sdma_write_length, 4096,
196300a23bdaSmrg							    AMDGPU_GEM_DOMAIN_GTT,
196400a23bdaSmrg							    gtt_flags[loop2], &bo2,
196500a23bdaSmrg							    (void**)&bo2_cpu, &bo2_mc,
196600a23bdaSmrg							    &bo2_va_handle);
196700a23bdaSmrg				CU_ASSERT_EQUAL(r, 0);
196800a23bdaSmrg
196900a23bdaSmrg				/* clear bo2 */
197000a23bdaSmrg				memset((void*)bo2_cpu, 0, sdma_write_length);
197100a23bdaSmrg
197200a23bdaSmrg				resources[0] = bo1;
197300a23bdaSmrg				resources[1] = bo2;
197400a23bdaSmrg
197500a23bdaSmrg				/* fulfill PM4: test DMA copy linear */
197600a23bdaSmrg				i = j = 0;
197700a23bdaSmrg				if (ip_type == AMDGPU_HW_IP_DMA) {
197800a23bdaSmrg					if (family_id == AMDGPU_FAMILY_SI) {
197900a23bdaSmrg						pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
198000a23bdaSmrg									  0, 0, 0,
198100a23bdaSmrg									  sdma_write_length);
198200a23bdaSmrg						pm4[i++] = 0xffffffff & bo2_mc;
198300a23bdaSmrg						pm4[i++] = 0xffffffff & bo1_mc;
198400a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
198500a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
198600a23bdaSmrg					} else {
198700a23bdaSmrg						pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
198800a23bdaSmrg								       SDMA_COPY_SUB_OPCODE_LINEAR,
198900a23bdaSmrg								       0);
199000a23bdaSmrg						if (family_id >= AMDGPU_FAMILY_AI)
199100a23bdaSmrg							pm4[i++] = sdma_write_length - 1;
199200a23bdaSmrg						else
199300a23bdaSmrg							pm4[i++] = sdma_write_length;
199400a23bdaSmrg						pm4[i++] = 0;
199500a23bdaSmrg						pm4[i++] = 0xffffffff & bo1_mc;
199600a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
199700a23bdaSmrg						pm4[i++] = 0xffffffff & bo2_mc;
199800a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
199900a23bdaSmrg					}
200000a23bdaSmrg				} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
200100a23bdaSmrg					   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
200200a23bdaSmrg					if (family_id == AMDGPU_FAMILY_SI) {
200300a23bdaSmrg						pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
200400a23bdaSmrg						pm4[i++] = 0xfffffffc & bo1_mc;
200500a23bdaSmrg						pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
200600a23bdaSmrg							   PACKET3_DMA_DATA_SI_DST_SEL(0) |
200700a23bdaSmrg							   PACKET3_DMA_DATA_SI_SRC_SEL(0) |
200800a23bdaSmrg							   PACKET3_DMA_DATA_SI_CP_SYNC |
200900a23bdaSmrg							   (0xffff00000000 & bo1_mc) >> 32;
201000a23bdaSmrg						pm4[i++] = 0xfffffffc & bo2_mc;
201100a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
2012d8807b2fSmrg						pm4[i++] = sdma_write_length;
201300a23bdaSmrg					} else {
201400a23bdaSmrg						pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
201500a23bdaSmrg						pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
201600a23bdaSmrg							   PACKET3_DMA_DATA_DST_SEL(0) |
201700a23bdaSmrg							   PACKET3_DMA_DATA_SRC_SEL(0) |
201800a23bdaSmrg							   PACKET3_DMA_DATA_CP_SYNC;
201900a23bdaSmrg						pm4[i++] = 0xfffffffc & bo1_mc;
202000a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
202100a23bdaSmrg						pm4[i++] = 0xfffffffc & bo2_mc;
202200a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
202300a23bdaSmrg						pm4[i++] = sdma_write_length;
202400a23bdaSmrg					}
2025d8807b2fSmrg				}
20263f012e29Smrg
202700a23bdaSmrg				amdgpu_test_exec_cs_helper(context_handle,
202800a23bdaSmrg							   ip_type, ring_id,
202900a23bdaSmrg							   i, pm4,
203000a23bdaSmrg							   2, resources,
203100a23bdaSmrg							   ib_info, ibs_request);
20323f012e29Smrg
203300a23bdaSmrg				/* verify if SDMA test result meets with expected */
203400a23bdaSmrg				i = 0;
203500a23bdaSmrg				while(i < sdma_write_length) {
203600a23bdaSmrg					CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
203700a23bdaSmrg				}
203800a23bdaSmrg				r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
203900a23bdaSmrg							     sdma_write_length);
204000a23bdaSmrg				CU_ASSERT_EQUAL(r, 0);
204100a23bdaSmrg				r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
204200a23bdaSmrg							     sdma_write_length);
204300a23bdaSmrg				CU_ASSERT_EQUAL(r, 0);
204400a23bdaSmrg				loop2++;
20453f012e29Smrg			}
204600a23bdaSmrg			loop1++;
20473f012e29Smrg		}
20483f012e29Smrg	}
20493f012e29Smrg	/* clean resources */
20503f012e29Smrg	free(resources);
20513f012e29Smrg	free(ibs_request);
20523f012e29Smrg	free(ib_info);
20533f012e29Smrg	free(pm4);
20543f012e29Smrg
20553f012e29Smrg	/* end of test */
20563f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
20573f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
20583f012e29Smrg}
20593f012e29Smrg
20603f012e29Smrgstatic void amdgpu_command_submission_sdma_copy_linear(void)
20613f012e29Smrg{
20623f012e29Smrg	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
20633f012e29Smrg}
20643f012e29Smrg
/*
 * SDMA command-submission test entry point.
 *
 * Runs the three SDMA sub-tests back to back, always in this order:
 * write-linear, const-fill, copy-linear.
 */
static void amdgpu_command_submission_sdma(void)
{
	/* 1. linear write */
	amdgpu_command_submission_sdma_write_linear();

	/* 2. constant fill */
	amdgpu_command_submission_sdma_const_fill();

	/* 3. linear copy */
	amdgpu_command_submission_sdma_copy_linear();
}
20713f012e29Smrg
2072d8807b2fSmrgstatic void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
2073d8807b2fSmrg{
2074d8807b2fSmrg	amdgpu_context_handle context_handle;
2075d8807b2fSmrg	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
2076d8807b2fSmrg	void *ib_result_cpu, *ib_result_ce_cpu;
2077d8807b2fSmrg	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
2078d8807b2fSmrg	struct amdgpu_cs_request ibs_request[2] = {0};
2079d8807b2fSmrg	struct amdgpu_cs_ib_info ib_info[2];
2080d8807b2fSmrg	struct amdgpu_cs_fence fence_status[2] = {0};
2081d8807b2fSmrg	uint32_t *ptr;
2082d8807b2fSmrg	uint32_t expired;
2083d8807b2fSmrg	amdgpu_bo_list_handle bo_list;
2084d8807b2fSmrg	amdgpu_va_handle va_handle, va_handle_ce;
2085d8807b2fSmrg	int r;
2086d8807b2fSmrg	int i = 0, ib_cs_num = 2;
2087b0ab5608Smrg	struct drm_amdgpu_info_hw_ip info;
2088b0ab5608Smrg
2089b0ab5608Smrg	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
2090b0ab5608Smrg	CU_ASSERT_EQUAL(r, 0);
2091b0ab5608Smrg
2092b0ab5608Smrg	if (info.hw_ip_version_major >= 11)
2093b0ab5608Smrg		return;
2094d8807b2fSmrg
2095d8807b2fSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2096d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
2097d8807b2fSmrg
2098d8807b2fSmrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
2099d8807b2fSmrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
2100d8807b2fSmrg				    &ib_result_handle, &ib_result_cpu,
2101d8807b2fSmrg				    &ib_result_mc_address, &va_handle);
2102d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
2103d8807b2fSmrg
2104d8807b2fSmrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
2105d8807b2fSmrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
2106d8807b2fSmrg				    &ib_result_ce_handle, &ib_result_ce_cpu,
2107d8807b2fSmrg				    &ib_result_ce_mc_address, &va_handle_ce);
2108d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
2109d8807b2fSmrg
2110d8807b2fSmrg	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
2111d8807b2fSmrg			       ib_result_ce_handle, &bo_list);
2112d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
2113d8807b2fSmrg
2114d8807b2fSmrg	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
2115d8807b2fSmrg
2116d8807b2fSmrg	/* IT_SET_CE_DE_COUNTERS */
2117d8807b2fSmrg	ptr = ib_result_ce_cpu;
2118d8807b2fSmrg	if (family_id != AMDGPU_FAMILY_SI) {
2119d8807b2fSmrg		ptr[i++] = 0xc0008900;
2120d8807b2fSmrg		ptr[i++] = 0;
2121d8807b2fSmrg	}
2122d8807b2fSmrg	ptr[i++] = 0xc0008400;
2123d8807b2fSmrg	ptr[i++] = 1;
2124d8807b2fSmrg	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
2125d8807b2fSmrg	ib_info[0].size = i;
2126d8807b2fSmrg	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
2127d8807b2fSmrg
2128d8807b2fSmrg	/* IT_WAIT_ON_CE_COUNTER */
2129d8807b2fSmrg	ptr = ib_result_cpu;
2130d8807b2fSmrg	ptr[0] = 0xc0008600;
2131d8807b2fSmrg	ptr[1] = 0x00000001;
2132d8807b2fSmrg	ib_info[1].ib_mc_address = ib_result_mc_address;
2133d8807b2fSmrg	ib_info[1].size = 2;
2134d8807b2fSmrg
2135d8807b2fSmrg	for (i = 0; i < ib_cs_num; i++) {
2136d8807b2fSmrg		ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
2137d8807b2fSmrg		ibs_request[i].number_of_ibs = 2;
2138d8807b2fSmrg		ibs_request[i].ibs = ib_info;
2139d8807b2fSmrg		ibs_request[i].resources = bo_list;
2140d8807b2fSmrg		ibs_request[i].fence_info.handle = NULL;
2141d8807b2fSmrg	}
2142d8807b2fSmrg
2143d8807b2fSmrg	r = amdgpu_cs_submit(context_handle, 0,ibs_request, ib_cs_num);
2144d8807b2fSmrg
2145d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
2146d8807b2fSmrg
2147d8807b2fSmrg	for (i = 0; i < ib_cs_num; i++) {
2148d8807b2fSmrg		fence_status[i].context = context_handle;
2149d8807b2fSmrg		fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
2150d8807b2fSmrg		fence_status[i].fence = ibs_request[i].seq_no;
2151d8807b2fSmrg	}
2152d8807b2fSmrg
2153d8807b2fSmrg	r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
2154d8807b2fSmrg				AMDGPU_TIMEOUT_INFINITE,
2155d8807b2fSmrg				&expired, NULL);
2156d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
2157d8807b2fSmrg
2158d8807b2fSmrg	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
2159d8807b2fSmrg				     ib_result_mc_address, 4096);
2160d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
2161d8807b2fSmrg
2162d8807b2fSmrg	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
2163d8807b2fSmrg				     ib_result_ce_mc_address, 4096);
2164d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
2165d8807b2fSmrg
2166d8807b2fSmrg	r = amdgpu_bo_list_destroy(bo_list);
2167d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
2168d8807b2fSmrg
2169d8807b2fSmrg	r = amdgpu_cs_ctx_free(context_handle);
2170d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
2171d8807b2fSmrg}
2172d8807b2fSmrg
/*
 * Multi-fence test entry point: runs the multi-fence submission once with
 * wait_all = true and then once with wait_all = false.
 */
static void amdgpu_command_submission_multi_fence(void)
{
	static const bool wait_all_modes[] = { true, false };
	unsigned int mode;

	for (mode = 0;
	     mode < sizeof(wait_all_modes) / sizeof(wait_all_modes[0]);
	     mode++)
		amdgpu_command_submission_multi_fence_wait_all(wait_all_modes[mode]);
}
2178d8807b2fSmrg
21793f012e29Smrgstatic void amdgpu_userptr_test(void)
21803f012e29Smrg{
21813f012e29Smrg	int i, r, j;
21823f012e29Smrg	uint32_t *pm4 = NULL;
21833f012e29Smrg	uint64_t bo_mc;
21843f012e29Smrg	void *ptr = NULL;
21853f012e29Smrg	int pm4_dw = 256;
21863f012e29Smrg	int sdma_write_length = 4;
21873f012e29Smrg	amdgpu_bo_handle handle;
21883f012e29Smrg	amdgpu_context_handle context_handle;
21893f012e29Smrg	struct amdgpu_cs_ib_info *ib_info;
21903f012e29Smrg	struct amdgpu_cs_request *ibs_request;
21913f012e29Smrg	amdgpu_bo_handle buf_handle;
21923f012e29Smrg	amdgpu_va_handle va_handle;
21933f012e29Smrg
21943f012e29Smrg	pm4 = calloc(pm4_dw, sizeof(*pm4));
21953f012e29Smrg	CU_ASSERT_NOT_EQUAL(pm4, NULL);
21963f012e29Smrg
21973f012e29Smrg	ib_info = calloc(1, sizeof(*ib_info));
21983f012e29Smrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
21993f012e29Smrg
22003f012e29Smrg	ibs_request = calloc(1, sizeof(*ibs_request));
22013f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
22023f012e29Smrg
22033f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
22043f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
22053f012e29Smrg
22063f012e29Smrg	posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
22073f012e29Smrg	CU_ASSERT_NOT_EQUAL(ptr, NULL);
22083f012e29Smrg	memset(ptr, 0, BUFFER_SIZE);
22093f012e29Smrg
22103f012e29Smrg	r = amdgpu_create_bo_from_user_mem(device_handle,
22113f012e29Smrg					   ptr, BUFFER_SIZE, &buf_handle);
22123f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
22133f012e29Smrg
22143f012e29Smrg	r = amdgpu_va_range_alloc(device_handle,
22153f012e29Smrg				  amdgpu_gpu_va_range_general,
22163f012e29Smrg				  BUFFER_SIZE, 1, 0, &bo_mc,
22173f012e29Smrg				  &va_handle, 0);
22183f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
22193f012e29Smrg
22203f012e29Smrg	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
22213f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
22223f012e29Smrg
22233f012e29Smrg	handle = buf_handle;
22243f012e29Smrg
22253f012e29Smrg	j = i = 0;
2226d8807b2fSmrg
2227d8807b2fSmrg	if (family_id == AMDGPU_FAMILY_SI)
2228d8807b2fSmrg		pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
2229d8807b2fSmrg				sdma_write_length);
2230d8807b2fSmrg	else
2231d8807b2fSmrg		pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
2232d8807b2fSmrg				SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
22333f012e29Smrg	pm4[i++] = 0xffffffff & bo_mc;
22343f012e29Smrg	pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
2235d8807b2fSmrg	if (family_id >= AMDGPU_FAMILY_AI)
2236d8807b2fSmrg		pm4[i++] = sdma_write_length - 1;
2237d8807b2fSmrg	else if (family_id != AMDGPU_FAMILY_SI)
2238d8807b2fSmrg		pm4[i++] = sdma_write_length;
22393f012e29Smrg
22403f012e29Smrg	while (j++ < sdma_write_length)
22413f012e29Smrg		pm4[i++] = 0xdeadbeaf;
22423f012e29Smrg
224300a23bdaSmrg	if (!fork()) {
224400a23bdaSmrg		pm4[0] = 0x0;
224500a23bdaSmrg		exit(0);
224600a23bdaSmrg	}
224700a23bdaSmrg
22483f012e29Smrg	amdgpu_test_exec_cs_helper(context_handle,
22493f012e29Smrg				   AMDGPU_HW_IP_DMA, 0,
22503f012e29Smrg				   i, pm4,
22513f012e29Smrg				   1, &handle,
22523f012e29Smrg				   ib_info, ibs_request);
22533f012e29Smrg	i = 0;
22543f012e29Smrg	while (i < sdma_write_length) {
22553f012e29Smrg		CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
22563f012e29Smrg	}
22573f012e29Smrg	free(ibs_request);
22583f012e29Smrg	free(ib_info);
22593f012e29Smrg	free(pm4);
22603f012e29Smrg
22613f012e29Smrg	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
22623f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
22633f012e29Smrg	r = amdgpu_va_range_free(va_handle);
22643f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
22653f012e29Smrg	r = amdgpu_bo_free(buf_handle);
22663f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
22673f012e29Smrg	free(ptr);
22683f012e29Smrg
22693f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
22703f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
227100a23bdaSmrg
227200a23bdaSmrg	wait(NULL);
227300a23bdaSmrg}
227400a23bdaSmrg
227500a23bdaSmrgstatic void amdgpu_sync_dependency_test(void)
227600a23bdaSmrg{
227700a23bdaSmrg	amdgpu_context_handle context_handle[2];
227800a23bdaSmrg	amdgpu_bo_handle ib_result_handle;
227900a23bdaSmrg	void *ib_result_cpu;
228000a23bdaSmrg	uint64_t ib_result_mc_address;
228100a23bdaSmrg	struct amdgpu_cs_request ibs_request;
228200a23bdaSmrg	struct amdgpu_cs_ib_info ib_info;
228300a23bdaSmrg	struct amdgpu_cs_fence fence_status;
228400a23bdaSmrg	uint32_t expired;
228500a23bdaSmrg	int i, j, r;
228600a23bdaSmrg	amdgpu_bo_list_handle bo_list;
228700a23bdaSmrg	amdgpu_va_handle va_handle;
228800a23bdaSmrg	static uint32_t *ptr;
228900a23bdaSmrg	uint64_t seq_no;
229000a23bdaSmrg
229100a23bdaSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
229200a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
229300a23bdaSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
229400a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
229500a23bdaSmrg
229600a23bdaSmrg	r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
229700a23bdaSmrg			AMDGPU_GEM_DOMAIN_GTT, 0,
229800a23bdaSmrg						    &ib_result_handle, &ib_result_cpu,
229900a23bdaSmrg						    &ib_result_mc_address, &va_handle);
230000a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
230100a23bdaSmrg
230200a23bdaSmrg	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
230300a23bdaSmrg			       &bo_list);
230400a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
230500a23bdaSmrg
230600a23bdaSmrg	ptr = ib_result_cpu;
230700a23bdaSmrg	i = 0;
230800a23bdaSmrg
230900a23bdaSmrg	memcpy(ptr + CODE_OFFSET , shader_bin, sizeof(shader_bin));
231000a23bdaSmrg
231100a23bdaSmrg	/* Dispatch minimal init config and verify it's executed */
231200a23bdaSmrg	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
231300a23bdaSmrg	ptr[i++] = 0x80000000;
231400a23bdaSmrg	ptr[i++] = 0x80000000;
231500a23bdaSmrg
231600a23bdaSmrg	ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
231700a23bdaSmrg	ptr[i++] = 0x80000000;
231800a23bdaSmrg
231900a23bdaSmrg
232000a23bdaSmrg	/* Program compute regs */
232100a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
232200a23bdaSmrg	ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
232300a23bdaSmrg	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
232400a23bdaSmrg	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;
232500a23bdaSmrg
232600a23bdaSmrg
232700a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
232800a23bdaSmrg	ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
232900a23bdaSmrg	/*
233000a23bdaSmrg	 * 002c0040         COMPUTE_PGM_RSRC1 <- VGPRS = 0
233100a23bdaSmrg	                                      SGPRS = 1
233200a23bdaSmrg	                                      PRIORITY = 0
233300a23bdaSmrg	                                      FLOAT_MODE = 192 (0xc0)
233400a23bdaSmrg	                                      PRIV = 0
233500a23bdaSmrg	                                      DX10_CLAMP = 1
233600a23bdaSmrg	                                      DEBUG_MODE = 0
233700a23bdaSmrg	                                      IEEE_MODE = 0
233800a23bdaSmrg	                                      BULKY = 0
233900a23bdaSmrg	                                      CDBG_USER = 0
234000a23bdaSmrg	 *
234100a23bdaSmrg	 */
234200a23bdaSmrg	ptr[i++] = 0x002c0040;
234300a23bdaSmrg
234400a23bdaSmrg
234500a23bdaSmrg	/*
234600a23bdaSmrg	 * 00000010         COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
234700a23bdaSmrg	                                      USER_SGPR = 8
234800a23bdaSmrg	                                      TRAP_PRESENT = 0
234900a23bdaSmrg	                                      TGID_X_EN = 0
235000a23bdaSmrg	                                      TGID_Y_EN = 0
235100a23bdaSmrg	                                      TGID_Z_EN = 0
235200a23bdaSmrg	                                      TG_SIZE_EN = 0
235300a23bdaSmrg	                                      TIDIG_COMP_CNT = 0
235400a23bdaSmrg	                                      EXCP_EN_MSB = 0
235500a23bdaSmrg	                                      LDS_SIZE = 0
235600a23bdaSmrg	                                      EXCP_EN = 0
235700a23bdaSmrg	 *
235800a23bdaSmrg	 */
235900a23bdaSmrg	ptr[i++] = 0x00000010;
236000a23bdaSmrg
236100a23bdaSmrg
236200a23bdaSmrg/*
236300a23bdaSmrg * 00000100         COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
236400a23bdaSmrg                                         WAVESIZE = 0
236500a23bdaSmrg *
236600a23bdaSmrg */
236700a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
236800a23bdaSmrg	ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
236900a23bdaSmrg	ptr[i++] = 0x00000100;
237000a23bdaSmrg
237100a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
237200a23bdaSmrg	ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
237300a23bdaSmrg	ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
237400a23bdaSmrg	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
237500a23bdaSmrg
237600a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
237700a23bdaSmrg	ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
237800a23bdaSmrg	ptr[i++] = 0;
237900a23bdaSmrg
238000a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
238100a23bdaSmrg	ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
238200a23bdaSmrg	ptr[i++] = 1;
238300a23bdaSmrg	ptr[i++] = 1;
238400a23bdaSmrg	ptr[i++] = 1;
238500a23bdaSmrg
238600a23bdaSmrg
238700a23bdaSmrg	/* Dispatch */
238800a23bdaSmrg	ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
238900a23bdaSmrg	ptr[i++] = 1;
239000a23bdaSmrg	ptr[i++] = 1;
239100a23bdaSmrg	ptr[i++] = 1;
239200a23bdaSmrg	ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */
239300a23bdaSmrg
239400a23bdaSmrg
239500a23bdaSmrg	while (i & 7)
239600a23bdaSmrg		ptr[i++] =  0xffff1000; /* type3 nop packet */
239700a23bdaSmrg
239800a23bdaSmrg	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
239900a23bdaSmrg	ib_info.ib_mc_address = ib_result_mc_address;
240000a23bdaSmrg	ib_info.size = i;
240100a23bdaSmrg
240200a23bdaSmrg	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
240300a23bdaSmrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
240400a23bdaSmrg	ibs_request.ring = 0;
240500a23bdaSmrg	ibs_request.number_of_ibs = 1;
240600a23bdaSmrg	ibs_request.ibs = &ib_info;
240700a23bdaSmrg	ibs_request.resources = bo_list;
240800a23bdaSmrg	ibs_request.fence_info.handle = NULL;
240900a23bdaSmrg
241000a23bdaSmrg	r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request, 1);
241100a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
241200a23bdaSmrg	seq_no = ibs_request.seq_no;
241300a23bdaSmrg
241400a23bdaSmrg
241500a23bdaSmrg
241600a23bdaSmrg	/* Prepare second command with dependency on the first */
241700a23bdaSmrg	j = i;
241800a23bdaSmrg	ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
241900a23bdaSmrg	ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
242000a23bdaSmrg	ptr[i++] =          0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
242100a23bdaSmrg	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
242200a23bdaSmrg	ptr[i++] = 99;
242300a23bdaSmrg
242400a23bdaSmrg	while (i & 7)
242500a23bdaSmrg		ptr[i++] =  0xffff1000; /* type3 nop packet */
242600a23bdaSmrg
242700a23bdaSmrg	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
242800a23bdaSmrg	ib_info.ib_mc_address = ib_result_mc_address + j * 4;
242900a23bdaSmrg	ib_info.size = i - j;
243000a23bdaSmrg
243100a23bdaSmrg	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
243200a23bdaSmrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
243300a23bdaSmrg	ibs_request.ring = 0;
243400a23bdaSmrg	ibs_request.number_of_ibs = 1;
243500a23bdaSmrg	ibs_request.ibs = &ib_info;
243600a23bdaSmrg	ibs_request.resources = bo_list;
243700a23bdaSmrg	ibs_request.fence_info.handle = NULL;
243800a23bdaSmrg
243900a23bdaSmrg	ibs_request.number_of_dependencies = 1;
244000a23bdaSmrg
244100a23bdaSmrg	ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
244200a23bdaSmrg	ibs_request.dependencies[0].context = context_handle[1];
244300a23bdaSmrg	ibs_request.dependencies[0].ip_instance = 0;
244400a23bdaSmrg	ibs_request.dependencies[0].ring = 0;
244500a23bdaSmrg	ibs_request.dependencies[0].fence = seq_no;
244600a23bdaSmrg
244700a23bdaSmrg
244800a23bdaSmrg	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request, 1);
244900a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
245000a23bdaSmrg
245100a23bdaSmrg
245200a23bdaSmrg	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
245300a23bdaSmrg	fence_status.context = context_handle[0];
245400a23bdaSmrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
245500a23bdaSmrg	fence_status.ip_instance = 0;
245600a23bdaSmrg	fence_status.ring = 0;
245700a23bdaSmrg	fence_status.fence = ibs_request.seq_no;
245800a23bdaSmrg
245900a23bdaSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
246000a23bdaSmrg		       AMDGPU_TIMEOUT_INFINITE,0, &expired);
246100a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
246200a23bdaSmrg
246300a23bdaSmrg	/* Expect the second command to wait for shader to complete */
246400a23bdaSmrg	CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);
246500a23bdaSmrg
246600a23bdaSmrg	r = amdgpu_bo_list_destroy(bo_list);
246700a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
246800a23bdaSmrg
246900a23bdaSmrg	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
247000a23bdaSmrg				     ib_result_mc_address, 4096);
247100a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
247200a23bdaSmrg
247300a23bdaSmrg	r = amdgpu_cs_ctx_free(context_handle[0]);
247400a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
247500a23bdaSmrg	r = amdgpu_cs_ctx_free(context_handle[1]);
247600a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
247700a23bdaSmrg
247800a23bdaSmrg	free(ibs_request.dependencies);
24793f012e29Smrg}
24805324fb0dSmrg
248188f8a8d2Smrgstatic void amdgpu_compute_dispatch_test(void)
24825324fb0dSmrg{
2483b0ab5608Smrg	amdgpu_test_dispatch_helper(device_handle, AMDGPU_HW_IP_COMPUTE);
248488f8a8d2Smrg}
248588f8a8d2Smrgstatic void amdgpu_gfx_dispatch_test(void)
248688f8a8d2Smrg{
2487b0ab5608Smrg	amdgpu_test_dispatch_helper(device_handle, AMDGPU_HW_IP_GFX);
24889bd392adSmrg}
24899bd392adSmrg
2490b0ab5608Smrgstatic void amdgpu_draw_test(void)
24919bd392adSmrg{
2492b0ab5608Smrg	amdgpu_test_draw_helper(device_handle);
24939bd392adSmrg}
249488f8a8d2Smrgstatic void amdgpu_gpu_reset_test(void)
249588f8a8d2Smrg{
249688f8a8d2Smrg	int r;
249788f8a8d2Smrg	char debugfs_path[256], tmp[10];
249888f8a8d2Smrg	int fd;
249988f8a8d2Smrg	struct stat sbuf;
250088f8a8d2Smrg	amdgpu_context_handle context_handle;
250188f8a8d2Smrg	uint32_t hang_state, hangs;
250288f8a8d2Smrg
250388f8a8d2Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
250488f8a8d2Smrg	CU_ASSERT_EQUAL(r, 0);
250588f8a8d2Smrg
250688f8a8d2Smrg	r = fstat(drm_amdgpu[0], &sbuf);
250788f8a8d2Smrg	CU_ASSERT_EQUAL(r, 0);
250888f8a8d2Smrg
250988f8a8d2Smrg	sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/amdgpu_gpu_recover", minor(sbuf.st_rdev));
251088f8a8d2Smrg	fd = open(debugfs_path, O_RDONLY);
251188f8a8d2Smrg	CU_ASSERT(fd >= 0);
251288f8a8d2Smrg
251388f8a8d2Smrg	r = read(fd, tmp, sizeof(tmp)/sizeof(char));
251488f8a8d2Smrg	CU_ASSERT(r > 0);
251588f8a8d2Smrg
251688f8a8d2Smrg	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
251788f8a8d2Smrg	CU_ASSERT_EQUAL(r, 0);
251888f8a8d2Smrg	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
251988f8a8d2Smrg
252088f8a8d2Smrg	close(fd);
252188f8a8d2Smrg	r = amdgpu_cs_ctx_free(context_handle);
252288f8a8d2Smrg	CU_ASSERT_EQUAL(r, 0);
252388f8a8d2Smrg
252488f8a8d2Smrg	amdgpu_compute_dispatch_test();
252588f8a8d2Smrg	amdgpu_gfx_dispatch_test();
252688f8a8d2Smrg}
25270ed5401bSmrg
25280ed5401bSmrgstatic void amdgpu_stable_pstate_test(void)
25290ed5401bSmrg{
25300ed5401bSmrg	int r;
25310ed5401bSmrg	amdgpu_context_handle context_handle;
25320ed5401bSmrg	uint32_t current_pstate = 0, new_pstate = 0;
25330ed5401bSmrg
25340ed5401bSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
25350ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
25360ed5401bSmrg
25370ed5401bSmrg	r = amdgpu_cs_ctx_stable_pstate(context_handle,
25380ed5401bSmrg					AMDGPU_CTX_OP_GET_STABLE_PSTATE,
25390ed5401bSmrg					0, &current_pstate);
25400ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
25410ed5401bSmrg	CU_ASSERT_EQUAL(new_pstate, AMDGPU_CTX_STABLE_PSTATE_NONE);
25420ed5401bSmrg
25430ed5401bSmrg	r = amdgpu_cs_ctx_stable_pstate(context_handle,
25440ed5401bSmrg					AMDGPU_CTX_OP_SET_STABLE_PSTATE,
25450ed5401bSmrg					AMDGPU_CTX_STABLE_PSTATE_PEAK, NULL);
25460ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
25470ed5401bSmrg
25480ed5401bSmrg	r = amdgpu_cs_ctx_stable_pstate(context_handle,
25490ed5401bSmrg					AMDGPU_CTX_OP_GET_STABLE_PSTATE,
25500ed5401bSmrg					0, &new_pstate);
25510ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
25520ed5401bSmrg	CU_ASSERT_EQUAL(new_pstate, AMDGPU_CTX_STABLE_PSTATE_PEAK);
25530ed5401bSmrg
25540ed5401bSmrg	r = amdgpu_cs_ctx_free(context_handle);
25550ed5401bSmrg	CU_ASSERT_EQUAL(r, 0);
25560ed5401bSmrg}
2557