basic_tests.c revision 4babd585
13f012e29Smrg/*
23f012e29Smrg * Copyright 2014 Advanced Micro Devices, Inc.
33f012e29Smrg *
43f012e29Smrg * Permission is hereby granted, free of charge, to any person obtaining a
53f012e29Smrg * copy of this software and associated documentation files (the "Software"),
63f012e29Smrg * to deal in the Software without restriction, including without limitation
73f012e29Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
83f012e29Smrg * and/or sell copies of the Software, and to permit persons to whom the
93f012e29Smrg * Software is furnished to do so, subject to the following conditions:
103f012e29Smrg *
113f012e29Smrg * The above copyright notice and this permission notice shall be included in
123f012e29Smrg * all copies or substantial portions of the Software.
133f012e29Smrg *
143f012e29Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
153f012e29Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
163f012e29Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
173f012e29Smrg * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
183f012e29Smrg * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
193f012e29Smrg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
203f012e29Smrg * OTHER DEALINGS IN THE SOFTWARE.
213f012e29Smrg *
223f012e29Smrg*/
233f012e29Smrg
243f012e29Smrg#include <stdio.h>
253f012e29Smrg#include <stdlib.h>
263f012e29Smrg#include <unistd.h>
2788f8a8d2Smrg#include <sys/types.h>
2888f8a8d2Smrg#ifdef MAJOR_IN_SYSMACROS
2988f8a8d2Smrg#include <sys/sysmacros.h>
3088f8a8d2Smrg#endif
3188f8a8d2Smrg#include <sys/stat.h>
3288f8a8d2Smrg#include <fcntl.h>
339bd392adSmrg#if HAVE_ALLOCA_H
343f012e29Smrg# include <alloca.h>
353f012e29Smrg#endif
3600a23bdaSmrg#include <sys/wait.h>
373f012e29Smrg
383f012e29Smrg#include "CUnit/Basic.h"
393f012e29Smrg
403f012e29Smrg#include "amdgpu_test.h"
413f012e29Smrg#include "amdgpu_drm.h"
4241687f09Smrg#include "amdgpu_internal.h"
437cdc0497Smrg#include "util_math.h"
443f012e29Smrg
453f012e29Smrgstatic  amdgpu_device_handle device_handle;
463f012e29Smrgstatic  uint32_t  major_version;
473f012e29Smrgstatic  uint32_t  minor_version;
48d8807b2fSmrgstatic  uint32_t  family_id;
494babd585Smrgstatic  uint32_t  chip_id;
504babd585Smrgstatic  uint32_t  chip_rev;
513f012e29Smrg
523f012e29Smrgstatic void amdgpu_query_info_test(void);
533f012e29Smrgstatic void amdgpu_command_submission_gfx(void);
543f012e29Smrgstatic void amdgpu_command_submission_compute(void);
55d8807b2fSmrgstatic void amdgpu_command_submission_multi_fence(void);
563f012e29Smrgstatic void amdgpu_command_submission_sdma(void);
573f012e29Smrgstatic void amdgpu_userptr_test(void);
583f012e29Smrgstatic void amdgpu_semaphore_test(void);
5900a23bdaSmrgstatic void amdgpu_sync_dependency_test(void);
6000a23bdaSmrgstatic void amdgpu_bo_eviction_test(void);
6188f8a8d2Smrgstatic void amdgpu_compute_dispatch_test(void);
6288f8a8d2Smrgstatic void amdgpu_gfx_dispatch_test(void);
635324fb0dSmrgstatic void amdgpu_draw_test(void);
6488f8a8d2Smrgstatic void amdgpu_gpu_reset_test(void);
653f012e29Smrg
663f012e29Smrgstatic void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
673f012e29Smrgstatic void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
683f012e29Smrgstatic void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
6900a23bdaSmrgstatic void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
7000a23bdaSmrg				       unsigned ip_type,
7100a23bdaSmrg				       int instance, int pm4_dw, uint32_t *pm4_src,
7200a23bdaSmrg				       int res_cnt, amdgpu_bo_handle *resources,
7300a23bdaSmrg				       struct amdgpu_cs_ib_info *ib_info,
7400a23bdaSmrg				       struct amdgpu_cs_request *ibs_request);
7541687f09Smrg
763f012e29SmrgCU_TestInfo basic_tests[] = {
773f012e29Smrg	{ "Query Info Test",  amdgpu_query_info_test },
783f012e29Smrg	{ "Userptr Test",  amdgpu_userptr_test },
7900a23bdaSmrg	{ "bo eviction Test",  amdgpu_bo_eviction_test },
803f012e29Smrg	{ "Command submission Test (GFX)",  amdgpu_command_submission_gfx },
813f012e29Smrg	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
82d8807b2fSmrg	{ "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
833f012e29Smrg	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
843f012e29Smrg	{ "SW semaphore Test",  amdgpu_semaphore_test },
8500a23bdaSmrg	{ "Sync dependency Test",  amdgpu_sync_dependency_test },
8688f8a8d2Smrg	{ "Dispatch Test (Compute)",  amdgpu_compute_dispatch_test },
8788f8a8d2Smrg	{ "Dispatch Test (GFX)",  amdgpu_gfx_dispatch_test },
885324fb0dSmrg	{ "Draw Test",  amdgpu_draw_test },
8988f8a8d2Smrg	{ "GPU reset Test", amdgpu_gpu_reset_test },
903f012e29Smrg	CU_TEST_INFO_NULL,
913f012e29Smrg};
929bd392adSmrg#define BUFFER_SIZE (MAX2(8 * 1024, getpagesize()))
933f012e29Smrg#define SDMA_PKT_HEADER_op_offset 0
943f012e29Smrg#define SDMA_PKT_HEADER_op_mask   0x000000FF
953f012e29Smrg#define SDMA_PKT_HEADER_op_shift  0
963f012e29Smrg#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
973f012e29Smrg#define SDMA_OPCODE_CONSTANT_FILL  11
983f012e29Smrg#       define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)           ((x) << 14)
993f012e29Smrg	/* 0 = byte fill
1003f012e29Smrg	 * 2 = DW fill
1013f012e29Smrg	 */
1023f012e29Smrg#define SDMA_PACKET(op, sub_op, e)	((((e) & 0xFFFF) << 16) |	\
1033f012e29Smrg					(((sub_op) & 0xFF) << 8) |	\
1043f012e29Smrg					(((op) & 0xFF) << 0))
1053f012e29Smrg#define	SDMA_OPCODE_WRITE				  2
1063f012e29Smrg#       define SDMA_WRITE_SUB_OPCODE_LINEAR               0
1073f012e29Smrg#       define SDMA_WRTIE_SUB_OPCODE_TILED                1
1083f012e29Smrg
1093f012e29Smrg#define	SDMA_OPCODE_COPY				  1
1103f012e29Smrg#       define SDMA_COPY_SUB_OPCODE_LINEAR                0
1113f012e29Smrg
11241687f09Smrg#define	SDMA_OPCODE_ATOMIC				  10
11341687f09Smrg#		define SDMA_ATOMIC_LOOP(x)               ((x) << 0)
11441687f09Smrg        /* 0 - single_pass_atomic.
11541687f09Smrg         * 1 - loop_until_compare_satisfied.
11641687f09Smrg         */
11741687f09Smrg#		define SDMA_ATOMIC_TMZ(x)                ((x) << 2)
11841687f09Smrg		/* 0 - non-TMZ.
11941687f09Smrg		 * 1 - TMZ.
12041687f09Smrg	     */
12141687f09Smrg#		define SDMA_ATOMIC_OPCODE(x)             ((x) << 9)
12241687f09Smrg		/* TC_OP_ATOMIC_CMPSWAP_RTN_32 0x00000008
12341687f09Smrg		 * same as Packet 3
12441687f09Smrg		 */
12541687f09Smrg
1263f012e29Smrg#define GFX_COMPUTE_NOP  0xffff1000
1273f012e29Smrg#define SDMA_NOP  0x0
1283f012e29Smrg
1293f012e29Smrg/* PM4 */
/* PM4 packet type codes; CP_PACKET_GET_TYPE() reads them from bits [31:30]. */
#define	PACKET_TYPE0	0
#define	PACKET_TYPE1	1
#define	PACKET_TYPE2	2
#define	PACKET_TYPE3	3

/* Field extractors for an already assembled PM4 header dword. */
#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
/* Type-0 header: register in bits [15:0], count in bits [29:16]. */
#define PACKET0(reg, n)	((PACKET_TYPE0 << 30) |				\
			 ((reg) & 0xFFFF) |			\
			 ((n) & 0x3FFF) << 16)
#define CP_PACKET2			0x80000000
#define		PACKET2_PAD_SHIFT		0
#define		PACKET2_PAD_MASK		(0x3fffffff << 0)

#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))

/*
 * Type-3 header: opcode in bits [15:8], count in bits [29:16].
 * The packet type is shifted as unsigned because 3 << 30 does not fit in a
 * signed int.
 */
#define PACKET3(op, n)	(((unsigned)PACKET_TYPE3 << 30) |		\
			 (((op) & 0xFF) << 8) |				\
			 (((n) & 0x3FFF) << 16))
/*
 * Same as PACKET3() with bit 1 set.  The expansion is fully parenthesized so
 * that operators next to the macro call cannot rebind parts of it.
 */
#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | (1 << 1))
1523f012e29Smrg
1533f012e29Smrg/* Packet 3 types */
1543f012e29Smrg#define	PACKET3_NOP					0x10
1553f012e29Smrg
1563f012e29Smrg#define	PACKET3_WRITE_DATA				0x37
1573f012e29Smrg#define		WRITE_DATA_DST_SEL(x)                   ((x) << 8)
1583f012e29Smrg		/* 0 - register
1593f012e29Smrg		 * 1 - memory (sync - via GRBM)
1603f012e29Smrg		 * 2 - gl2
1613f012e29Smrg		 * 3 - gds
1623f012e29Smrg		 * 4 - reserved
1633f012e29Smrg		 * 5 - memory (async - direct)
1643f012e29Smrg		 */
1653f012e29Smrg#define		WR_ONE_ADDR                             (1 << 16)
1663f012e29Smrg#define		WR_CONFIRM                              (1 << 20)
1673f012e29Smrg#define		WRITE_DATA_CACHE_POLICY(x)              ((x) << 25)
1683f012e29Smrg		/* 0 - LRU
1693f012e29Smrg		 * 1 - Stream
1703f012e29Smrg		 */
1713f012e29Smrg#define		WRITE_DATA_ENGINE_SEL(x)                ((x) << 30)
1723f012e29Smrg		/* 0 - me
1733f012e29Smrg		 * 1 - pfp
1743f012e29Smrg		 * 2 - ce
1753f012e29Smrg		 */
1763f012e29Smrg
17741687f09Smrg#define	PACKET3_ATOMIC_MEM				0x1E
17841687f09Smrg#define     TC_OP_ATOMIC_CMPSWAP_RTN_32          0x00000008
17941687f09Smrg#define     ATOMIC_MEM_COMMAND(x)               ((x) << 8)
18041687f09Smrg            /* 0 - single_pass_atomic.
18141687f09Smrg             * 1 - loop_until_compare_satisfied.
18241687f09Smrg             */
18341687f09Smrg#define     ATOMIC_MEM_CACHEPOLICAY(x)          ((x) << 25)
18441687f09Smrg            /* 0 - lru.
18541687f09Smrg             * 1 - stream.
18641687f09Smrg             */
18741687f09Smrg#define     ATOMIC_MEM_ENGINESEL(x)             ((x) << 30)
18841687f09Smrg            /* 0 - micro_engine.
18941687f09Smrg			 */
19041687f09Smrg
1913f012e29Smrg#define	PACKET3_DMA_DATA				0x50
1923f012e29Smrg/* 1. header
1933f012e29Smrg * 2. CONTROL
1943f012e29Smrg * 3. SRC_ADDR_LO or DATA [31:0]
1953f012e29Smrg * 4. SRC_ADDR_HI [31:0]
1963f012e29Smrg * 5. DST_ADDR_LO [31:0]
1973f012e29Smrg * 6. DST_ADDR_HI [7:0]
1983f012e29Smrg * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
1993f012e29Smrg */
2003f012e29Smrg/* CONTROL */
2013f012e29Smrg#              define PACKET3_DMA_DATA_ENGINE(x)     ((x) << 0)
2023f012e29Smrg		/* 0 - ME
2033f012e29Smrg		 * 1 - PFP
2043f012e29Smrg		 */
2053f012e29Smrg#              define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
2063f012e29Smrg		/* 0 - LRU
2073f012e29Smrg		 * 1 - Stream
2083f012e29Smrg		 * 2 - Bypass
2093f012e29Smrg		 */
2103f012e29Smrg#              define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15)
2113f012e29Smrg#              define PACKET3_DMA_DATA_DST_SEL(x)  ((x) << 20)
2123f012e29Smrg		/* 0 - DST_ADDR using DAS
2133f012e29Smrg		 * 1 - GDS
2143f012e29Smrg		 * 3 - DST_ADDR using L2
2153f012e29Smrg		 */
2163f012e29Smrg#              define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
2173f012e29Smrg		/* 0 - LRU
2183f012e29Smrg		 * 1 - Stream
2193f012e29Smrg		 * 2 - Bypass
2203f012e29Smrg		 */
2213f012e29Smrg#              define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27)
2223f012e29Smrg#              define PACKET3_DMA_DATA_SRC_SEL(x)  ((x) << 29)
2233f012e29Smrg		/* 0 - SRC_ADDR using SAS
2243f012e29Smrg		 * 1 - GDS
2253f012e29Smrg		 * 2 - DATA
2263f012e29Smrg		 * 3 - SRC_ADDR using L2
2273f012e29Smrg		 */
2283f012e29Smrg#              define PACKET3_DMA_DATA_CP_SYNC     (1 << 31)
2293f012e29Smrg/* COMMAND */
2303f012e29Smrg#              define PACKET3_DMA_DATA_DIS_WC      (1 << 21)
2313f012e29Smrg#              define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22)
2323f012e29Smrg		/* 0 - none
2333f012e29Smrg		 * 1 - 8 in 16
2343f012e29Smrg		 * 2 - 8 in 32
2353f012e29Smrg		 * 3 - 8 in 64
2363f012e29Smrg		 */
2373f012e29Smrg#              define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24)
2383f012e29Smrg		/* 0 - none
2393f012e29Smrg		 * 1 - 8 in 16
2403f012e29Smrg		 * 2 - 8 in 32
2413f012e29Smrg		 * 3 - 8 in 64
2423f012e29Smrg		 */
2433f012e29Smrg#              define PACKET3_DMA_DATA_CMD_SAS     (1 << 26)
2443f012e29Smrg		/* 0 - memory
2453f012e29Smrg		 * 1 - register
2463f012e29Smrg		 */
2473f012e29Smrg#              define PACKET3_DMA_DATA_CMD_DAS     (1 << 27)
2483f012e29Smrg		/* 0 - memory
2493f012e29Smrg		 * 1 - register
2503f012e29Smrg		 */
2513f012e29Smrg#              define PACKET3_DMA_DATA_CMD_SAIC    (1 << 28)
2523f012e29Smrg#              define PACKET3_DMA_DATA_CMD_DAIC    (1 << 29)
2533f012e29Smrg#              define PACKET3_DMA_DATA_CMD_RAW_WAIT  (1 << 30)
2543f012e29Smrg
255d8807b2fSmrg#define SDMA_PACKET_SI(op, b, t, s, cnt)	((((op) & 0xF) << 28) |	\
256d8807b2fSmrg						(((b) & 0x1) << 26) |		\
257d8807b2fSmrg						(((t) & 0x1) << 23) |		\
258d8807b2fSmrg						(((s) & 0x1) << 22) |		\
259d8807b2fSmrg						(((cnt) & 0xFFFFF) << 0))
260d8807b2fSmrg#define	SDMA_OPCODE_COPY_SI	3
261d8807b2fSmrg#define SDMA_OPCODE_CONSTANT_FILL_SI	13
262d8807b2fSmrg#define SDMA_NOP_SI  0xf
263d8807b2fSmrg#define GFX_COMPUTE_NOP_SI 0x80000000
264d8807b2fSmrg#define	PACKET3_DMA_DATA_SI	0x41
265d8807b2fSmrg#              define PACKET3_DMA_DATA_SI_ENGINE(x)     ((x) << 27)
266d8807b2fSmrg		/* 0 - ME
267d8807b2fSmrg		 * 1 - PFP
268d8807b2fSmrg		 */
269d8807b2fSmrg#              define PACKET3_DMA_DATA_SI_DST_SEL(x)  ((x) << 20)
270d8807b2fSmrg		/* 0 - DST_ADDR using DAS
271d8807b2fSmrg		 * 1 - GDS
272d8807b2fSmrg		 * 3 - DST_ADDR using L2
273d8807b2fSmrg		 */
274d8807b2fSmrg#              define PACKET3_DMA_DATA_SI_SRC_SEL(x)  ((x) << 29)
275d8807b2fSmrg		/* 0 - SRC_ADDR using SAS
276d8807b2fSmrg		 * 1 - GDS
277d8807b2fSmrg		 * 2 - DATA
278d8807b2fSmrg		 * 3 - SRC_ADDR using L2
279d8807b2fSmrg		 */
280d8807b2fSmrg#              define PACKET3_DMA_DATA_SI_CP_SYNC     (1 << 31)
281d8807b2fSmrg
28200a23bdaSmrg
28300a23bdaSmrg#define PKT3_CONTEXT_CONTROL                   0x28
28400a23bdaSmrg#define     CONTEXT_CONTROL_LOAD_ENABLE(x)     (((unsigned)(x) & 0x1) << 31)
28500a23bdaSmrg#define     CONTEXT_CONTROL_LOAD_CE_RAM(x)     (((unsigned)(x) & 0x1) << 28)
28600a23bdaSmrg#define     CONTEXT_CONTROL_SHADOW_ENABLE(x)   (((unsigned)(x) & 0x1) << 31)
28700a23bdaSmrg
28800a23bdaSmrg#define PKT3_CLEAR_STATE                       0x12
28900a23bdaSmrg
29000a23bdaSmrg#define PKT3_SET_SH_REG                        0x76
29100a23bdaSmrg#define		PACKET3_SET_SH_REG_START			0x00002c00
29200a23bdaSmrg
29300a23bdaSmrg#define	PACKET3_DISPATCH_DIRECT				0x15
2945324fb0dSmrg#define PACKET3_EVENT_WRITE				0x46
2955324fb0dSmrg#define PACKET3_ACQUIRE_MEM				0x58
2965324fb0dSmrg#define PACKET3_SET_CONTEXT_REG				0x69
2975324fb0dSmrg#define PACKET3_SET_UCONFIG_REG				0x79
2985324fb0dSmrg#define PACKET3_DRAW_INDEX_AUTO				0x2D
29900a23bdaSmrg/* gfx 8 */
30000a23bdaSmrg#define mmCOMPUTE_PGM_LO                                                        0x2e0c
30100a23bdaSmrg#define mmCOMPUTE_PGM_RSRC1                                                     0x2e12
30200a23bdaSmrg#define mmCOMPUTE_TMPRING_SIZE                                                  0x2e18
30300a23bdaSmrg#define mmCOMPUTE_USER_DATA_0                                                   0x2e40
30400a23bdaSmrg#define mmCOMPUTE_USER_DATA_1                                                   0x2e41
30500a23bdaSmrg#define mmCOMPUTE_RESOURCE_LIMITS                                               0x2e15
30600a23bdaSmrg#define mmCOMPUTE_NUM_THREAD_X                                                  0x2e07
30700a23bdaSmrg
30800a23bdaSmrg
30900a23bdaSmrg
/*
 * Reverse the byte order of a 32-bit value.
 *
 * The argument is parenthesized and converted to uint32_t before masking so
 * that compound expressions (e.g. "a | b") are swapped as a whole and the
 * left shift by 24 never happens on a signed int.  The result is still an
 * integer constant expression when the argument is one, so the macro remains
 * usable in static initializers.
 */
#define SWAP_32(num) ((((uint32_t)(num) & 0xff000000u) >> 24) | \
		      (((uint32_t)(num) & 0x0000ff00u) << 8) | \
		      (((uint32_t)(num) & 0x00ff0000u) >> 8) | \
		      (((uint32_t)(num) & 0x000000ffu) << 24))
31400a23bdaSmrg
31500a23bdaSmrg
/*
 * Shader code:
 *
 * void main()
 * {
 *	float x = some_input;
 *
 *	for (unsigned i = 0; i < 1000000; i++)
 *		x = sin(x);
 *
 *	u[0] = 42u;
 * }
 */
32700a23bdaSmrg
32800a23bdaSmrgstatic  uint32_t shader_bin[] = {
32900a23bdaSmrg	SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
33000a23bdaSmrg	SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
33100a23bdaSmrg	SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
33200a23bdaSmrg	SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
33300a23bdaSmrg};
33400a23bdaSmrg
33500a23bdaSmrg#define CODE_OFFSET 512
33600a23bdaSmrg#define DATA_OFFSET 1024
33700a23bdaSmrg
/*
 * Compute-shader (CS) variants.  The enumerators are numbered 0..3 in
 * declaration order.
 */
enum cs_type {
	CS_BUFFERCLEAR = 0,
	CS_BUFFERCOPY  = 1,
	CS_HANG        = 2,
	CS_HANG_SLOW   = 3
};
3445324fb0dSmrg
3455324fb0dSmrgstatic const uint32_t bufferclear_cs_shader_gfx9[] = {
3464babd585Smrg    0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08,
3474babd585Smrg    0x7e020280, 0x7e040204, 0x7e060205, 0x7e080206,
3484babd585Smrg    0x7e0a0207, 0xe01c2000, 0x80000200, 0xbf8c0000,
3494babd585Smrg    0xbf810000
3505324fb0dSmrg};
3515324fb0dSmrg
3525324fb0dSmrgstatic const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = {
3535324fb0dSmrg	{0x2e12, 0x000C0041},	//{ mmCOMPUTE_PGM_RSRC1,	  0x000C0041 },
3545324fb0dSmrg	{0x2e13, 0x00000090},	//{ mmCOMPUTE_PGM_RSRC2,	  0x00000090 },
3555324fb0dSmrg	{0x2e07, 0x00000040},	//{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 },
3565324fb0dSmrg	{0x2e08, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 },
3575324fb0dSmrg	{0x2e09, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
3585324fb0dSmrg};
3595324fb0dSmrg
3605324fb0dSmrgstatic const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5;
3615324fb0dSmrg
3625324fb0dSmrgstatic const uint32_t buffercopy_cs_shader_gfx9[] = {
3634babd585Smrg    0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08,
3644babd585Smrg    0x7e020280, 0xe00c2000, 0x80000200, 0xbf8c0f70,
3654babd585Smrg    0xe01c2000, 0x80010200, 0xbf810000
3665324fb0dSmrg};
3675324fb0dSmrg
3685324fb0dSmrgstatic const uint32_t preamblecache_gfx9[] = {
3695324fb0dSmrg	0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
3705324fb0dSmrg	0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
3715324fb0dSmrg	0xc0026900, 0xb4,  0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
3725324fb0dSmrg	0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
3735324fb0dSmrg	0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
3745324fb0dSmrg	0xc0016900, 0x2d5, 0x10000, 0xc0016900,  0x2dc, 0x0,
3755324fb0dSmrg	0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
3765324fb0dSmrg	0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
37788f8a8d2Smrg	0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20,
3785324fb0dSmrg	0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
3795324fb0dSmrg	0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0,
3805324fb0dSmrg	0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
3815324fb0dSmrg	0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
3825324fb0dSmrg	0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
3835324fb0dSmrg	0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
38488f8a8d2Smrg	0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0,
38588f8a8d2Smrg	0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1,
3865324fb0dSmrg	0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0,
3875324fb0dSmrg	0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0,
3885324fb0dSmrg	0xc0017900, 0x24b, 0x0
3895324fb0dSmrg};
3905324fb0dSmrg
/*
 * Pixel-shader (PS) variants.  The enumerators are numbered 0..3 in
 * declaration order.
 */
enum ps_type {
	PS_CONST     = 0,
	PS_TEX       = 1,
	PS_HANG      = 2,
	PS_HANG_SLOW = 3
};
3975324fb0dSmrg
3985324fb0dSmrgstatic const uint32_t ps_const_shader_gfx9[] = {
3995324fb0dSmrg    0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
4005324fb0dSmrg    0xD2960000, 0x00020300, 0xD2960001, 0x00020702,
4015324fb0dSmrg    0xC4001C0F, 0x00000100, 0xBF810000
4025324fb0dSmrg};
4035324fb0dSmrg
4045324fb0dSmrgstatic const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6;
4055324fb0dSmrg
4065324fb0dSmrgstatic const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = {
4075324fb0dSmrg    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
4085324fb0dSmrg     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 },
4095324fb0dSmrg     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 },
4105324fb0dSmrg     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 },
4115324fb0dSmrg     { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 },
4125324fb0dSmrg     { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 },
4135324fb0dSmrg     { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 },
4145324fb0dSmrg     { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 },
4155324fb0dSmrg     { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 },
4165324fb0dSmrg     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 }
4175324fb0dSmrg    }
4185324fb0dSmrg};
4195324fb0dSmrg
4205324fb0dSmrgstatic const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = {
4215324fb0dSmrg    0x00000004
4225324fb0dSmrg};
4235324fb0dSmrg
4245324fb0dSmrgstatic const uint32_t ps_num_sh_registers_gfx9 = 2;
4255324fb0dSmrg
4265324fb0dSmrgstatic const uint32_t ps_const_sh_registers_gfx9[][2] = {
4275324fb0dSmrg    {0x2C0A, 0x000C0040},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 },
4285324fb0dSmrg    {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 }
4295324fb0dSmrg};
4305324fb0dSmrg
4315324fb0dSmrgstatic const uint32_t ps_num_context_registers_gfx9 = 7;
4325324fb0dSmrg
4335324fb0dSmrgstatic const uint32_t ps_const_context_reg_gfx9[][2] = {
4345324fb0dSmrg    {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
4355324fb0dSmrg    {0xA1B6, 0x00000000}, //{ mmSPI_PS_IN_CONTROL,       0x00000000 },
4365324fb0dSmrg    {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
4375324fb0dSmrg    {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
4385324fb0dSmrg    {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
4395324fb0dSmrg    {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
4405324fb0dSmrg    {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004 }
4415324fb0dSmrg};
4425324fb0dSmrg
4435324fb0dSmrgstatic const uint32_t ps_tex_shader_gfx9[] = {
4445324fb0dSmrg    0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000,
4455324fb0dSmrg    0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00,
4465324fb0dSmrg    0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000,
4475324fb0dSmrg    0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F,
4485324fb0dSmrg    0x00000100, 0xBF810000
4495324fb0dSmrg};
4505324fb0dSmrg
4515324fb0dSmrgstatic const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = {
4525324fb0dSmrg    0x0000000B
4535324fb0dSmrg};
4545324fb0dSmrg
4555324fb0dSmrgstatic const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6;
4565324fb0dSmrg
4575324fb0dSmrgstatic const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = {
4585324fb0dSmrg    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
4595324fb0dSmrg     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 },
4605324fb0dSmrg     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 },
4615324fb0dSmrg     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 },
4625324fb0dSmrg     { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 },
4635324fb0dSmrg     { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 },
4645324fb0dSmrg     { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 },
4655324fb0dSmrg     { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 },
4665324fb0dSmrg     { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 },
4675324fb0dSmrg     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 }
4685324fb0dSmrg    }
4695324fb0dSmrg};
4705324fb0dSmrg
4715324fb0dSmrgstatic const uint32_t ps_tex_sh_registers_gfx9[][2] = {
4725324fb0dSmrg    {0x2C0A, 0x000C0081},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 },
4735324fb0dSmrg    {0x2C0B, 0x00000018}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 }
4745324fb0dSmrg};
4755324fb0dSmrg
4765324fb0dSmrgstatic const uint32_t ps_tex_context_reg_gfx9[][2] = {
4775324fb0dSmrg    {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
4785324fb0dSmrg    {0xA1B6, 0x00000001}, //{ mmSPI_PS_IN_CONTROL,       0x00000001 },
4795324fb0dSmrg    {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
4805324fb0dSmrg    {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
4815324fb0dSmrg    {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
4825324fb0dSmrg    {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
4835324fb0dSmrg    {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004  }
4845324fb0dSmrg};
4855324fb0dSmrg
4865324fb0dSmrgstatic const uint32_t vs_RectPosTexFast_shader_gfx9[] = {
4875324fb0dSmrg    0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100,
4885324fb0dSmrg    0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206,
4895324fb0dSmrg    0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080,
4905324fb0dSmrg    0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003,
4915324fb0dSmrg    0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101,
4925324fb0dSmrg    0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903,
4935324fb0dSmrg    0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100,
4945324fb0dSmrg    0xC400020F, 0x05060403, 0xBF810000
4955324fb0dSmrg};
4965324fb0dSmrg
4975324fb0dSmrgstatic const uint32_t cached_cmd_gfx9[] = {
4985324fb0dSmrg	0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0,
4995324fb0dSmrg	0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
5005324fb0dSmrg	0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf,
5019bd392adSmrg	0xc0056900, 0x105, 0x0, 0x0,  0x0, 0x0, 0x12,
5025324fb0dSmrg	0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0,
5035324fb0dSmrg	0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
5045324fb0dSmrg	0xc0026900, 0x292, 0x20, 0x60201b8,
5055324fb0dSmrg	0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
5065324fb0dSmrg};
50700a23bdaSmrg
5089bd392adSmrgunsigned int memcpy_ps_hang[] = {
5099bd392adSmrg        0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100,
5109bd392adSmrg        0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001,
5119bd392adSmrg        0xC80D0101, 0xBF8C007F, 0xF0800F00, 0x00010002,
5129bd392adSmrg        0xBEFE040C, 0xBF8C0F70, 0xBF800000, 0xBF800000,
5139bd392adSmrg        0xF800180F, 0x03020100, 0xBF810000
5149bd392adSmrg};
5159bd392adSmrg
/*
 * Describes a shader binary: a pointer to its code words plus the lengths of
 * three consecutive sections.
 * NOTE(review): the lengths are presumably counted in 32-bit words of the
 * code array - confirm against the users of this struct.
 */
struct amdgpu_test_shader {
	uint32_t *shader;		/* shader code words */
	uint32_t header_length;		/* length of the leading section */
	uint32_t body_length;		/* length of the middle section */
	uint32_t foot_length;		/* length of the trailing section */
};
5229bd392adSmrg
5239bd392adSmrgunsigned int memcpy_cs_hang_slow_ai_codes[] = {
5249bd392adSmrg    0xd1fd0000, 0x04010c08, 0xe00c2000, 0x80000100,
5259bd392adSmrg    0xbf8c0f70, 0xe01c2000, 0x80010100, 0xbf810000
5269bd392adSmrg};
5279bd392adSmrg
5289bd392adSmrgstruct amdgpu_test_shader memcpy_cs_hang_slow_ai = {
5299bd392adSmrg        memcpy_cs_hang_slow_ai_codes,
5309bd392adSmrg        4,
5319bd392adSmrg        3,
5329bd392adSmrg        1
5339bd392adSmrg};
5349bd392adSmrg
5359bd392adSmrgunsigned int memcpy_cs_hang_slow_rv_codes[] = {
5369bd392adSmrg    0x8e00860c, 0x32000000, 0xe00c2000, 0x80010100,
5379bd392adSmrg    0xbf8c0f70, 0xe01c2000, 0x80020100, 0xbf810000
5389bd392adSmrg};
5399bd392adSmrg
5409bd392adSmrgstruct amdgpu_test_shader memcpy_cs_hang_slow_rv = {
5419bd392adSmrg        memcpy_cs_hang_slow_rv_codes,
5429bd392adSmrg        4,
5439bd392adSmrg        3,
5449bd392adSmrg        1
5459bd392adSmrg};
5469bd392adSmrg
5479bd392adSmrgunsigned int memcpy_ps_hang_slow_ai_codes[] = {
5489bd392adSmrg        0xbefc000c, 0xbe8e017e, 0xbefe077e, 0xd4080000,
5499bd392adSmrg        0xd4090001, 0xd40c0100, 0xd40d0101, 0xf0800f00,
5509bd392adSmrg        0x00400002, 0xbefe010e, 0xbf8c0f70, 0xbf800000,
5519bd392adSmrg        0xbf800000, 0xbf800000, 0xbf800000, 0xc400180f,
5529bd392adSmrg        0x03020100, 0xbf810000
5539bd392adSmrg};
5549bd392adSmrg
5559bd392adSmrgstruct amdgpu_test_shader memcpy_ps_hang_slow_ai = {
5569bd392adSmrg        memcpy_ps_hang_slow_ai_codes,
5579bd392adSmrg        7,
5589bd392adSmrg        2,
5599bd392adSmrg        9
5609bd392adSmrg};
5619bd392adSmrg
5627cdc0497Smrgint amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size,
5637cdc0497Smrg			unsigned alignment, unsigned heap, uint64_t alloc_flags,
5647cdc0497Smrg			uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu,
5657cdc0497Smrg			uint64_t *mc_address,
5667cdc0497Smrg			amdgpu_va_handle *va_handle)
5677cdc0497Smrg{
5687cdc0497Smrg	struct amdgpu_bo_alloc_request request = {};
5697cdc0497Smrg	amdgpu_bo_handle buf_handle;
5707cdc0497Smrg	amdgpu_va_handle handle;
5717cdc0497Smrg	uint64_t vmc_addr;
5727cdc0497Smrg	int r;
5737cdc0497Smrg
5747cdc0497Smrg	request.alloc_size = size;
5757cdc0497Smrg	request.phys_alignment = alignment;
5767cdc0497Smrg	request.preferred_heap = heap;
5777cdc0497Smrg	request.flags = alloc_flags;
5787cdc0497Smrg
5797cdc0497Smrg	r = amdgpu_bo_alloc(dev, &request, &buf_handle);
5807cdc0497Smrg	if (r)
5817cdc0497Smrg		return r;
5827cdc0497Smrg
5837cdc0497Smrg	r = amdgpu_va_range_alloc(dev,
5847cdc0497Smrg				  amdgpu_gpu_va_range_general,
5857cdc0497Smrg				  size, alignment, 0, &vmc_addr,
5867cdc0497Smrg				  &handle, 0);
5877cdc0497Smrg	if (r)
5887cdc0497Smrg		goto error_va_alloc;
5897cdc0497Smrg
5907cdc0497Smrg	r = amdgpu_bo_va_op_raw(dev, buf_handle, 0,  ALIGN(size, getpagesize()), vmc_addr,
5917cdc0497Smrg				   AMDGPU_VM_PAGE_READABLE |
5927cdc0497Smrg				   AMDGPU_VM_PAGE_WRITEABLE |
5937cdc0497Smrg				   AMDGPU_VM_PAGE_EXECUTABLE |
5947cdc0497Smrg				   mapping_flags,
5957cdc0497Smrg				   AMDGPU_VA_OP_MAP);
5967cdc0497Smrg	if (r)
5977cdc0497Smrg		goto error_va_map;
5987cdc0497Smrg
5997cdc0497Smrg	r = amdgpu_bo_cpu_map(buf_handle, cpu);
6007cdc0497Smrg	if (r)
6017cdc0497Smrg		goto error_cpu_map;
6027cdc0497Smrg
6037cdc0497Smrg	*bo = buf_handle;
6047cdc0497Smrg	*mc_address = vmc_addr;
6057cdc0497Smrg	*va_handle = handle;
6067cdc0497Smrg
6077cdc0497Smrg	return 0;
6087cdc0497Smrg
6097cdc0497Smrg error_cpu_map:
6107cdc0497Smrg	amdgpu_bo_cpu_unmap(buf_handle);
6117cdc0497Smrg
6127cdc0497Smrg error_va_map:
6137cdc0497Smrg	amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);
6147cdc0497Smrg
6157cdc0497Smrg error_va_alloc:
6167cdc0497Smrg	amdgpu_bo_free(buf_handle);
6177cdc0497Smrg	return r;
6187cdc0497Smrg}
6197cdc0497Smrg
6207cdc0497Smrg
6217cdc0497Smrg
62241687f09SmrgCU_BOOL suite_basic_tests_enable(void)
62341687f09Smrg{
62441687f09Smrg
62541687f09Smrg	if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
62641687f09Smrg					     &minor_version, &device_handle))
62741687f09Smrg		return CU_FALSE;
62841687f09Smrg
6294babd585Smrg
6304babd585Smrg	family_id = device_handle->info.family_id;
6314babd585Smrg	chip_id = device_handle->info.chip_external_rev;
6324babd585Smrg	chip_rev = device_handle->info.chip_rev;
63341687f09Smrg
63441687f09Smrg	if (amdgpu_device_deinitialize(device_handle))
63541687f09Smrg		return CU_FALSE;
63641687f09Smrg
6374babd585Smrg	/* disable gfx engine basic test cases for some asics have no CPG */
6384babd585Smrg	if (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) {
63941687f09Smrg		if (amdgpu_set_test_active("Basic Tests",
64041687f09Smrg					"Command submission Test (GFX)",
64141687f09Smrg					CU_FALSE))
64241687f09Smrg			fprintf(stderr, "test deactivation failed - %s\n",
64341687f09Smrg				CU_get_error_msg());
64441687f09Smrg
64541687f09Smrg		if (amdgpu_set_test_active("Basic Tests",
64641687f09Smrg					"Command submission Test (Multi-Fence)",
64741687f09Smrg					CU_FALSE))
64841687f09Smrg			fprintf(stderr, "test deactivation failed - %s\n",
64941687f09Smrg				CU_get_error_msg());
65041687f09Smrg
65141687f09Smrg		if (amdgpu_set_test_active("Basic Tests",
65241687f09Smrg					"Sync dependency Test",
65341687f09Smrg					CU_FALSE))
65441687f09Smrg			fprintf(stderr, "test deactivation failed - %s\n",
65541687f09Smrg				CU_get_error_msg());
65641687f09Smrg	}
65741687f09Smrg
65841687f09Smrg	return CU_TRUE;
65941687f09Smrg}
66041687f09Smrg
6613f012e29Smrgint suite_basic_tests_init(void)
6623f012e29Smrg{
663d8807b2fSmrg	struct amdgpu_gpu_info gpu_info = {0};
6643f012e29Smrg	int r;
6653f012e29Smrg
6663f012e29Smrg	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
6673f012e29Smrg				   &minor_version, &device_handle);
6683f012e29Smrg
669d8807b2fSmrg	if (r) {
670037b3c26Smrg		if ((r == -EACCES) && (errno == EACCES))
671037b3c26Smrg			printf("\n\nError:%s. "
672037b3c26Smrg				"Hint:Try to run this test program as root.",
673037b3c26Smrg				strerror(errno));
6743f012e29Smrg		return CUE_SINIT_FAILED;
675037b3c26Smrg	}
676d8807b2fSmrg
677d8807b2fSmrg	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
678d8807b2fSmrg	if (r)
679d8807b2fSmrg		return CUE_SINIT_FAILED;
680d8807b2fSmrg
681d8807b2fSmrg	family_id = gpu_info.family_id;
682d8807b2fSmrg
683d8807b2fSmrg	return CUE_SUCCESS;
6843f012e29Smrg}
6853f012e29Smrg
6863f012e29Smrgint suite_basic_tests_clean(void)
6873f012e29Smrg{
6883f012e29Smrg	int r = amdgpu_device_deinitialize(device_handle);
6893f012e29Smrg
6903f012e29Smrg	if (r == 0)
6913f012e29Smrg		return CUE_SUCCESS;
6923f012e29Smrg	else
6933f012e29Smrg		return CUE_SCLEAN_FAILED;
6943f012e29Smrg}
6953f012e29Smrg
6963f012e29Smrgstatic void amdgpu_query_info_test(void)
6973f012e29Smrg{
6983f012e29Smrg	struct amdgpu_gpu_info gpu_info = {0};
6993f012e29Smrg	uint32_t version, feature;
7003f012e29Smrg	int r;
7013f012e29Smrg
7023f012e29Smrg	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
7033f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7043f012e29Smrg
7053f012e29Smrg	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
7063f012e29Smrg					  0, &version, &feature);
7073f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7083f012e29Smrg}
7093f012e29Smrg
7103f012e29Smrgstatic void amdgpu_command_submission_gfx_separate_ibs(void)
7113f012e29Smrg{
7123f012e29Smrg	amdgpu_context_handle context_handle;
7133f012e29Smrg	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
7143f012e29Smrg	void *ib_result_cpu, *ib_result_ce_cpu;
7153f012e29Smrg	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
7163f012e29Smrg	struct amdgpu_cs_request ibs_request = {0};
7173f012e29Smrg	struct amdgpu_cs_ib_info ib_info[2];
7183f012e29Smrg	struct amdgpu_cs_fence fence_status = {0};
7193f012e29Smrg	uint32_t *ptr;
7203f012e29Smrg	uint32_t expired;
7213f012e29Smrg	amdgpu_bo_list_handle bo_list;
7223f012e29Smrg	amdgpu_va_handle va_handle, va_handle_ce;
723d8807b2fSmrg	int r, i = 0;
7243f012e29Smrg
7253f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
7263f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7273f012e29Smrg
7283f012e29Smrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
7293f012e29Smrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
7303f012e29Smrg				    &ib_result_handle, &ib_result_cpu,
7313f012e29Smrg				    &ib_result_mc_address, &va_handle);
7323f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7333f012e29Smrg
7343f012e29Smrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
7353f012e29Smrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
7363f012e29Smrg				    &ib_result_ce_handle, &ib_result_ce_cpu,
7373f012e29Smrg				    &ib_result_ce_mc_address, &va_handle_ce);
7383f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7393f012e29Smrg
7403f012e29Smrg	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
7413f012e29Smrg			       ib_result_ce_handle, &bo_list);
7423f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7433f012e29Smrg
7443f012e29Smrg	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
7453f012e29Smrg
7463f012e29Smrg	/* IT_SET_CE_DE_COUNTERS */
7473f012e29Smrg	ptr = ib_result_ce_cpu;
748d8807b2fSmrg	if (family_id != AMDGPU_FAMILY_SI) {
749d8807b2fSmrg		ptr[i++] = 0xc0008900;
750d8807b2fSmrg		ptr[i++] = 0;
751d8807b2fSmrg	}
752d8807b2fSmrg	ptr[i++] = 0xc0008400;
753d8807b2fSmrg	ptr[i++] = 1;
7543f012e29Smrg	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
755d8807b2fSmrg	ib_info[0].size = i;
7563f012e29Smrg	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
7573f012e29Smrg
7583f012e29Smrg	/* IT_WAIT_ON_CE_COUNTER */
7593f012e29Smrg	ptr = ib_result_cpu;
7603f012e29Smrg	ptr[0] = 0xc0008600;
7613f012e29Smrg	ptr[1] = 0x00000001;
7623f012e29Smrg	ib_info[1].ib_mc_address = ib_result_mc_address;
7633f012e29Smrg	ib_info[1].size = 2;
7643f012e29Smrg
7653f012e29Smrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
7663f012e29Smrg	ibs_request.number_of_ibs = 2;
7673f012e29Smrg	ibs_request.ibs = ib_info;
7683f012e29Smrg	ibs_request.resources = bo_list;
7693f012e29Smrg	ibs_request.fence_info.handle = NULL;
7703f012e29Smrg
7713f012e29Smrg	r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
7723f012e29Smrg
7733f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7743f012e29Smrg
7753f012e29Smrg	fence_status.context = context_handle;
7763f012e29Smrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
7773f012e29Smrg	fence_status.ip_instance = 0;
7783f012e29Smrg	fence_status.fence = ibs_request.seq_no;
7793f012e29Smrg
7803f012e29Smrg	r = amdgpu_cs_query_fence_status(&fence_status,
7813f012e29Smrg					 AMDGPU_TIMEOUT_INFINITE,
7823f012e29Smrg					 0, &expired);
7833f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7843f012e29Smrg
7853f012e29Smrg	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
7863f012e29Smrg				     ib_result_mc_address, 4096);
7873f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7883f012e29Smrg
7893f012e29Smrg	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
7903f012e29Smrg				     ib_result_ce_mc_address, 4096);
7913f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7923f012e29Smrg
7933f012e29Smrg	r = amdgpu_bo_list_destroy(bo_list);
7943f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7953f012e29Smrg
7963f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
7973f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7983f012e29Smrg
7993f012e29Smrg}
8003f012e29Smrg
8013f012e29Smrgstatic void amdgpu_command_submission_gfx_shared_ib(void)
8023f012e29Smrg{
8033f012e29Smrg	amdgpu_context_handle context_handle;
8043f012e29Smrg	amdgpu_bo_handle ib_result_handle;
8053f012e29Smrg	void *ib_result_cpu;
8063f012e29Smrg	uint64_t ib_result_mc_address;
8073f012e29Smrg	struct amdgpu_cs_request ibs_request = {0};
8083f012e29Smrg	struct amdgpu_cs_ib_info ib_info[2];
8093f012e29Smrg	struct amdgpu_cs_fence fence_status = {0};
8103f012e29Smrg	uint32_t *ptr;
8113f012e29Smrg	uint32_t expired;
8123f012e29Smrg	amdgpu_bo_list_handle bo_list;
8133f012e29Smrg	amdgpu_va_handle va_handle;
814d8807b2fSmrg	int r, i = 0;
8153f012e29Smrg
8163f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
8173f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
8183f012e29Smrg
8193f012e29Smrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
8203f012e29Smrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
8213f012e29Smrg				    &ib_result_handle, &ib_result_cpu,
8223f012e29Smrg				    &ib_result_mc_address, &va_handle);
8233f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
8243f012e29Smrg
8253f012e29Smrg	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
8263f012e29Smrg			       &bo_list);
8273f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
8283f012e29Smrg
8293f012e29Smrg	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
8303f012e29Smrg
8313f012e29Smrg	/* IT_SET_CE_DE_COUNTERS */
8323f012e29Smrg	ptr = ib_result_cpu;
833d8807b2fSmrg	if (family_id != AMDGPU_FAMILY_SI) {
834d8807b2fSmrg		ptr[i++] = 0xc0008900;
835d8807b2fSmrg		ptr[i++] = 0;
836d8807b2fSmrg	}
837d8807b2fSmrg	ptr[i++] = 0xc0008400;
838d8807b2fSmrg	ptr[i++] = 1;
8393f012e29Smrg	ib_info[0].ib_mc_address = ib_result_mc_address;
840d8807b2fSmrg	ib_info[0].size = i;
8413f012e29Smrg	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
8423f012e29Smrg
8433f012e29Smrg	ptr = (uint32_t *)ib_result_cpu + 4;
8443f012e29Smrg	ptr[0] = 0xc0008600;
8453f012e29Smrg	ptr[1] = 0x00000001;
8463f012e29Smrg	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
8473f012e29Smrg	ib_info[1].size = 2;
8483f012e29Smrg
8493f012e29Smrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
8503f012e29Smrg	ibs_request.number_of_ibs = 2;
8513f012e29Smrg	ibs_request.ibs = ib_info;
8523f012e29Smrg	ibs_request.resources = bo_list;
8533f012e29Smrg	ibs_request.fence_info.handle = NULL;
8543f012e29Smrg
8553f012e29Smrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
8563f012e29Smrg
8573f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
8583f012e29Smrg
8593f012e29Smrg	fence_status.context = context_handle;
8603f012e29Smrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
8613f012e29Smrg	fence_status.ip_instance = 0;
8623f012e29Smrg	fence_status.fence = ibs_request.seq_no;
8633f012e29Smrg
8643f012e29Smrg	r = amdgpu_cs_query_fence_status(&fence_status,
8653f012e29Smrg					 AMDGPU_TIMEOUT_INFINITE,
8663f012e29Smrg					 0, &expired);
8673f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
8683f012e29Smrg
8693f012e29Smrg	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
8703f012e29Smrg				     ib_result_mc_address, 4096);
8713f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
8723f012e29Smrg
8733f012e29Smrg	r = amdgpu_bo_list_destroy(bo_list);
8743f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
8753f012e29Smrg
8763f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
8773f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
8783f012e29Smrg}
8793f012e29Smrg
8803f012e29Smrgstatic void amdgpu_command_submission_gfx_cp_write_data(void)
8813f012e29Smrg{
8823f012e29Smrg	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
8833f012e29Smrg}
8843f012e29Smrg
8853f012e29Smrgstatic void amdgpu_command_submission_gfx_cp_const_fill(void)
8863f012e29Smrg{
8873f012e29Smrg	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
8883f012e29Smrg}
8893f012e29Smrg
8903f012e29Smrgstatic void amdgpu_command_submission_gfx_cp_copy_data(void)
8913f012e29Smrg{
8923f012e29Smrg	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
8933f012e29Smrg}
8943f012e29Smrg
89500a23bdaSmrgstatic void amdgpu_bo_eviction_test(void)
89600a23bdaSmrg{
89700a23bdaSmrg	const int sdma_write_length = 1024;
89800a23bdaSmrg	const int pm4_dw = 256;
89900a23bdaSmrg	amdgpu_context_handle context_handle;
90000a23bdaSmrg	amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
90100a23bdaSmrg	amdgpu_bo_handle *resources;
90200a23bdaSmrg	uint32_t *pm4;
90300a23bdaSmrg	struct amdgpu_cs_ib_info *ib_info;
90400a23bdaSmrg	struct amdgpu_cs_request *ibs_request;
90500a23bdaSmrg	uint64_t bo1_mc, bo2_mc;
90600a23bdaSmrg	volatile unsigned char *bo1_cpu, *bo2_cpu;
90700a23bdaSmrg	int i, j, r, loop1, loop2;
90800a23bdaSmrg	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
90900a23bdaSmrg	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
91000a23bdaSmrg	struct amdgpu_heap_info vram_info, gtt_info;
91100a23bdaSmrg
91200a23bdaSmrg	pm4 = calloc(pm4_dw, sizeof(*pm4));
91300a23bdaSmrg	CU_ASSERT_NOT_EQUAL(pm4, NULL);
91400a23bdaSmrg
91500a23bdaSmrg	ib_info = calloc(1, sizeof(*ib_info));
91600a23bdaSmrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
91700a23bdaSmrg
91800a23bdaSmrg	ibs_request = calloc(1, sizeof(*ibs_request));
91900a23bdaSmrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
92000a23bdaSmrg
92100a23bdaSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
92200a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
92300a23bdaSmrg
92400a23bdaSmrg	/* prepare resource */
92500a23bdaSmrg	resources = calloc(4, sizeof(amdgpu_bo_handle));
92600a23bdaSmrg	CU_ASSERT_NOT_EQUAL(resources, NULL);
92700a23bdaSmrg
92800a23bdaSmrg	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
92900a23bdaSmrg				   0, &vram_info);
93000a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
93100a23bdaSmrg
93200a23bdaSmrg	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
93300a23bdaSmrg				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
93400a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
93500a23bdaSmrg	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
93600a23bdaSmrg				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
93700a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
93800a23bdaSmrg
9394babd585Smrg	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
9404babd585Smrg				   0, &gtt_info);
9414babd585Smrg	CU_ASSERT_EQUAL(r, 0);
9424babd585Smrg
94300a23bdaSmrg	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
94400a23bdaSmrg				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
94500a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
94600a23bdaSmrg	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
94700a23bdaSmrg				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
94800a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
94900a23bdaSmrg
95000a23bdaSmrg
95100a23bdaSmrg
95200a23bdaSmrg	loop1 = loop2 = 0;
95300a23bdaSmrg	/* run 9 circle to test all mapping combination */
95400a23bdaSmrg	while(loop1 < 2) {
95500a23bdaSmrg		while(loop2 < 2) {
95600a23bdaSmrg			/* allocate UC bo1for sDMA use */
95700a23bdaSmrg			r = amdgpu_bo_alloc_and_map(device_handle,
95800a23bdaSmrg						    sdma_write_length, 4096,
95900a23bdaSmrg						    AMDGPU_GEM_DOMAIN_GTT,
96000a23bdaSmrg						    gtt_flags[loop1], &bo1,
96100a23bdaSmrg						    (void**)&bo1_cpu, &bo1_mc,
96200a23bdaSmrg						    &bo1_va_handle);
96300a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
96400a23bdaSmrg
96500a23bdaSmrg			/* set bo1 */
96600a23bdaSmrg			memset((void*)bo1_cpu, 0xaa, sdma_write_length);
96700a23bdaSmrg
96800a23bdaSmrg			/* allocate UC bo2 for sDMA use */
96900a23bdaSmrg			r = amdgpu_bo_alloc_and_map(device_handle,
97000a23bdaSmrg						    sdma_write_length, 4096,
97100a23bdaSmrg						    AMDGPU_GEM_DOMAIN_GTT,
97200a23bdaSmrg						    gtt_flags[loop2], &bo2,
97300a23bdaSmrg						    (void**)&bo2_cpu, &bo2_mc,
97400a23bdaSmrg						    &bo2_va_handle);
97500a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
97600a23bdaSmrg
97700a23bdaSmrg			/* clear bo2 */
97800a23bdaSmrg			memset((void*)bo2_cpu, 0, sdma_write_length);
97900a23bdaSmrg
98000a23bdaSmrg			resources[0] = bo1;
98100a23bdaSmrg			resources[1] = bo2;
98200a23bdaSmrg			resources[2] = vram_max[loop2];
98300a23bdaSmrg			resources[3] = gtt_max[loop2];
98400a23bdaSmrg
98500a23bdaSmrg			/* fulfill PM4: test DMA copy linear */
98600a23bdaSmrg			i = j = 0;
98700a23bdaSmrg			if (family_id == AMDGPU_FAMILY_SI) {
98800a23bdaSmrg				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
98900a23bdaSmrg							  sdma_write_length);
99000a23bdaSmrg				pm4[i++] = 0xffffffff & bo2_mc;
99100a23bdaSmrg				pm4[i++] = 0xffffffff & bo1_mc;
99200a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
99300a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
99400a23bdaSmrg			} else {
99500a23bdaSmrg				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
99600a23bdaSmrg				if (family_id >= AMDGPU_FAMILY_AI)
99700a23bdaSmrg					pm4[i++] = sdma_write_length - 1;
99800a23bdaSmrg				else
99900a23bdaSmrg					pm4[i++] = sdma_write_length;
100000a23bdaSmrg				pm4[i++] = 0;
100100a23bdaSmrg				pm4[i++] = 0xffffffff & bo1_mc;
100200a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
100300a23bdaSmrg				pm4[i++] = 0xffffffff & bo2_mc;
100400a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
100500a23bdaSmrg			}
100600a23bdaSmrg
100700a23bdaSmrg			amdgpu_test_exec_cs_helper(context_handle,
100800a23bdaSmrg						   AMDGPU_HW_IP_DMA, 0,
100900a23bdaSmrg						   i, pm4,
101000a23bdaSmrg						   4, resources,
101100a23bdaSmrg						   ib_info, ibs_request);
101200a23bdaSmrg
101300a23bdaSmrg			/* verify if SDMA test result meets with expected */
101400a23bdaSmrg			i = 0;
101500a23bdaSmrg			while(i < sdma_write_length) {
101600a23bdaSmrg				CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
101700a23bdaSmrg			}
101800a23bdaSmrg			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
101900a23bdaSmrg						     sdma_write_length);
102000a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
102100a23bdaSmrg			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
102200a23bdaSmrg						     sdma_write_length);
102300a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
102400a23bdaSmrg			loop2++;
102500a23bdaSmrg		}
102600a23bdaSmrg		loop2 = 0;
102700a23bdaSmrg		loop1++;
102800a23bdaSmrg	}
102900a23bdaSmrg	amdgpu_bo_free(vram_max[0]);
103000a23bdaSmrg	amdgpu_bo_free(vram_max[1]);
103100a23bdaSmrg	amdgpu_bo_free(gtt_max[0]);
103200a23bdaSmrg	amdgpu_bo_free(gtt_max[1]);
103300a23bdaSmrg	/* clean resources */
103400a23bdaSmrg	free(resources);
103500a23bdaSmrg	free(ibs_request);
103600a23bdaSmrg	free(ib_info);
103700a23bdaSmrg	free(pm4);
103800a23bdaSmrg
103900a23bdaSmrg	/* end of test */
104000a23bdaSmrg	r = amdgpu_cs_ctx_free(context_handle);
104100a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
104200a23bdaSmrg}
104300a23bdaSmrg
104400a23bdaSmrg
10453f012e29Smrgstatic void amdgpu_command_submission_gfx(void)
10463f012e29Smrg{
10473f012e29Smrg	/* write data using the CP */
10483f012e29Smrg	amdgpu_command_submission_gfx_cp_write_data();
10493f012e29Smrg	/* const fill using the CP */
10503f012e29Smrg	amdgpu_command_submission_gfx_cp_const_fill();
10513f012e29Smrg	/* copy data using the CP */
10523f012e29Smrg	amdgpu_command_submission_gfx_cp_copy_data();
10533f012e29Smrg	/* separate IB buffers for multi-IB submission */
10543f012e29Smrg	amdgpu_command_submission_gfx_separate_ibs();
10553f012e29Smrg	/* shared IB buffer for multi-IB submission */
10563f012e29Smrg	amdgpu_command_submission_gfx_shared_ib();
10573f012e29Smrg}
10583f012e29Smrg
10593f012e29Smrgstatic void amdgpu_semaphore_test(void)
10603f012e29Smrg{
10613f012e29Smrg	amdgpu_context_handle context_handle[2];
10623f012e29Smrg	amdgpu_semaphore_handle sem;
10633f012e29Smrg	amdgpu_bo_handle ib_result_handle[2];
10643f012e29Smrg	void *ib_result_cpu[2];
10653f012e29Smrg	uint64_t ib_result_mc_address[2];
10663f012e29Smrg	struct amdgpu_cs_request ibs_request[2] = {0};
10673f012e29Smrg	struct amdgpu_cs_ib_info ib_info[2] = {0};
10683f012e29Smrg	struct amdgpu_cs_fence fence_status = {0};
10693f012e29Smrg	uint32_t *ptr;
10703f012e29Smrg	uint32_t expired;
1071d8807b2fSmrg	uint32_t sdma_nop, gfx_nop;
10723f012e29Smrg	amdgpu_bo_list_handle bo_list[2];
10733f012e29Smrg	amdgpu_va_handle va_handle[2];
10743f012e29Smrg	int r, i;
10754babd585Smrg	struct amdgpu_gpu_info gpu_info = {0};
10764babd585Smrg	unsigned gc_ip_type;
10774babd585Smrg
10784babd585Smrg	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
10794babd585Smrg	CU_ASSERT_EQUAL(r, 0);
10804babd585Smrg
10814babd585Smrg	gc_ip_type = (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) ?
10824babd585Smrg			AMDGPU_HW_IP_COMPUTE : AMDGPU_HW_IP_GFX;
10833f012e29Smrg
1084d8807b2fSmrg	if (family_id == AMDGPU_FAMILY_SI) {
1085d8807b2fSmrg		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
1086d8807b2fSmrg		gfx_nop = GFX_COMPUTE_NOP_SI;
1087d8807b2fSmrg	} else {
1088d8807b2fSmrg		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
1089d8807b2fSmrg		gfx_nop = GFX_COMPUTE_NOP;
1090d8807b2fSmrg	}
1091d8807b2fSmrg
10923f012e29Smrg	r = amdgpu_cs_create_semaphore(&sem);
10933f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
10943f012e29Smrg	for (i = 0; i < 2; i++) {
10953f012e29Smrg		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
10963f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
10973f012e29Smrg
10983f012e29Smrg		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
10993f012e29Smrg					    AMDGPU_GEM_DOMAIN_GTT, 0,
11003f012e29Smrg					    &ib_result_handle[i], &ib_result_cpu[i],
11013f012e29Smrg					    &ib_result_mc_address[i], &va_handle[i]);
11023f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
11033f012e29Smrg
11043f012e29Smrg		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
11053f012e29Smrg				       NULL, &bo_list[i]);
11063f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
11073f012e29Smrg	}
11083f012e29Smrg
11093f012e29Smrg	/* 1. same context different engine */
11103f012e29Smrg	ptr = ib_result_cpu[0];
1111d8807b2fSmrg	ptr[0] = sdma_nop;
11123f012e29Smrg	ib_info[0].ib_mc_address = ib_result_mc_address[0];
11133f012e29Smrg	ib_info[0].size = 1;
11143f012e29Smrg
11153f012e29Smrg	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
11163f012e29Smrg	ibs_request[0].number_of_ibs = 1;
11173f012e29Smrg	ibs_request[0].ibs = &ib_info[0];
11183f012e29Smrg	ibs_request[0].resources = bo_list[0];
11193f012e29Smrg	ibs_request[0].fence_info.handle = NULL;
11203f012e29Smrg	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
11213f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
11223f012e29Smrg	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
11233f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
11243f012e29Smrg
11254babd585Smrg	r = amdgpu_cs_wait_semaphore(context_handle[0], gc_ip_type, 0, 0, sem);
11263f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
11273f012e29Smrg	ptr = ib_result_cpu[1];
1128d8807b2fSmrg	ptr[0] = gfx_nop;
11293f012e29Smrg	ib_info[1].ib_mc_address = ib_result_mc_address[1];
11303f012e29Smrg	ib_info[1].size = 1;
11313f012e29Smrg
11324babd585Smrg	ibs_request[1].ip_type = gc_ip_type;
11333f012e29Smrg	ibs_request[1].number_of_ibs = 1;
11343f012e29Smrg	ibs_request[1].ibs = &ib_info[1];
11353f012e29Smrg	ibs_request[1].resources = bo_list[1];
11363f012e29Smrg	ibs_request[1].fence_info.handle = NULL;
11373f012e29Smrg
11383f012e29Smrg	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[1], 1);
11393f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
11403f012e29Smrg
11413f012e29Smrg	fence_status.context = context_handle[0];
11424babd585Smrg	fence_status.ip_type = gc_ip_type;
11433f012e29Smrg	fence_status.ip_instance = 0;
11443f012e29Smrg	fence_status.fence = ibs_request[1].seq_no;
11453f012e29Smrg	r = amdgpu_cs_query_fence_status(&fence_status,
11463f012e29Smrg					 500000000, 0, &expired);
11473f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
11483f012e29Smrg	CU_ASSERT_EQUAL(expired, true);
11493f012e29Smrg
11503f012e29Smrg	/* 2. same engine different context */
11513f012e29Smrg	ptr = ib_result_cpu[0];
1152d8807b2fSmrg	ptr[0] = gfx_nop;
11533f012e29Smrg	ib_info[0].ib_mc_address = ib_result_mc_address[0];
11543f012e29Smrg	ib_info[0].size = 1;
11553f012e29Smrg
11564babd585Smrg	ibs_request[0].ip_type = gc_ip_type;
11573f012e29Smrg	ibs_request[0].number_of_ibs = 1;
11583f012e29Smrg	ibs_request[0].ibs = &ib_info[0];
11593f012e29Smrg	ibs_request[0].resources = bo_list[0];
11603f012e29Smrg	ibs_request[0].fence_info.handle = NULL;
11613f012e29Smrg	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
11623f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
11634babd585Smrg	r = amdgpu_cs_signal_semaphore(context_handle[0], gc_ip_type, 0, 0, sem);
11643f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
11653f012e29Smrg
11664babd585Smrg	r = amdgpu_cs_wait_semaphore(context_handle[1], gc_ip_type, 0, 0, sem);
11673f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
11683f012e29Smrg	ptr = ib_result_cpu[1];
1169d8807b2fSmrg	ptr[0] = gfx_nop;
11703f012e29Smrg	ib_info[1].ib_mc_address = ib_result_mc_address[1];
11713f012e29Smrg	ib_info[1].size = 1;
11723f012e29Smrg
11734babd585Smrg	ibs_request[1].ip_type = gc_ip_type;
11743f012e29Smrg	ibs_request[1].number_of_ibs = 1;
11753f012e29Smrg	ibs_request[1].ibs = &ib_info[1];
11763f012e29Smrg	ibs_request[1].resources = bo_list[1];
11773f012e29Smrg	ibs_request[1].fence_info.handle = NULL;
11783f012e29Smrg	r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request[1], 1);
11793f012e29Smrg
11803f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
11813f012e29Smrg
11823f012e29Smrg	fence_status.context = context_handle[1];
11834babd585Smrg	fence_status.ip_type = gc_ip_type;
11843f012e29Smrg	fence_status.ip_instance = 0;
11853f012e29Smrg	fence_status.fence = ibs_request[1].seq_no;
11863f012e29Smrg	r = amdgpu_cs_query_fence_status(&fence_status,
11873f012e29Smrg					 500000000, 0, &expired);
11883f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
11893f012e29Smrg	CU_ASSERT_EQUAL(expired, true);
1190d8807b2fSmrg
11913f012e29Smrg	for (i = 0; i < 2; i++) {
11923f012e29Smrg		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
11933f012e29Smrg					     ib_result_mc_address[i], 4096);
11943f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
11953f012e29Smrg
11963f012e29Smrg		r = amdgpu_bo_list_destroy(bo_list[i]);
11973f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
11983f012e29Smrg
11993f012e29Smrg		r = amdgpu_cs_ctx_free(context_handle[i]);
12003f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
12013f012e29Smrg	}
12023f012e29Smrg
12033f012e29Smrg	r = amdgpu_cs_destroy_semaphore(sem);
12043f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
12053f012e29Smrg}
12063f012e29Smrg
12073f012e29Smrgstatic void amdgpu_command_submission_compute_nop(void)
12083f012e29Smrg{
12093f012e29Smrg	amdgpu_context_handle context_handle;
12103f012e29Smrg	amdgpu_bo_handle ib_result_handle;
12113f012e29Smrg	void *ib_result_cpu;
12123f012e29Smrg	uint64_t ib_result_mc_address;
12133f012e29Smrg	struct amdgpu_cs_request ibs_request;
12143f012e29Smrg	struct amdgpu_cs_ib_info ib_info;
12153f012e29Smrg	struct amdgpu_cs_fence fence_status;
12163f012e29Smrg	uint32_t *ptr;
12173f012e29Smrg	uint32_t expired;
121800a23bdaSmrg	int r, instance;
12193f012e29Smrg	amdgpu_bo_list_handle bo_list;
12203f012e29Smrg	amdgpu_va_handle va_handle;
1221d8807b2fSmrg	struct drm_amdgpu_info_hw_ip info;
1222d8807b2fSmrg
1223d8807b2fSmrg	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
1224d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
12253f012e29Smrg
12263f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
12273f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
12283f012e29Smrg
1229d8807b2fSmrg	for (instance = 0; (1 << instance) & info.available_rings; instance++) {
12303f012e29Smrg		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
12313f012e29Smrg					    AMDGPU_GEM_DOMAIN_GTT, 0,
12323f012e29Smrg					    &ib_result_handle, &ib_result_cpu,
12333f012e29Smrg					    &ib_result_mc_address, &va_handle);
12343f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
12353f012e29Smrg
12363f012e29Smrg		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
12373f012e29Smrg				       &bo_list);
12383f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
12393f012e29Smrg
12403f012e29Smrg		ptr = ib_result_cpu;
1241d8807b2fSmrg		memset(ptr, 0, 16);
1242d8807b2fSmrg		ptr[0]=PACKET3(PACKET3_NOP, 14);
12433f012e29Smrg
12443f012e29Smrg		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
12453f012e29Smrg		ib_info.ib_mc_address = ib_result_mc_address;
12463f012e29Smrg		ib_info.size = 16;
12473f012e29Smrg
12483f012e29Smrg		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
12493f012e29Smrg		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
12503f012e29Smrg		ibs_request.ring = instance;
12513f012e29Smrg		ibs_request.number_of_ibs = 1;
12523f012e29Smrg		ibs_request.ibs = &ib_info;
12533f012e29Smrg		ibs_request.resources = bo_list;
12543f012e29Smrg		ibs_request.fence_info.handle = NULL;
12553f012e29Smrg
12563f012e29Smrg		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
12573f012e29Smrg		r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
12583f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
12593f012e29Smrg
12603f012e29Smrg		fence_status.context = context_handle;
12613f012e29Smrg		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
12623f012e29Smrg		fence_status.ip_instance = 0;
12633f012e29Smrg		fence_status.ring = instance;
12643f012e29Smrg		fence_status.fence = ibs_request.seq_no;
12653f012e29Smrg
12663f012e29Smrg		r = amdgpu_cs_query_fence_status(&fence_status,
12673f012e29Smrg						 AMDGPU_TIMEOUT_INFINITE,
12683f012e29Smrg						 0, &expired);
12693f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
12703f012e29Smrg
12713f012e29Smrg		r = amdgpu_bo_list_destroy(bo_list);
12723f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
12733f012e29Smrg
12743f012e29Smrg		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
12753f012e29Smrg					     ib_result_mc_address, 4096);
12763f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
12773f012e29Smrg	}
12783f012e29Smrg
12793f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
12803f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
12813f012e29Smrg}
12823f012e29Smrg
12833f012e29Smrgstatic void amdgpu_command_submission_compute_cp_write_data(void)
12843f012e29Smrg{
12853f012e29Smrg	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
12863f012e29Smrg}
12873f012e29Smrg
12883f012e29Smrgstatic void amdgpu_command_submission_compute_cp_const_fill(void)
12893f012e29Smrg{
12903f012e29Smrg	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
12913f012e29Smrg}
12923f012e29Smrg
12933f012e29Smrgstatic void amdgpu_command_submission_compute_cp_copy_data(void)
12943f012e29Smrg{
12953f012e29Smrg	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
12963f012e29Smrg}
12973f012e29Smrg
12983f012e29Smrgstatic void amdgpu_command_submission_compute(void)
12993f012e29Smrg{
13003f012e29Smrg	/* write data using the CP */
13013f012e29Smrg	amdgpu_command_submission_compute_cp_write_data();
13023f012e29Smrg	/* const fill using the CP */
13033f012e29Smrg	amdgpu_command_submission_compute_cp_const_fill();
13043f012e29Smrg	/* copy data using the CP */
13053f012e29Smrg	amdgpu_command_submission_compute_cp_copy_data();
13063f012e29Smrg	/* nop test */
13073f012e29Smrg	amdgpu_command_submission_compute_nop();
13083f012e29Smrg}
13093f012e29Smrg
13103f012e29Smrg/*
13113f012e29Smrg * caller need create/release:
13123f012e29Smrg * pm4_src, resources, ib_info, and ibs_request
13133f012e29Smrg * submit command stream described in ibs_request and wait for this IB accomplished
13143f012e29Smrg */
131541687f09Smrgvoid
131641687f09Smrgamdgpu_test_exec_cs_helper_raw(amdgpu_device_handle device_handle,
131741687f09Smrg			       amdgpu_context_handle context_handle,
131841687f09Smrg			       unsigned ip_type, int instance, int pm4_dw,
131941687f09Smrg			       uint32_t *pm4_src, int res_cnt,
132041687f09Smrg			       amdgpu_bo_handle *resources,
132141687f09Smrg			       struct amdgpu_cs_ib_info *ib_info,
132241687f09Smrg			       struct amdgpu_cs_request *ibs_request,
132341687f09Smrg			       bool secure)
13243f012e29Smrg{
13253f012e29Smrg	int r;
13263f012e29Smrg	uint32_t expired;
13273f012e29Smrg	uint32_t *ring_ptr;
13283f012e29Smrg	amdgpu_bo_handle ib_result_handle;
13293f012e29Smrg	void *ib_result_cpu;
13303f012e29Smrg	uint64_t ib_result_mc_address;
13313f012e29Smrg	struct amdgpu_cs_fence fence_status = {0};
13323f012e29Smrg	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
13333f012e29Smrg	amdgpu_va_handle va_handle;
13343f012e29Smrg
13353f012e29Smrg	/* prepare CS */
13363f012e29Smrg	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
13373f012e29Smrg	CU_ASSERT_NOT_EQUAL(resources, NULL);
13383f012e29Smrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
13393f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
13403f012e29Smrg	CU_ASSERT_TRUE(pm4_dw <= 1024);
13413f012e29Smrg
13423f012e29Smrg	/* allocate IB */
13433f012e29Smrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
13443f012e29Smrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
13453f012e29Smrg				    &ib_result_handle, &ib_result_cpu,
13463f012e29Smrg				    &ib_result_mc_address, &va_handle);
13473f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
13483f012e29Smrg
13493f012e29Smrg	/* copy PM4 packet to ring from caller */
13503f012e29Smrg	ring_ptr = ib_result_cpu;
13513f012e29Smrg	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));
13523f012e29Smrg
13533f012e29Smrg	ib_info->ib_mc_address = ib_result_mc_address;
13543f012e29Smrg	ib_info->size = pm4_dw;
135541687f09Smrg	if (secure)
135641687f09Smrg		ib_info->flags |= AMDGPU_IB_FLAGS_SECURE;
13573f012e29Smrg
13583f012e29Smrg	ibs_request->ip_type = ip_type;
13593f012e29Smrg	ibs_request->ring = instance;
13603f012e29Smrg	ibs_request->number_of_ibs = 1;
13613f012e29Smrg	ibs_request->ibs = ib_info;
13623f012e29Smrg	ibs_request->fence_info.handle = NULL;
13633f012e29Smrg
13643f012e29Smrg	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
13653f012e29Smrg	all_res[res_cnt] = ib_result_handle;
13663f012e29Smrg
13673f012e29Smrg	r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res,
13683f012e29Smrg				  NULL, &ibs_request->resources);
13693f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
13703f012e29Smrg
13713f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
13723f012e29Smrg
13733f012e29Smrg	/* submit CS */
13743f012e29Smrg	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
13753f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
13763f012e29Smrg
13773f012e29Smrg	r = amdgpu_bo_list_destroy(ibs_request->resources);
13783f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
13793f012e29Smrg
13803f012e29Smrg	fence_status.ip_type = ip_type;
13813f012e29Smrg	fence_status.ip_instance = 0;
13823f012e29Smrg	fence_status.ring = ibs_request->ring;
13833f012e29Smrg	fence_status.context = context_handle;
13843f012e29Smrg	fence_status.fence = ibs_request->seq_no;
13853f012e29Smrg
13863f012e29Smrg	/* wait for IB accomplished */
13873f012e29Smrg	r = amdgpu_cs_query_fence_status(&fence_status,
13883f012e29Smrg					 AMDGPU_TIMEOUT_INFINITE,
13893f012e29Smrg					 0, &expired);
13903f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
13913f012e29Smrg	CU_ASSERT_EQUAL(expired, true);
13923f012e29Smrg
13933f012e29Smrg	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
13943f012e29Smrg				     ib_result_mc_address, 4096);
13953f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
13963f012e29Smrg}
13973f012e29Smrg
/*
 * Non-secure convenience wrapper around amdgpu_test_exec_cs_helper_raw().
 *
 * Every argument is forwarded unchanged; the file-level device_handle is
 * supplied as the device and false is passed as the last argument.
 */
139841687f09Smrgstatic void
139941687f09Smrgamdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
140041687f09Smrg			   unsigned ip_type, int instance, int pm4_dw,
140141687f09Smrg			   uint32_t *pm4_src, int res_cnt,
140241687f09Smrg			   amdgpu_bo_handle *resources,
140341687f09Smrg			   struct amdgpu_cs_ib_info *ib_info,
140441687f09Smrg			   struct amdgpu_cs_request *ibs_request)
140541687f09Smrg{
	/* trailing "false": presumably the helper's secure flag - confirm against its signature */
140641687f09Smrg	amdgpu_test_exec_cs_helper_raw(device_handle, context_handle,
140741687f09Smrg				       ip_type, instance, pm4_dw, pm4_src,
140841687f09Smrg				       res_cnt, resources, ib_info,
140941687f09Smrg				       ibs_request, false);
141041687f09Smrg}
141141687f09Smrg
/*
 * Write-linear command-submission test, optionally using secure submissions.
 *
 * device  - device used for the hw-ip query, context creation and bo
 *           allocation, and passed on to amdgpu_test_exec_cs_helper_raw().
 * ip_type - AMDGPU_HW_IP_DMA builds an SDMA write packet; AMDGPU_HW_IP_GFX and
 *           AMDGPU_HW_IP_COMPUTE build a PACKET3_WRITE_DATA packet.
 * secure  - when true, AMDGPU_GEM_CREATE_ENCRYPTED is OR-ed into both
 *           gtt_flags entries, the secure flag is passed to the submission
 *           helper, and the direct CPU comparison of the bo is replaced by
 *           follow-up atomic packets (GFX and DMA branches only).
 *
 * Ring ids are walked from 0 upwards for as long as the matching bit in
 * hw_ip_info.available_rings is set.  For each ring, and for each of the two
 * gtt_flags entries, a bo of sdma_write_length dwords is allocated, cleared,
 * written with 0xdeadbeaf by the submitted packet, checked, and freed.
 */
141241687f09Smrgvoid
141341687f09Smrgamdgpu_command_submission_write_linear_helper_with_secure(amdgpu_device_handle
141441687f09Smrg							  device, unsigned
141541687f09Smrg							  ip_type, bool secure)
14163f012e29Smrg{
14173f012e29Smrg	const int sdma_write_length = 128;
14183f012e29Smrg	const int pm4_dw = 256;
14193f012e29Smrg	amdgpu_context_handle context_handle;
14203f012e29Smrg	amdgpu_bo_handle bo;
14213f012e29Smrg	amdgpu_bo_handle *resources;
14223f012e29Smrg	uint32_t *pm4;
14233f012e29Smrg	struct amdgpu_cs_ib_info *ib_info;
14243f012e29Smrg	struct amdgpu_cs_request *ibs_request;
14253f012e29Smrg	uint64_t bo_mc;
14263f012e29Smrg	volatile uint32_t *bo_cpu;
142741687f09Smrg	uint32_t bo_cpu_origin;
142800a23bdaSmrg	int i, j, r, loop, ring_id;
14293f012e29Smrg	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
14303f012e29Smrg	amdgpu_va_handle va_handle;
143100a23bdaSmrg	struct drm_amdgpu_info_hw_ip hw_ip_info;
14323f012e29Smrg
	/* scratch buffer the packets are built in (pm4_dw dwords, zeroed) */
14333f012e29Smrg	pm4 = calloc(pm4_dw, sizeof(*pm4));
14343f012e29Smrg	CU_ASSERT_NOT_EQUAL(pm4, NULL);
14353f012e29Smrg
14363f012e29Smrg	ib_info = calloc(1, sizeof(*ib_info));
14373f012e29Smrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
14383f012e29Smrg
14393f012e29Smrg	ibs_request = calloc(1, sizeof(*ibs_request));
14403f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
14413f012e29Smrg
	/* hw_ip_info.available_rings drives the ring loop below */
144241687f09Smrg	r = amdgpu_query_hw_ip_info(device, ip_type, 0, &hw_ip_info);
144300a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
144400a23bdaSmrg
	/* secure mode: request encrypted bos for both gtt_flags entries */
144541687f09Smrg	for (i = 0; secure && (i < 2); i++)
144641687f09Smrg		gtt_flags[i] |= AMDGPU_GEM_CREATE_ENCRYPTED;
144741687f09Smrg
144841687f09Smrg	r = amdgpu_cs_ctx_create(device, &context_handle);
144941687f09Smrg
14503f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
14513f012e29Smrg
14523f012e29Smrg	/* prepare the one-entry resource list handed to the submission helper */
14533f012e29Smrg	resources = calloc(1, sizeof(amdgpu_bo_handle));
14543f012e29Smrg	CU_ASSERT_NOT_EQUAL(resources, NULL);
14553f012e29Smrg
	/* stops at the first ring id whose bit is clear in available_rings */
145600a23bdaSmrg	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
145700a23bdaSmrg		loop = 0;
145800a23bdaSmrg		while(loop < 2) {
145900a23bdaSmrg			/* allocate and map the destination bo in GTT using gtt_flags[loop] */
146041687f09Smrg			r = amdgpu_bo_alloc_and_map(device,
146100a23bdaSmrg						    sdma_write_length * sizeof(uint32_t),
146200a23bdaSmrg						    4096, AMDGPU_GEM_DOMAIN_GTT,
146300a23bdaSmrg						    gtt_flags[loop], &bo, (void**)&bo_cpu,
146400a23bdaSmrg						    &bo_mc, &va_handle);
146500a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
14663f012e29Smrg
146700a23bdaSmrg			/* clear bo */
146800a23bdaSmrg			memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));
14693f012e29Smrg
147000a23bdaSmrg			resources[0] = bo;
14713f012e29Smrg
147200a23bdaSmrg			/* fill in the PM4 stream: write-linear test (i counts dwords emitted) */
147300a23bdaSmrg			i = j = 0;
147400a23bdaSmrg			if (ip_type == AMDGPU_HW_IP_DMA) {
147500a23bdaSmrg				if (family_id == AMDGPU_FAMILY_SI)
147600a23bdaSmrg					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
147700a23bdaSmrg								  sdma_write_length);
147800a23bdaSmrg				else
147900a23bdaSmrg					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
148041687f09Smrg							       SDMA_WRITE_SUB_OPCODE_LINEAR,
148141687f09Smrg							       secure ? SDMA_ATOMIC_TMZ(1) : 0);
148241687f09Smrg				pm4[i++] = 0xfffffffc & bo_mc;
148300a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				/* SI emits no separate count dword here; AI and newer use length - 1 */
148400a23bdaSmrg				if (family_id >= AMDGPU_FAMILY_AI)
148500a23bdaSmrg					pm4[i++] = sdma_write_length - 1;
148600a23bdaSmrg				else if (family_id != AMDGPU_FAMILY_SI)
148700a23bdaSmrg					pm4[i++] = sdma_write_length;
148800a23bdaSmrg				while(j++ < sdma_write_length)
148900a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
149000a23bdaSmrg			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
149100a23bdaSmrg				    (ip_type == AMDGPU_HW_IP_COMPUTE)) {
149200a23bdaSmrg				pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
149300a23bdaSmrg				pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
149400a23bdaSmrg				pm4[i++] = 0xfffffffc & bo_mc;
149500a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
149600a23bdaSmrg				while(j++ < sdma_write_length)
149700a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
149800a23bdaSmrg			}
14993f012e29Smrg
150041687f09Smrg			amdgpu_test_exec_cs_helper_raw(device, context_handle,
150141687f09Smrg						       ip_type, ring_id, i, pm4,
150241687f09Smrg						       1, resources, ib_info,
150341687f09Smrg						       ibs_request, secure);
15043f012e29Smrg
150500a23bdaSmrg			/* verify that the write result matches the expected pattern */
150600a23bdaSmrg			i = 0;
150741687f09Smrg			if (!secure) {
				/* non-secure: every dword must read back as 0xdeadbeaf */
150841687f09Smrg				while(i < sdma_write_length) {
150941687f09Smrg					CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
151041687f09Smrg				}
151141687f09Smrg			} else if (ip_type == AMDGPU_HW_IP_GFX) {
				/* secure GFX: submit an atomic packet; no CPU-side comparison follows in this branch */
151241687f09Smrg				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
151341687f09Smrg				pm4[i++] = PACKET3(PACKET3_ATOMIC_MEM, 7);
151441687f09Smrg				/* atomic opcode for 32b w/ RTN and ATOMIC_SWAPCMP_RTN
151541687f09Smrg				 * command, 1-loop_until_compare_satisfied.
151641687f09Smrg				 * single_pass_atomic, 0-lru
151741687f09Smrg				 * engine_sel, 0-micro_engine
151841687f09Smrg				 */
151941687f09Smrg				pm4[i++] = (TC_OP_ATOMIC_CMPSWAP_RTN_32 |
152041687f09Smrg							ATOMIC_MEM_COMMAND(1) |
152141687f09Smrg							ATOMIC_MEM_CACHEPOLICAY(0) |
152241687f09Smrg							ATOMIC_MEM_ENGINESEL(0));
152341687f09Smrg				pm4[i++] = 0xfffffffc & bo_mc;
152441687f09Smrg				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
152541687f09Smrg				pm4[i++] = 0x12345678;
152641687f09Smrg				pm4[i++] = 0x0;
152741687f09Smrg				pm4[i++] = 0xdeadbeaf;
152841687f09Smrg				pm4[i++] = 0x0;
152941687f09Smrg				pm4[i++] = 0x100;
153041687f09Smrg				amdgpu_test_exec_cs_helper_raw(device, context_handle,
153141687f09Smrg							ip_type, ring_id, i, pm4,
153241687f09Smrg							1, resources, ib_info,
153341687f09Smrg							ibs_request, true);
153441687f09Smrg			} else if (ip_type == AMDGPU_HW_IP_DMA) {
153541687f09Smrg				/* save the current bo_cpu[0] so it can be compared after the atomic */
153641687f09Smrg				bo_cpu_origin = bo_cpu[0];
153741687f09Smrg				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
153841687f09Smrg				/* atomic opcode for 32b w/ RTN and ATOMIC_SWAPCMP_RTN
153941687f09Smrg				 * loop, 1-loop_until_compare_satisfied.
154041687f09Smrg				 * single_pass_atomic, 0-lru
154141687f09Smrg				 */
154241687f09Smrg				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
154341687f09Smrg							       0,
154441687f09Smrg							       SDMA_ATOMIC_LOOP(1) |
154541687f09Smrg							       SDMA_ATOMIC_TMZ(1) |
154641687f09Smrg							       SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
154741687f09Smrg				pm4[i++] = 0xfffffffc & bo_mc;
154841687f09Smrg				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
154941687f09Smrg				pm4[i++] = 0x12345678;
155041687f09Smrg				pm4[i++] = 0x0;
155141687f09Smrg				pm4[i++] = 0xdeadbeaf;
155241687f09Smrg				pm4[i++] = 0x0;
155341687f09Smrg				pm4[i++] = 0x100;
155441687f09Smrg				amdgpu_test_exec_cs_helper_raw(device, context_handle,
155541687f09Smrg							ip_type, ring_id, i, pm4,
155641687f09Smrg							1, resources, ib_info,
155741687f09Smrg							ibs_request, true);
155841687f09Smrg				/* DMA's atomic behavior is unlike GFX.
155941687f09Smrg				 * If the comparing data is not equal to destination data,
156041687f09Smrg				 * For GFX, loop again till gfx timeout(system hang).
156141687f09Smrg				 * For DMA, loop again till timer expired and then send interrupt.
156241687f09Smrg				 * So testcase can't use interrupt mechanism.
156341687f09Smrg				 * We take another way to verify. When the comparing data is not
156441687f09Smrg				 * equal to destination data, overwrite the source data to the destination
156541687f09Smrg				 * buffer. Otherwise, original destination data unchanged.
156641687f09Smrg				 * So if the bo_cpu data is overwritten, the result is passed.
156741687f09Smrg				 */
156841687f09Smrg				CU_ASSERT_NOT_EQUAL(bo_cpu[0], bo_cpu_origin);
156941687f09Smrg
157041687f09Smrg				/* compare again for the case of dest_data != cmp_data */
157141687f09Smrg				i = 0;
157241687f09Smrg				/* save bo_cpu[0] again; the second atomic must leave it untouched */
157341687f09Smrg				bo_cpu_origin = bo_cpu[0];
157441687f09Smrg				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
157541687f09Smrg				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
157641687f09Smrg							       0,
157741687f09Smrg							       SDMA_ATOMIC_LOOP(1) |
157841687f09Smrg							       SDMA_ATOMIC_TMZ(1) |
157941687f09Smrg							       SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
158041687f09Smrg				pm4[i++] = 0xfffffffc & bo_mc;
158141687f09Smrg				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
158241687f09Smrg				pm4[i++] = 0x87654321;
158341687f09Smrg				pm4[i++] = 0x0;
158441687f09Smrg				pm4[i++] = 0xdeadbeaf;
158541687f09Smrg				pm4[i++] = 0x0;
158641687f09Smrg				pm4[i++] = 0x100;
158741687f09Smrg				amdgpu_test_exec_cs_helper_raw(device, context_handle,
158841687f09Smrg							ip_type, ring_id, i, pm4,
158941687f09Smrg							1, resources, ib_info,
159041687f09Smrg							ibs_request, true);
159141687f09Smrg				/* here bo_cpu[0] should be unchanged, still 0x12345678; otherwise the test fails */
159241687f09Smrg				CU_ASSERT_EQUAL(bo_cpu[0], bo_cpu_origin);
159300a23bdaSmrg			}
15943f012e29Smrg
159500a23bdaSmrg			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
159600a23bdaSmrg						     sdma_write_length * sizeof(uint32_t));
159700a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
159800a23bdaSmrg			loop++;
15993f012e29Smrg		}
16003f012e29Smrg	}
16013f012e29Smrg	/* clean resources */
16023f012e29Smrg	free(resources);
16033f012e29Smrg	free(ibs_request);
16043f012e29Smrg	free(ib_info);
16053f012e29Smrg	free(pm4);
16063f012e29Smrg
16073f012e29Smrg	/* end of test */
16083f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
16093f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
16103f012e29Smrg}
16113f012e29Smrg
/*
 * Non-secure write-linear test for the given ip_type: calls
 * amdgpu_command_submission_write_linear_helper_with_secure() with the
 * file-level device_handle and secure == false.
 */
161241687f09Smrgstatic void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
161341687f09Smrg{
161441687f09Smrg	amdgpu_command_submission_write_linear_helper_with_secure(device_handle,
161541687f09Smrg								  ip_type,
161641687f09Smrg								  false);
161741687f09Smrg}
161841687f09Smrg
/* Run the write-linear helper with ip_type AMDGPU_HW_IP_DMA. */
16193f012e29Smrgstatic void amdgpu_command_submission_sdma_write_linear(void)
16203f012e29Smrg{
16213f012e29Smrg	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
16223f012e29Smrg}
16233f012e29Smrg
16243f012e29Smrgstatic void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
16253f012e29Smrg{
16263f012e29Smrg	const int sdma_write_length = 1024 * 1024;
16273f012e29Smrg	const int pm4_dw = 256;
16283f012e29Smrg	amdgpu_context_handle context_handle;
16293f012e29Smrg	amdgpu_bo_handle bo;
16303f012e29Smrg	amdgpu_bo_handle *resources;
16313f012e29Smrg	uint32_t *pm4;
16323f012e29Smrg	struct amdgpu_cs_ib_info *ib_info;
16333f012e29Smrg	struct amdgpu_cs_request *ibs_request;
16343f012e29Smrg	uint64_t bo_mc;
16353f012e29Smrg	volatile uint32_t *bo_cpu;
163600a23bdaSmrg	int i, j, r, loop, ring_id;
16373f012e29Smrg	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
16383f012e29Smrg	amdgpu_va_handle va_handle;
163900a23bdaSmrg	struct drm_amdgpu_info_hw_ip hw_ip_info;
16403f012e29Smrg
16413f012e29Smrg	pm4 = calloc(pm4_dw, sizeof(*pm4));
16423f012e29Smrg	CU_ASSERT_NOT_EQUAL(pm4, NULL);
16433f012e29Smrg
16443f012e29Smrg	ib_info = calloc(1, sizeof(*ib_info));
16453f012e29Smrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
16463f012e29Smrg
16473f012e29Smrg	ibs_request = calloc(1, sizeof(*ibs_request));
16483f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
16493f012e29Smrg
165000a23bdaSmrg	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
165100a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
165200a23bdaSmrg
16533f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
16543f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
16553f012e29Smrg
16563f012e29Smrg	/* prepare resource */
16573f012e29Smrg	resources = calloc(1, sizeof(amdgpu_bo_handle));
16583f012e29Smrg	CU_ASSERT_NOT_EQUAL(resources, NULL);
16593f012e29Smrg
166000a23bdaSmrg	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
166100a23bdaSmrg		loop = 0;
166200a23bdaSmrg		while(loop < 2) {
166300a23bdaSmrg			/* allocate UC bo for sDMA use */
166400a23bdaSmrg			r = amdgpu_bo_alloc_and_map(device_handle,
166500a23bdaSmrg						    sdma_write_length, 4096,
166600a23bdaSmrg						    AMDGPU_GEM_DOMAIN_GTT,
166700a23bdaSmrg						    gtt_flags[loop], &bo, (void**)&bo_cpu,
166800a23bdaSmrg						    &bo_mc, &va_handle);
166900a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
16703f012e29Smrg
167100a23bdaSmrg			/* clear bo */
167200a23bdaSmrg			memset((void*)bo_cpu, 0, sdma_write_length);
16733f012e29Smrg
167400a23bdaSmrg			resources[0] = bo;
16753f012e29Smrg
167600a23bdaSmrg			/* fulfill PM4: test DMA const fill */
167700a23bdaSmrg			i = j = 0;
167800a23bdaSmrg			if (ip_type == AMDGPU_HW_IP_DMA) {
167900a23bdaSmrg				if (family_id == AMDGPU_FAMILY_SI) {
168000a23bdaSmrg					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
168100a23bdaSmrg								  0, 0, 0,
168200a23bdaSmrg								  sdma_write_length / 4);
168300a23bdaSmrg					pm4[i++] = 0xfffffffc & bo_mc;
168400a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
168500a23bdaSmrg					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
168600a23bdaSmrg				} else {
168700a23bdaSmrg					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
168800a23bdaSmrg							       SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
168900a23bdaSmrg					pm4[i++] = 0xffffffff & bo_mc;
169000a23bdaSmrg					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
169100a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
169200a23bdaSmrg					if (family_id >= AMDGPU_FAMILY_AI)
169300a23bdaSmrg						pm4[i++] = sdma_write_length - 1;
169400a23bdaSmrg					else
169500a23bdaSmrg						pm4[i++] = sdma_write_length;
169600a23bdaSmrg				}
169700a23bdaSmrg			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
169800a23bdaSmrg				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
169900a23bdaSmrg				if (family_id == AMDGPU_FAMILY_SI) {
170000a23bdaSmrg					pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
170100a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
170200a23bdaSmrg					pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
170300a23bdaSmrg						   PACKET3_DMA_DATA_SI_DST_SEL(0) |
170400a23bdaSmrg						   PACKET3_DMA_DATA_SI_SRC_SEL(2) |
170500a23bdaSmrg						   PACKET3_DMA_DATA_SI_CP_SYNC;
170600a23bdaSmrg					pm4[i++] = 0xffffffff & bo_mc;
170700a23bdaSmrg					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1708d8807b2fSmrg					pm4[i++] = sdma_write_length;
170900a23bdaSmrg				} else {
171000a23bdaSmrg					pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
171100a23bdaSmrg					pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
171200a23bdaSmrg						   PACKET3_DMA_DATA_DST_SEL(0) |
171300a23bdaSmrg						   PACKET3_DMA_DATA_SRC_SEL(2) |
171400a23bdaSmrg						   PACKET3_DMA_DATA_CP_SYNC;
171500a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
171600a23bdaSmrg					pm4[i++] = 0;
171700a23bdaSmrg					pm4[i++] = 0xfffffffc & bo_mc;
171800a23bdaSmrg					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
171900a23bdaSmrg					pm4[i++] = sdma_write_length;
172000a23bdaSmrg				}
1721d8807b2fSmrg			}
17223f012e29Smrg
172300a23bdaSmrg			amdgpu_test_exec_cs_helper(context_handle,
172400a23bdaSmrg						   ip_type, ring_id,
172500a23bdaSmrg						   i, pm4,
172600a23bdaSmrg						   1, resources,
172700a23bdaSmrg						   ib_info, ibs_request);
17283f012e29Smrg
172900a23bdaSmrg			/* verify if SDMA test result meets with expected */
173000a23bdaSmrg			i = 0;
173100a23bdaSmrg			while(i < (sdma_write_length / 4)) {
173200a23bdaSmrg				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
173300a23bdaSmrg			}
17343f012e29Smrg
173500a23bdaSmrg			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
173600a23bdaSmrg						     sdma_write_length);
173700a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
173800a23bdaSmrg			loop++;
173900a23bdaSmrg		}
17403f012e29Smrg	}
17413f012e29Smrg	/* clean resources */
17423f012e29Smrg	free(resources);
17433f012e29Smrg	free(ibs_request);
17443f012e29Smrg	free(ib_info);
17453f012e29Smrg	free(pm4);
17463f012e29Smrg
17473f012e29Smrg	/* end of test */
17483f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
17493f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
17503f012e29Smrg}
17513f012e29Smrg
/* Run the constant-fill helper with ip_type AMDGPU_HW_IP_DMA. */
17523f012e29Smrgstatic void amdgpu_command_submission_sdma_const_fill(void)
17533f012e29Smrg{
17543f012e29Smrg	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
17553f012e29Smrg}
17563f012e29Smrg
17573f012e29Smrgstatic void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
17583f012e29Smrg{
17593f012e29Smrg	const int sdma_write_length = 1024;
17603f012e29Smrg	const int pm4_dw = 256;
17613f012e29Smrg	amdgpu_context_handle context_handle;
17623f012e29Smrg	amdgpu_bo_handle bo1, bo2;
17633f012e29Smrg	amdgpu_bo_handle *resources;
17643f012e29Smrg	uint32_t *pm4;
17653f012e29Smrg	struct amdgpu_cs_ib_info *ib_info;
17663f012e29Smrg	struct amdgpu_cs_request *ibs_request;
17673f012e29Smrg	uint64_t bo1_mc, bo2_mc;
17683f012e29Smrg	volatile unsigned char *bo1_cpu, *bo2_cpu;
176900a23bdaSmrg	int i, j, r, loop1, loop2, ring_id;
17703f012e29Smrg	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
17713f012e29Smrg	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
177200a23bdaSmrg	struct drm_amdgpu_info_hw_ip hw_ip_info;
17733f012e29Smrg
17743f012e29Smrg	pm4 = calloc(pm4_dw, sizeof(*pm4));
17753f012e29Smrg	CU_ASSERT_NOT_EQUAL(pm4, NULL);
17763f012e29Smrg
17773f012e29Smrg	ib_info = calloc(1, sizeof(*ib_info));
17783f012e29Smrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
17793f012e29Smrg
17803f012e29Smrg	ibs_request = calloc(1, sizeof(*ibs_request));
17813f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
17823f012e29Smrg
178300a23bdaSmrg	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
178400a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
178500a23bdaSmrg
17863f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
17873f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
17883f012e29Smrg
17893f012e29Smrg	/* prepare resource */
17903f012e29Smrg	resources = calloc(2, sizeof(amdgpu_bo_handle));
17913f012e29Smrg	CU_ASSERT_NOT_EQUAL(resources, NULL);
17923f012e29Smrg
179300a23bdaSmrg	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
179400a23bdaSmrg		loop1 = loop2 = 0;
179500a23bdaSmrg		/* run 9 circle to test all mapping combination */
179600a23bdaSmrg		while(loop1 < 2) {
179700a23bdaSmrg			while(loop2 < 2) {
179800a23bdaSmrg				/* allocate UC bo1for sDMA use */
179900a23bdaSmrg				r = amdgpu_bo_alloc_and_map(device_handle,
180000a23bdaSmrg							    sdma_write_length, 4096,
180100a23bdaSmrg							    AMDGPU_GEM_DOMAIN_GTT,
180200a23bdaSmrg							    gtt_flags[loop1], &bo1,
180300a23bdaSmrg							    (void**)&bo1_cpu, &bo1_mc,
180400a23bdaSmrg							    &bo1_va_handle);
180500a23bdaSmrg				CU_ASSERT_EQUAL(r, 0);
180600a23bdaSmrg
180700a23bdaSmrg				/* set bo1 */
180800a23bdaSmrg				memset((void*)bo1_cpu, 0xaa, sdma_write_length);
180900a23bdaSmrg
181000a23bdaSmrg				/* allocate UC bo2 for sDMA use */
181100a23bdaSmrg				r = amdgpu_bo_alloc_and_map(device_handle,
181200a23bdaSmrg							    sdma_write_length, 4096,
181300a23bdaSmrg							    AMDGPU_GEM_DOMAIN_GTT,
181400a23bdaSmrg							    gtt_flags[loop2], &bo2,
181500a23bdaSmrg							    (void**)&bo2_cpu, &bo2_mc,
181600a23bdaSmrg							    &bo2_va_handle);
181700a23bdaSmrg				CU_ASSERT_EQUAL(r, 0);
181800a23bdaSmrg
181900a23bdaSmrg				/* clear bo2 */
182000a23bdaSmrg				memset((void*)bo2_cpu, 0, sdma_write_length);
182100a23bdaSmrg
182200a23bdaSmrg				resources[0] = bo1;
182300a23bdaSmrg				resources[1] = bo2;
182400a23bdaSmrg
182500a23bdaSmrg				/* fulfill PM4: test DMA copy linear */
182600a23bdaSmrg				i = j = 0;
182700a23bdaSmrg				if (ip_type == AMDGPU_HW_IP_DMA) {
182800a23bdaSmrg					if (family_id == AMDGPU_FAMILY_SI) {
182900a23bdaSmrg						pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
183000a23bdaSmrg									  0, 0, 0,
183100a23bdaSmrg									  sdma_write_length);
183200a23bdaSmrg						pm4[i++] = 0xffffffff & bo2_mc;
183300a23bdaSmrg						pm4[i++] = 0xffffffff & bo1_mc;
183400a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
183500a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
183600a23bdaSmrg					} else {
183700a23bdaSmrg						pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
183800a23bdaSmrg								       SDMA_COPY_SUB_OPCODE_LINEAR,
183900a23bdaSmrg								       0);
184000a23bdaSmrg						if (family_id >= AMDGPU_FAMILY_AI)
184100a23bdaSmrg							pm4[i++] = sdma_write_length - 1;
184200a23bdaSmrg						else
184300a23bdaSmrg							pm4[i++] = sdma_write_length;
184400a23bdaSmrg						pm4[i++] = 0;
184500a23bdaSmrg						pm4[i++] = 0xffffffff & bo1_mc;
184600a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
184700a23bdaSmrg						pm4[i++] = 0xffffffff & bo2_mc;
184800a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
184900a23bdaSmrg					}
185000a23bdaSmrg				} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
185100a23bdaSmrg					   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
185200a23bdaSmrg					if (family_id == AMDGPU_FAMILY_SI) {
185300a23bdaSmrg						pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
185400a23bdaSmrg						pm4[i++] = 0xfffffffc & bo1_mc;
185500a23bdaSmrg						pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
185600a23bdaSmrg							   PACKET3_DMA_DATA_SI_DST_SEL(0) |
185700a23bdaSmrg							   PACKET3_DMA_DATA_SI_SRC_SEL(0) |
185800a23bdaSmrg							   PACKET3_DMA_DATA_SI_CP_SYNC |
185900a23bdaSmrg							   (0xffff00000000 & bo1_mc) >> 32;
186000a23bdaSmrg						pm4[i++] = 0xfffffffc & bo2_mc;
186100a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1862d8807b2fSmrg						pm4[i++] = sdma_write_length;
186300a23bdaSmrg					} else {
186400a23bdaSmrg						pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
186500a23bdaSmrg						pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
186600a23bdaSmrg							   PACKET3_DMA_DATA_DST_SEL(0) |
186700a23bdaSmrg							   PACKET3_DMA_DATA_SRC_SEL(0) |
186800a23bdaSmrg							   PACKET3_DMA_DATA_CP_SYNC;
186900a23bdaSmrg						pm4[i++] = 0xfffffffc & bo1_mc;
187000a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
187100a23bdaSmrg						pm4[i++] = 0xfffffffc & bo2_mc;
187200a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
187300a23bdaSmrg						pm4[i++] = sdma_write_length;
187400a23bdaSmrg					}
1875d8807b2fSmrg				}
18763f012e29Smrg
187700a23bdaSmrg				amdgpu_test_exec_cs_helper(context_handle,
187800a23bdaSmrg							   ip_type, ring_id,
187900a23bdaSmrg							   i, pm4,
188000a23bdaSmrg							   2, resources,
188100a23bdaSmrg							   ib_info, ibs_request);
18823f012e29Smrg
188300a23bdaSmrg				/* verify if SDMA test result meets with expected */
188400a23bdaSmrg				i = 0;
188500a23bdaSmrg				while(i < sdma_write_length) {
188600a23bdaSmrg					CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
188700a23bdaSmrg				}
188800a23bdaSmrg				r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
188900a23bdaSmrg							     sdma_write_length);
189000a23bdaSmrg				CU_ASSERT_EQUAL(r, 0);
189100a23bdaSmrg				r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
189200a23bdaSmrg							     sdma_write_length);
189300a23bdaSmrg				CU_ASSERT_EQUAL(r, 0);
189400a23bdaSmrg				loop2++;
18953f012e29Smrg			}
189600a23bdaSmrg			loop1++;
18973f012e29Smrg		}
18983f012e29Smrg	}
18993f012e29Smrg	/* clean resources */
19003f012e29Smrg	free(resources);
19013f012e29Smrg	free(ibs_request);
19023f012e29Smrg	free(ib_info);
19033f012e29Smrg	free(pm4);
19043f012e29Smrg
19053f012e29Smrg	/* end of test */
19063f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
19073f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
19083f012e29Smrg}
19093f012e29Smrg
/* Run the copy-linear helper with ip_type AMDGPU_HW_IP_DMA. */
19103f012e29Smrgstatic void amdgpu_command_submission_sdma_copy_linear(void)
19113f012e29Smrg{
19123f012e29Smrg	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
19133f012e29Smrg}
19143f012e29Smrg
/*
 * SDMA command-submission test entry: runs the write-linear, constant-fill
 * and copy-linear SDMA tests, in that order.
 */
19153f012e29Smrgstatic void amdgpu_command_submission_sdma(void)
19163f012e29Smrg{
19173f012e29Smrg	amdgpu_command_submission_sdma_write_linear();
19183f012e29Smrg	amdgpu_command_submission_sdma_const_fill();
19193f012e29Smrg	amdgpu_command_submission_sdma_copy_linear();
19203f012e29Smrg}
19213f012e29Smrg
1922d8807b2fSmrgstatic void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
1923d8807b2fSmrg{
1924d8807b2fSmrg	amdgpu_context_handle context_handle;
1925d8807b2fSmrg	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
1926d8807b2fSmrg	void *ib_result_cpu, *ib_result_ce_cpu;
1927d8807b2fSmrg	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
1928d8807b2fSmrg	struct amdgpu_cs_request ibs_request[2] = {0};
1929d8807b2fSmrg	struct amdgpu_cs_ib_info ib_info[2];
1930d8807b2fSmrg	struct amdgpu_cs_fence fence_status[2] = {0};
1931d8807b2fSmrg	uint32_t *ptr;
1932d8807b2fSmrg	uint32_t expired;
1933d8807b2fSmrg	amdgpu_bo_list_handle bo_list;
1934d8807b2fSmrg	amdgpu_va_handle va_handle, va_handle_ce;
1935d8807b2fSmrg	int r;
1936d8807b2fSmrg	int i = 0, ib_cs_num = 2;
1937d8807b2fSmrg
1938d8807b2fSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1939d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1940d8807b2fSmrg
1941d8807b2fSmrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1942d8807b2fSmrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
1943d8807b2fSmrg				    &ib_result_handle, &ib_result_cpu,
1944d8807b2fSmrg				    &ib_result_mc_address, &va_handle);
1945d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1946d8807b2fSmrg
1947d8807b2fSmrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1948d8807b2fSmrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
1949d8807b2fSmrg				    &ib_result_ce_handle, &ib_result_ce_cpu,
1950d8807b2fSmrg				    &ib_result_ce_mc_address, &va_handle_ce);
1951d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1952d8807b2fSmrg
1953d8807b2fSmrg	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
1954d8807b2fSmrg			       ib_result_ce_handle, &bo_list);
1955d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1956d8807b2fSmrg
1957d8807b2fSmrg	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
1958d8807b2fSmrg
1959d8807b2fSmrg	/* IT_SET_CE_DE_COUNTERS */
1960d8807b2fSmrg	ptr = ib_result_ce_cpu;
1961d8807b2fSmrg	if (family_id != AMDGPU_FAMILY_SI) {
1962d8807b2fSmrg		ptr[i++] = 0xc0008900;
1963d8807b2fSmrg		ptr[i++] = 0;
1964d8807b2fSmrg	}
1965d8807b2fSmrg	ptr[i++] = 0xc0008400;
1966d8807b2fSmrg	ptr[i++] = 1;
1967d8807b2fSmrg	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
1968d8807b2fSmrg	ib_info[0].size = i;
1969d8807b2fSmrg	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
1970d8807b2fSmrg
1971d8807b2fSmrg	/* IT_WAIT_ON_CE_COUNTER */
1972d8807b2fSmrg	ptr = ib_result_cpu;
1973d8807b2fSmrg	ptr[0] = 0xc0008600;
1974d8807b2fSmrg	ptr[1] = 0x00000001;
1975d8807b2fSmrg	ib_info[1].ib_mc_address = ib_result_mc_address;
1976d8807b2fSmrg	ib_info[1].size = 2;
1977d8807b2fSmrg
1978d8807b2fSmrg	for (i = 0; i < ib_cs_num; i++) {
1979d8807b2fSmrg		ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
1980d8807b2fSmrg		ibs_request[i].number_of_ibs = 2;
1981d8807b2fSmrg		ibs_request[i].ibs = ib_info;
1982d8807b2fSmrg		ibs_request[i].resources = bo_list;
1983d8807b2fSmrg		ibs_request[i].fence_info.handle = NULL;
1984d8807b2fSmrg	}
1985d8807b2fSmrg
1986d8807b2fSmrg	r = amdgpu_cs_submit(context_handle, 0,ibs_request, ib_cs_num);
1987d8807b2fSmrg
1988d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1989d8807b2fSmrg
1990d8807b2fSmrg	for (i = 0; i < ib_cs_num; i++) {
1991d8807b2fSmrg		fence_status[i].context = context_handle;
1992d8807b2fSmrg		fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
1993d8807b2fSmrg		fence_status[i].fence = ibs_request[i].seq_no;
1994d8807b2fSmrg	}
1995d8807b2fSmrg
1996d8807b2fSmrg	r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
1997d8807b2fSmrg				AMDGPU_TIMEOUT_INFINITE,
1998d8807b2fSmrg				&expired, NULL);
1999d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
2000d8807b2fSmrg
2001d8807b2fSmrg	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
2002d8807b2fSmrg				     ib_result_mc_address, 4096);
2003d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
2004d8807b2fSmrg
2005d8807b2fSmrg	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
2006d8807b2fSmrg				     ib_result_ce_mc_address, 4096);
2007d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
2008d8807b2fSmrg
2009d8807b2fSmrg	r = amdgpu_bo_list_destroy(bo_list);
2010d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
2011d8807b2fSmrg
2012d8807b2fSmrg	r = amdgpu_cs_ctx_free(context_handle);
2013d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
2014d8807b2fSmrg}
2015d8807b2fSmrg
/*
 * Multi-fence test entry: runs the multi-fence submission once with
 * wait_all == true and once with wait_all == false.
 */
2016d8807b2fSmrgstatic void amdgpu_command_submission_multi_fence(void)
2017d8807b2fSmrg{
2018d8807b2fSmrg	amdgpu_command_submission_multi_fence_wait_all(true);
2019d8807b2fSmrg	amdgpu_command_submission_multi_fence_wait_all(false);
2020d8807b2fSmrg}
2021d8807b2fSmrg
20223f012e29Smrgstatic void amdgpu_userptr_test(void)
20233f012e29Smrg{
20243f012e29Smrg	int i, r, j;
20253f012e29Smrg	uint32_t *pm4 = NULL;
20263f012e29Smrg	uint64_t bo_mc;
20273f012e29Smrg	void *ptr = NULL;
20283f012e29Smrg	int pm4_dw = 256;
20293f012e29Smrg	int sdma_write_length = 4;
20303f012e29Smrg	amdgpu_bo_handle handle;
20313f012e29Smrg	amdgpu_context_handle context_handle;
20323f012e29Smrg	struct amdgpu_cs_ib_info *ib_info;
20333f012e29Smrg	struct amdgpu_cs_request *ibs_request;
20343f012e29Smrg	amdgpu_bo_handle buf_handle;
20353f012e29Smrg	amdgpu_va_handle va_handle;
20363f012e29Smrg
20373f012e29Smrg	pm4 = calloc(pm4_dw, sizeof(*pm4));
20383f012e29Smrg	CU_ASSERT_NOT_EQUAL(pm4, NULL);
20393f012e29Smrg
20403f012e29Smrg	ib_info = calloc(1, sizeof(*ib_info));
20413f012e29Smrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
20423f012e29Smrg
20433f012e29Smrg	ibs_request = calloc(1, sizeof(*ibs_request));
20443f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
20453f012e29Smrg
20463f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
20473f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
20483f012e29Smrg
20493f012e29Smrg	posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
20503f012e29Smrg	CU_ASSERT_NOT_EQUAL(ptr, NULL);
20513f012e29Smrg	memset(ptr, 0, BUFFER_SIZE);
20523f012e29Smrg
20533f012e29Smrg	r = amdgpu_create_bo_from_user_mem(device_handle,
20543f012e29Smrg					   ptr, BUFFER_SIZE, &buf_handle);
20553f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
20563f012e29Smrg
20573f012e29Smrg	r = amdgpu_va_range_alloc(device_handle,
20583f012e29Smrg				  amdgpu_gpu_va_range_general,
20593f012e29Smrg				  BUFFER_SIZE, 1, 0, &bo_mc,
20603f012e29Smrg				  &va_handle, 0);
20613f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
20623f012e29Smrg
20633f012e29Smrg	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
20643f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
20653f012e29Smrg
20663f012e29Smrg	handle = buf_handle;
20673f012e29Smrg
20683f012e29Smrg	j = i = 0;
2069d8807b2fSmrg
2070d8807b2fSmrg	if (family_id == AMDGPU_FAMILY_SI)
2071d8807b2fSmrg		pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
2072d8807b2fSmrg				sdma_write_length);
2073d8807b2fSmrg	else
2074d8807b2fSmrg		pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
2075d8807b2fSmrg				SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
20763f012e29Smrg	pm4[i++] = 0xffffffff & bo_mc;
20773f012e29Smrg	pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
2078d8807b2fSmrg	if (family_id >= AMDGPU_FAMILY_AI)
2079d8807b2fSmrg		pm4[i++] = sdma_write_length - 1;
2080d8807b2fSmrg	else if (family_id != AMDGPU_FAMILY_SI)
2081d8807b2fSmrg		pm4[i++] = sdma_write_length;
20823f012e29Smrg
20833f012e29Smrg	while (j++ < sdma_write_length)
20843f012e29Smrg		pm4[i++] = 0xdeadbeaf;
20853f012e29Smrg
208600a23bdaSmrg	if (!fork()) {
208700a23bdaSmrg		pm4[0] = 0x0;
208800a23bdaSmrg		exit(0);
208900a23bdaSmrg	}
209000a23bdaSmrg
20913f012e29Smrg	amdgpu_test_exec_cs_helper(context_handle,
20923f012e29Smrg				   AMDGPU_HW_IP_DMA, 0,
20933f012e29Smrg				   i, pm4,
20943f012e29Smrg				   1, &handle,
20953f012e29Smrg				   ib_info, ibs_request);
20963f012e29Smrg	i = 0;
20973f012e29Smrg	while (i < sdma_write_length) {
20983f012e29Smrg		CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
20993f012e29Smrg	}
21003f012e29Smrg	free(ibs_request);
21013f012e29Smrg	free(ib_info);
21023f012e29Smrg	free(pm4);
21033f012e29Smrg
21043f012e29Smrg	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
21053f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
21063f012e29Smrg	r = amdgpu_va_range_free(va_handle);
21073f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
21083f012e29Smrg	r = amdgpu_bo_free(buf_handle);
21093f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
21103f012e29Smrg	free(ptr);
21113f012e29Smrg
21123f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
21133f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
211400a23bdaSmrg
211500a23bdaSmrg	wait(NULL);
211600a23bdaSmrg}
211700a23bdaSmrg
211800a23bdaSmrgstatic void amdgpu_sync_dependency_test(void)
211900a23bdaSmrg{
212000a23bdaSmrg	amdgpu_context_handle context_handle[2];
212100a23bdaSmrg	amdgpu_bo_handle ib_result_handle;
212200a23bdaSmrg	void *ib_result_cpu;
212300a23bdaSmrg	uint64_t ib_result_mc_address;
212400a23bdaSmrg	struct amdgpu_cs_request ibs_request;
212500a23bdaSmrg	struct amdgpu_cs_ib_info ib_info;
212600a23bdaSmrg	struct amdgpu_cs_fence fence_status;
212700a23bdaSmrg	uint32_t expired;
212800a23bdaSmrg	int i, j, r;
212900a23bdaSmrg	amdgpu_bo_list_handle bo_list;
213000a23bdaSmrg	amdgpu_va_handle va_handle;
213100a23bdaSmrg	static uint32_t *ptr;
213200a23bdaSmrg	uint64_t seq_no;
213300a23bdaSmrg
213400a23bdaSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
213500a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
213600a23bdaSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
213700a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
213800a23bdaSmrg
213900a23bdaSmrg	r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
214000a23bdaSmrg			AMDGPU_GEM_DOMAIN_GTT, 0,
214100a23bdaSmrg						    &ib_result_handle, &ib_result_cpu,
214200a23bdaSmrg						    &ib_result_mc_address, &va_handle);
214300a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
214400a23bdaSmrg
214500a23bdaSmrg	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
214600a23bdaSmrg			       &bo_list);
214700a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
214800a23bdaSmrg
214900a23bdaSmrg	ptr = ib_result_cpu;
215000a23bdaSmrg	i = 0;
215100a23bdaSmrg
215200a23bdaSmrg	memcpy(ptr + CODE_OFFSET , shader_bin, sizeof(shader_bin));
215300a23bdaSmrg
215400a23bdaSmrg	/* Dispatch minimal init config and verify it's executed */
215500a23bdaSmrg	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
215600a23bdaSmrg	ptr[i++] = 0x80000000;
215700a23bdaSmrg	ptr[i++] = 0x80000000;
215800a23bdaSmrg
215900a23bdaSmrg	ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
216000a23bdaSmrg	ptr[i++] = 0x80000000;
216100a23bdaSmrg
216200a23bdaSmrg
216300a23bdaSmrg	/* Program compute regs */
216400a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
216500a23bdaSmrg	ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
216600a23bdaSmrg	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
216700a23bdaSmrg	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;
216800a23bdaSmrg
216900a23bdaSmrg
217000a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
217100a23bdaSmrg	ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
217200a23bdaSmrg	/*
217300a23bdaSmrg	 * 002c0040         COMPUTE_PGM_RSRC1 <- VGPRS = 0
217400a23bdaSmrg	                                      SGPRS = 1
217500a23bdaSmrg	                                      PRIORITY = 0
217600a23bdaSmrg	                                      FLOAT_MODE = 192 (0xc0)
217700a23bdaSmrg	                                      PRIV = 0
217800a23bdaSmrg	                                      DX10_CLAMP = 1
217900a23bdaSmrg	                                      DEBUG_MODE = 0
218000a23bdaSmrg	                                      IEEE_MODE = 0
218100a23bdaSmrg	                                      BULKY = 0
218200a23bdaSmrg	                                      CDBG_USER = 0
218300a23bdaSmrg	 *
218400a23bdaSmrg	 */
218500a23bdaSmrg	ptr[i++] = 0x002c0040;
218600a23bdaSmrg
218700a23bdaSmrg
218800a23bdaSmrg	/*
218900a23bdaSmrg	 * 00000010         COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
219000a23bdaSmrg	                                      USER_SGPR = 8
219100a23bdaSmrg	                                      TRAP_PRESENT = 0
219200a23bdaSmrg	                                      TGID_X_EN = 0
219300a23bdaSmrg	                                      TGID_Y_EN = 0
219400a23bdaSmrg	                                      TGID_Z_EN = 0
219500a23bdaSmrg	                                      TG_SIZE_EN = 0
219600a23bdaSmrg	                                      TIDIG_COMP_CNT = 0
219700a23bdaSmrg	                                      EXCP_EN_MSB = 0
219800a23bdaSmrg	                                      LDS_SIZE = 0
219900a23bdaSmrg	                                      EXCP_EN = 0
220000a23bdaSmrg	 *
220100a23bdaSmrg	 */
220200a23bdaSmrg	ptr[i++] = 0x00000010;
220300a23bdaSmrg
220400a23bdaSmrg
220500a23bdaSmrg/*
220600a23bdaSmrg * 00000100         COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
220700a23bdaSmrg                                         WAVESIZE = 0
220800a23bdaSmrg *
220900a23bdaSmrg */
221000a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
221100a23bdaSmrg	ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
221200a23bdaSmrg	ptr[i++] = 0x00000100;
221300a23bdaSmrg
221400a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
221500a23bdaSmrg	ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
221600a23bdaSmrg	ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
221700a23bdaSmrg	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
221800a23bdaSmrg
221900a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
222000a23bdaSmrg	ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
222100a23bdaSmrg	ptr[i++] = 0;
222200a23bdaSmrg
222300a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
222400a23bdaSmrg	ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
222500a23bdaSmrg	ptr[i++] = 1;
222600a23bdaSmrg	ptr[i++] = 1;
222700a23bdaSmrg	ptr[i++] = 1;
222800a23bdaSmrg
222900a23bdaSmrg
223000a23bdaSmrg	/* Dispatch */
223100a23bdaSmrg	ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
223200a23bdaSmrg	ptr[i++] = 1;
223300a23bdaSmrg	ptr[i++] = 1;
223400a23bdaSmrg	ptr[i++] = 1;
223500a23bdaSmrg	ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */
223600a23bdaSmrg
223700a23bdaSmrg
223800a23bdaSmrg	while (i & 7)
223900a23bdaSmrg		ptr[i++] =  0xffff1000; /* type3 nop packet */
224000a23bdaSmrg
224100a23bdaSmrg	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
224200a23bdaSmrg	ib_info.ib_mc_address = ib_result_mc_address;
224300a23bdaSmrg	ib_info.size = i;
224400a23bdaSmrg
224500a23bdaSmrg	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
224600a23bdaSmrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
224700a23bdaSmrg	ibs_request.ring = 0;
224800a23bdaSmrg	ibs_request.number_of_ibs = 1;
224900a23bdaSmrg	ibs_request.ibs = &ib_info;
225000a23bdaSmrg	ibs_request.resources = bo_list;
225100a23bdaSmrg	ibs_request.fence_info.handle = NULL;
225200a23bdaSmrg
225300a23bdaSmrg	r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request, 1);
225400a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
225500a23bdaSmrg	seq_no = ibs_request.seq_no;
225600a23bdaSmrg
225700a23bdaSmrg
225800a23bdaSmrg
225900a23bdaSmrg	/* Prepare second command with dependency on the first */
226000a23bdaSmrg	j = i;
226100a23bdaSmrg	ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
226200a23bdaSmrg	ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
226300a23bdaSmrg	ptr[i++] =          0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
226400a23bdaSmrg	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
226500a23bdaSmrg	ptr[i++] = 99;
226600a23bdaSmrg
226700a23bdaSmrg	while (i & 7)
226800a23bdaSmrg		ptr[i++] =  0xffff1000; /* type3 nop packet */
226900a23bdaSmrg
227000a23bdaSmrg	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
227100a23bdaSmrg	ib_info.ib_mc_address = ib_result_mc_address + j * 4;
227200a23bdaSmrg	ib_info.size = i - j;
227300a23bdaSmrg
227400a23bdaSmrg	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
227500a23bdaSmrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
227600a23bdaSmrg	ibs_request.ring = 0;
227700a23bdaSmrg	ibs_request.number_of_ibs = 1;
227800a23bdaSmrg	ibs_request.ibs = &ib_info;
227900a23bdaSmrg	ibs_request.resources = bo_list;
228000a23bdaSmrg	ibs_request.fence_info.handle = NULL;
228100a23bdaSmrg
228200a23bdaSmrg	ibs_request.number_of_dependencies = 1;
228300a23bdaSmrg
228400a23bdaSmrg	ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
228500a23bdaSmrg	ibs_request.dependencies[0].context = context_handle[1];
228600a23bdaSmrg	ibs_request.dependencies[0].ip_instance = 0;
228700a23bdaSmrg	ibs_request.dependencies[0].ring = 0;
228800a23bdaSmrg	ibs_request.dependencies[0].fence = seq_no;
228900a23bdaSmrg
229000a23bdaSmrg
229100a23bdaSmrg	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request, 1);
229200a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
229300a23bdaSmrg
229400a23bdaSmrg
229500a23bdaSmrg	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
229600a23bdaSmrg	fence_status.context = context_handle[0];
229700a23bdaSmrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
229800a23bdaSmrg	fence_status.ip_instance = 0;
229900a23bdaSmrg	fence_status.ring = 0;
230000a23bdaSmrg	fence_status.fence = ibs_request.seq_no;
230100a23bdaSmrg
230200a23bdaSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
230300a23bdaSmrg		       AMDGPU_TIMEOUT_INFINITE,0, &expired);
230400a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
230500a23bdaSmrg
230600a23bdaSmrg	/* Expect the second command to wait for shader to complete */
230700a23bdaSmrg	CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);
230800a23bdaSmrg
230900a23bdaSmrg	r = amdgpu_bo_list_destroy(bo_list);
231000a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
231100a23bdaSmrg
231200a23bdaSmrg	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
231300a23bdaSmrg				     ib_result_mc_address, 4096);
231400a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
231500a23bdaSmrg
231600a23bdaSmrg	r = amdgpu_cs_ctx_free(context_handle[0]);
231700a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
231800a23bdaSmrg	r = amdgpu_cs_ctx_free(context_handle[1]);
231900a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
232000a23bdaSmrg
232100a23bdaSmrg	free(ibs_request.dependencies);
23223f012e29Smrg}
23235324fb0dSmrg
23249bd392adSmrgstatic int amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t *ptr, int family)
23259bd392adSmrg{
23269bd392adSmrg	struct amdgpu_test_shader *shader;
23279bd392adSmrg	int i, loop = 0x10000;
23289bd392adSmrg
23299bd392adSmrg	switch (family) {
23309bd392adSmrg		case AMDGPU_FAMILY_AI:
23319bd392adSmrg			shader = &memcpy_cs_hang_slow_ai;
23329bd392adSmrg			break;
23339bd392adSmrg		case AMDGPU_FAMILY_RV:
23349bd392adSmrg			shader = &memcpy_cs_hang_slow_rv;
23359bd392adSmrg			break;
23369bd392adSmrg		default:
23379bd392adSmrg			return -1;
23389bd392adSmrg			break;
23399bd392adSmrg	}
23409bd392adSmrg
23419bd392adSmrg	memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
23429bd392adSmrg
23439bd392adSmrg	for (i = 0; i < loop; i++)
23449bd392adSmrg		memcpy(ptr + shader->header_length + shader->body_length * i,
23459bd392adSmrg			shader->shader + shader->header_length,
23469bd392adSmrg			shader->body_length * sizeof(uint32_t));
23479bd392adSmrg
23489bd392adSmrg	memcpy(ptr + shader->header_length + shader->body_length * loop,
23499bd392adSmrg		shader->shader + shader->header_length + shader->body_length,
23509bd392adSmrg		shader->foot_length * sizeof(uint32_t));
23519bd392adSmrg
23529bd392adSmrg	return 0;
23539bd392adSmrg}
23549bd392adSmrg
23555324fb0dSmrgstatic int amdgpu_dispatch_load_cs_shader(uint8_t *ptr,
23565324fb0dSmrg					   int cs_type)
23575324fb0dSmrg{
23585324fb0dSmrg	uint32_t shader_size;
23595324fb0dSmrg	const uint32_t *shader;
23605324fb0dSmrg
23615324fb0dSmrg	switch (cs_type) {
23625324fb0dSmrg		case CS_BUFFERCLEAR:
23635324fb0dSmrg			shader = bufferclear_cs_shader_gfx9;
23645324fb0dSmrg			shader_size = sizeof(bufferclear_cs_shader_gfx9);
23655324fb0dSmrg			break;
23665324fb0dSmrg		case CS_BUFFERCOPY:
23675324fb0dSmrg			shader = buffercopy_cs_shader_gfx9;
23685324fb0dSmrg			shader_size = sizeof(buffercopy_cs_shader_gfx9);
23695324fb0dSmrg			break;
23709bd392adSmrg		case CS_HANG:
23719bd392adSmrg			shader = memcpy_ps_hang;
23729bd392adSmrg			shader_size = sizeof(memcpy_ps_hang);
23739bd392adSmrg			break;
23745324fb0dSmrg		default:
23755324fb0dSmrg			return -1;
23765324fb0dSmrg			break;
23775324fb0dSmrg	}
23785324fb0dSmrg
23795324fb0dSmrg	memcpy(ptr, shader, shader_size);
23805324fb0dSmrg	return 0;
23815324fb0dSmrg}
23825324fb0dSmrg
23835324fb0dSmrgstatic int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type)
23845324fb0dSmrg{
23855324fb0dSmrg	int i = 0;
23865324fb0dSmrg
23875324fb0dSmrg	/* Write context control and load shadowing register if necessary */
23885324fb0dSmrg	if (ip_type == AMDGPU_HW_IP_GFX) {
23895324fb0dSmrg		ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
23905324fb0dSmrg		ptr[i++] = 0x80000000;
23915324fb0dSmrg		ptr[i++] = 0x80000000;
23925324fb0dSmrg	}
23935324fb0dSmrg
23945324fb0dSmrg	/* Issue commands to set default compute state. */
23955324fb0dSmrg	/* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */
23965324fb0dSmrg	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 3);
23975324fb0dSmrg	ptr[i++] = 0x204;
23985324fb0dSmrg	i += 3;
239988f8a8d2Smrg
24005324fb0dSmrg	/* clear mmCOMPUTE_TMPRING_SIZE */
24015324fb0dSmrg	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
24025324fb0dSmrg	ptr[i++] = 0x218;
24035324fb0dSmrg	ptr[i++] = 0;
24045324fb0dSmrg
24055324fb0dSmrg	return i;
24065324fb0dSmrg}
24075324fb0dSmrg
24085324fb0dSmrgstatic int amdgpu_dispatch_write_cumask(uint32_t *ptr)
24095324fb0dSmrg{
24105324fb0dSmrg	int i = 0;
24115324fb0dSmrg
24125324fb0dSmrg	/*  Issue commands to set cu mask used in current dispatch */
24135324fb0dSmrg	/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */
24145324fb0dSmrg	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
24155324fb0dSmrg	ptr[i++] = 0x216;
24165324fb0dSmrg	ptr[i++] = 0xffffffff;
24175324fb0dSmrg	ptr[i++] = 0xffffffff;
24185324fb0dSmrg	/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */
24195324fb0dSmrg	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
24205324fb0dSmrg	ptr[i++] = 0x219;
24215324fb0dSmrg	ptr[i++] = 0xffffffff;
24225324fb0dSmrg	ptr[i++] = 0xffffffff;
24235324fb0dSmrg
24245324fb0dSmrg	return i;
24255324fb0dSmrg}
24265324fb0dSmrg
24275324fb0dSmrgstatic int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr)
24285324fb0dSmrg{
24295324fb0dSmrg	int i, j;
24305324fb0dSmrg
24315324fb0dSmrg	i = 0;
24325324fb0dSmrg
24335324fb0dSmrg	/* Writes shader state to HW */
24345324fb0dSmrg	/* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */
24355324fb0dSmrg	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
24365324fb0dSmrg	ptr[i++] = 0x20c;
24375324fb0dSmrg	ptr[i++] = (shader_addr >> 8);
24385324fb0dSmrg	ptr[i++] = (shader_addr >> 40);
24395324fb0dSmrg	/* write sh regs*/
24405324fb0dSmrg	for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) {
24415324fb0dSmrg		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
24425324fb0dSmrg		/* - Gfx9ShRegBase */
24435324fb0dSmrg		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00;
24445324fb0dSmrg		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1];
24455324fb0dSmrg	}
24465324fb0dSmrg
24475324fb0dSmrg	return i;
24485324fb0dSmrg}
24495324fb0dSmrg
24505324fb0dSmrgstatic void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
24515324fb0dSmrg					 uint32_t ip_type,
24525324fb0dSmrg					 uint32_t ring)
24535324fb0dSmrg{
24545324fb0dSmrg	amdgpu_context_handle context_handle;
24555324fb0dSmrg	amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3];
24565324fb0dSmrg	volatile unsigned char *ptr_dst;
24575324fb0dSmrg	void *ptr_shader;
24585324fb0dSmrg	uint32_t *ptr_cmd;
24595324fb0dSmrg	uint64_t mc_address_dst, mc_address_shader, mc_address_cmd;
24605324fb0dSmrg	amdgpu_va_handle va_dst, va_shader, va_cmd;
24615324fb0dSmrg	int i, r;
24625324fb0dSmrg	int bo_dst_size = 16384;
24635324fb0dSmrg	int bo_shader_size = 4096;
24645324fb0dSmrg	int bo_cmd_size = 4096;
24655324fb0dSmrg	struct amdgpu_cs_request ibs_request = {0};
24665324fb0dSmrg	struct amdgpu_cs_ib_info ib_info= {0};
24675324fb0dSmrg	amdgpu_bo_list_handle bo_list;
24685324fb0dSmrg	struct amdgpu_cs_fence fence_status = {0};
24695324fb0dSmrg	uint32_t expired;
24705324fb0dSmrg
24715324fb0dSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
24725324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
24735324fb0dSmrg
24745324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
24755324fb0dSmrg					AMDGPU_GEM_DOMAIN_GTT, 0,
24765324fb0dSmrg					&bo_cmd, (void **)&ptr_cmd,
24775324fb0dSmrg					&mc_address_cmd, &va_cmd);
24785324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
24795324fb0dSmrg	memset(ptr_cmd, 0, bo_cmd_size);
24805324fb0dSmrg
24815324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
24825324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
24835324fb0dSmrg					&bo_shader, &ptr_shader,
24845324fb0dSmrg					&mc_address_shader, &va_shader);
24855324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
248688f8a8d2Smrg	memset(ptr_shader, 0, bo_shader_size);
24875324fb0dSmrg
24885324fb0dSmrg	r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR);
24895324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
24905324fb0dSmrg
24915324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
24925324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
24935324fb0dSmrg					&bo_dst, (void **)&ptr_dst,
24945324fb0dSmrg					&mc_address_dst, &va_dst);
24955324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
24965324fb0dSmrg
24975324fb0dSmrg	i = 0;
24985324fb0dSmrg	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);
24995324fb0dSmrg
25005324fb0dSmrg	/*  Issue commands to set cu mask used in current dispatch */
25015324fb0dSmrg	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);
25025324fb0dSmrg
25035324fb0dSmrg	/* Writes shader state to HW */
25045324fb0dSmrg	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);
25055324fb0dSmrg
25065324fb0dSmrg	/* Write constant data */
25075324fb0dSmrg	/* Writes the UAV constant data to the SGPRs. */
25085324fb0dSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
25095324fb0dSmrg	ptr_cmd[i++] = 0x240;
25105324fb0dSmrg	ptr_cmd[i++] = mc_address_dst;
25115324fb0dSmrg	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
25125324fb0dSmrg	ptr_cmd[i++] = 0x400;
25135324fb0dSmrg	ptr_cmd[i++] = 0x74fac;
25145324fb0dSmrg
25155324fb0dSmrg	/* Sets a range of pixel shader constants */
25165324fb0dSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
25175324fb0dSmrg	ptr_cmd[i++] = 0x244;
25185324fb0dSmrg	ptr_cmd[i++] = 0x22222222;
25195324fb0dSmrg	ptr_cmd[i++] = 0x22222222;
25205324fb0dSmrg	ptr_cmd[i++] = 0x22222222;
25215324fb0dSmrg	ptr_cmd[i++] = 0x22222222;
25225324fb0dSmrg
252388f8a8d2Smrg	/* clear mmCOMPUTE_RESOURCE_LIMITS */
252488f8a8d2Smrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
252588f8a8d2Smrg	ptr_cmd[i++] = 0x215;
252688f8a8d2Smrg	ptr_cmd[i++] = 0;
252788f8a8d2Smrg
25285324fb0dSmrg	/* dispatch direct command */
25295324fb0dSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
25305324fb0dSmrg	ptr_cmd[i++] = 0x10;
25315324fb0dSmrg	ptr_cmd[i++] = 1;
25325324fb0dSmrg	ptr_cmd[i++] = 1;
25335324fb0dSmrg	ptr_cmd[i++] = 1;
25345324fb0dSmrg
25355324fb0dSmrg	while (i & 7)
25365324fb0dSmrg		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
25375324fb0dSmrg
25385324fb0dSmrg	resources[0] = bo_dst;
25395324fb0dSmrg	resources[1] = bo_shader;
25405324fb0dSmrg	resources[2] = bo_cmd;
25415324fb0dSmrg	r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list);
25425324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
25435324fb0dSmrg
25445324fb0dSmrg	ib_info.ib_mc_address = mc_address_cmd;
25455324fb0dSmrg	ib_info.size = i;
25465324fb0dSmrg	ibs_request.ip_type = ip_type;
25475324fb0dSmrg	ibs_request.ring = ring;
25485324fb0dSmrg	ibs_request.resources = bo_list;
25495324fb0dSmrg	ibs_request.number_of_ibs = 1;
25505324fb0dSmrg	ibs_request.ibs = &ib_info;
25515324fb0dSmrg	ibs_request.fence_info.handle = NULL;
25525324fb0dSmrg
25535324fb0dSmrg	/* submit CS */
25545324fb0dSmrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
25555324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
25565324fb0dSmrg
25575324fb0dSmrg	r = amdgpu_bo_list_destroy(bo_list);
25585324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
25595324fb0dSmrg
25605324fb0dSmrg	fence_status.ip_type = ip_type;
25615324fb0dSmrg	fence_status.ip_instance = 0;
25625324fb0dSmrg	fence_status.ring = ring;
25635324fb0dSmrg	fence_status.context = context_handle;
25645324fb0dSmrg	fence_status.fence = ibs_request.seq_no;
25655324fb0dSmrg
25665324fb0dSmrg	/* wait for IB accomplished */
25675324fb0dSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
25685324fb0dSmrg					 AMDGPU_TIMEOUT_INFINITE,
25695324fb0dSmrg					 0, &expired);
25705324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
25715324fb0dSmrg	CU_ASSERT_EQUAL(expired, true);
25725324fb0dSmrg
25735324fb0dSmrg	/* verify if memset test result meets with expected */
25745324fb0dSmrg	i = 0;
25755324fb0dSmrg	while(i < bo_dst_size) {
25765324fb0dSmrg		CU_ASSERT_EQUAL(ptr_dst[i++], 0x22);
25775324fb0dSmrg	}
25785324fb0dSmrg
25795324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
25805324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
25815324fb0dSmrg
25825324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
25835324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
25845324fb0dSmrg
25855324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
25865324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
25875324fb0dSmrg
25885324fb0dSmrg	r = amdgpu_cs_ctx_free(context_handle);
25895324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
25905324fb0dSmrg}
25915324fb0dSmrg
25925324fb0dSmrgstatic void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
25935324fb0dSmrg					uint32_t ip_type,
25949bd392adSmrg					uint32_t ring,
25959bd392adSmrg					int hang)
25965324fb0dSmrg{
25975324fb0dSmrg	amdgpu_context_handle context_handle;
25985324fb0dSmrg	amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
25995324fb0dSmrg	volatile unsigned char *ptr_dst;
26005324fb0dSmrg	void *ptr_shader;
26015324fb0dSmrg	unsigned char *ptr_src;
26025324fb0dSmrg	uint32_t *ptr_cmd;
26035324fb0dSmrg	uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
26045324fb0dSmrg	amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
26055324fb0dSmrg	int i, r;
26065324fb0dSmrg	int bo_dst_size = 16384;
26075324fb0dSmrg	int bo_shader_size = 4096;
26085324fb0dSmrg	int bo_cmd_size = 4096;
26095324fb0dSmrg	struct amdgpu_cs_request ibs_request = {0};
26105324fb0dSmrg	struct amdgpu_cs_ib_info ib_info= {0};
26119bd392adSmrg	uint32_t expired, hang_state, hangs;
26129bd392adSmrg	enum cs_type cs_type;
26135324fb0dSmrg	amdgpu_bo_list_handle bo_list;
26145324fb0dSmrg	struct amdgpu_cs_fence fence_status = {0};
26155324fb0dSmrg
26165324fb0dSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
26175324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
26185324fb0dSmrg
26195324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
26205324fb0dSmrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
26215324fb0dSmrg				    &bo_cmd, (void **)&ptr_cmd,
26225324fb0dSmrg				    &mc_address_cmd, &va_cmd);
26235324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
26245324fb0dSmrg	memset(ptr_cmd, 0, bo_cmd_size);
26255324fb0dSmrg
26265324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
26275324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
26285324fb0dSmrg					&bo_shader, &ptr_shader,
26295324fb0dSmrg					&mc_address_shader, &va_shader);
26305324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
263188f8a8d2Smrg	memset(ptr_shader, 0, bo_shader_size);
26325324fb0dSmrg
26339bd392adSmrg	cs_type = hang ? CS_HANG : CS_BUFFERCOPY;
26349bd392adSmrg	r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type);
26355324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
26365324fb0dSmrg
26375324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
26385324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
26395324fb0dSmrg					&bo_src, (void **)&ptr_src,
26405324fb0dSmrg					&mc_address_src, &va_src);
26415324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
26425324fb0dSmrg
26435324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
26445324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
26455324fb0dSmrg					&bo_dst, (void **)&ptr_dst,
26465324fb0dSmrg					&mc_address_dst, &va_dst);
26475324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
26485324fb0dSmrg
26495324fb0dSmrg	memset(ptr_src, 0x55, bo_dst_size);
26505324fb0dSmrg
26515324fb0dSmrg	i = 0;
26525324fb0dSmrg	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);
26535324fb0dSmrg
26545324fb0dSmrg	/*  Issue commands to set cu mask used in current dispatch */
26555324fb0dSmrg	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);
26565324fb0dSmrg
26575324fb0dSmrg	/* Writes shader state to HW */
26585324fb0dSmrg	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);
26595324fb0dSmrg
26605324fb0dSmrg	/* Write constant data */
26615324fb0dSmrg	/* Writes the texture resource constants data to the SGPRs */
26625324fb0dSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
26635324fb0dSmrg	ptr_cmd[i++] = 0x240;
26645324fb0dSmrg	ptr_cmd[i++] = mc_address_src;
26655324fb0dSmrg	ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
26665324fb0dSmrg	ptr_cmd[i++] = 0x400;
26675324fb0dSmrg	ptr_cmd[i++] = 0x74fac;
26685324fb0dSmrg
26695324fb0dSmrg	/* Writes the UAV constant data to the SGPRs. */
26705324fb0dSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
26715324fb0dSmrg	ptr_cmd[i++] = 0x244;
26725324fb0dSmrg	ptr_cmd[i++] = mc_address_dst;
26735324fb0dSmrg	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
26745324fb0dSmrg	ptr_cmd[i++] = 0x400;
26755324fb0dSmrg	ptr_cmd[i++] = 0x74fac;
26765324fb0dSmrg
267788f8a8d2Smrg	/* clear mmCOMPUTE_RESOURCE_LIMITS */
267888f8a8d2Smrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
267988f8a8d2Smrg	ptr_cmd[i++] = 0x215;
268088f8a8d2Smrg	ptr_cmd[i++] = 0;
268188f8a8d2Smrg
26825324fb0dSmrg	/* dispatch direct command */
26835324fb0dSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
26845324fb0dSmrg	ptr_cmd[i++] = 0x10;
26855324fb0dSmrg	ptr_cmd[i++] = 1;
26865324fb0dSmrg	ptr_cmd[i++] = 1;
26875324fb0dSmrg	ptr_cmd[i++] = 1;
26885324fb0dSmrg
26895324fb0dSmrg	while (i & 7)
26905324fb0dSmrg		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
26915324fb0dSmrg
26925324fb0dSmrg	resources[0] = bo_shader;
26935324fb0dSmrg	resources[1] = bo_src;
26945324fb0dSmrg	resources[2] = bo_dst;
26955324fb0dSmrg	resources[3] = bo_cmd;
26965324fb0dSmrg	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
26975324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
26985324fb0dSmrg
26995324fb0dSmrg	ib_info.ib_mc_address = mc_address_cmd;
27005324fb0dSmrg	ib_info.size = i;
27015324fb0dSmrg	ibs_request.ip_type = ip_type;
27025324fb0dSmrg	ibs_request.ring = ring;
27035324fb0dSmrg	ibs_request.resources = bo_list;
27045324fb0dSmrg	ibs_request.number_of_ibs = 1;
27055324fb0dSmrg	ibs_request.ibs = &ib_info;
27065324fb0dSmrg	ibs_request.fence_info.handle = NULL;
27075324fb0dSmrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
27085324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
27095324fb0dSmrg
27105324fb0dSmrg	fence_status.ip_type = ip_type;
27115324fb0dSmrg	fence_status.ip_instance = 0;
27125324fb0dSmrg	fence_status.ring = ring;
27135324fb0dSmrg	fence_status.context = context_handle;
27145324fb0dSmrg	fence_status.fence = ibs_request.seq_no;
27155324fb0dSmrg
27165324fb0dSmrg	/* wait for IB accomplished */
27175324fb0dSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
27185324fb0dSmrg					 AMDGPU_TIMEOUT_INFINITE,
27195324fb0dSmrg					 0, &expired);
27205324fb0dSmrg
27219bd392adSmrg	if (!hang) {
27229bd392adSmrg		CU_ASSERT_EQUAL(r, 0);
27239bd392adSmrg		CU_ASSERT_EQUAL(expired, true);
27249bd392adSmrg
27259bd392adSmrg		/* verify if memcpy test result meets with expected */
27269bd392adSmrg		i = 0;
27279bd392adSmrg		while(i < bo_dst_size) {
27289bd392adSmrg			CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
27299bd392adSmrg			i++;
27309bd392adSmrg		}
27319bd392adSmrg	} else {
27329bd392adSmrg		r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
27339bd392adSmrg		CU_ASSERT_EQUAL(r, 0);
27349bd392adSmrg		CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
27355324fb0dSmrg	}
27365324fb0dSmrg
27375324fb0dSmrg	r = amdgpu_bo_list_destroy(bo_list);
27385324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
27395324fb0dSmrg
27405324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
27415324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
27425324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
27435324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
27445324fb0dSmrg
27455324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
27465324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
27475324fb0dSmrg
27485324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
27495324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
27505324fb0dSmrg
27515324fb0dSmrg	r = amdgpu_cs_ctx_free(context_handle);
27525324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
27535324fb0dSmrg}
275488f8a8d2Smrg
275588f8a8d2Smrgstatic void amdgpu_compute_dispatch_test(void)
27565324fb0dSmrg{
27575324fb0dSmrg	int r;
27585324fb0dSmrg	struct drm_amdgpu_info_hw_ip info;
27595324fb0dSmrg	uint32_t ring_id;
27605324fb0dSmrg
27615324fb0dSmrg	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
27625324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
276388f8a8d2Smrg	if (!info.available_rings)
276488f8a8d2Smrg		printf("SKIP ... as there's no compute ring\n");
27655324fb0dSmrg
27665324fb0dSmrg	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
27675324fb0dSmrg		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id);
27689bd392adSmrg		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, 0);
27695324fb0dSmrg	}
277088f8a8d2Smrg}
277188f8a8d2Smrg
277288f8a8d2Smrgstatic void amdgpu_gfx_dispatch_test(void)
277388f8a8d2Smrg{
277488f8a8d2Smrg	int r;
277588f8a8d2Smrg	struct drm_amdgpu_info_hw_ip info;
277688f8a8d2Smrg	uint32_t ring_id;
27775324fb0dSmrg
27785324fb0dSmrg	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
27795324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
278088f8a8d2Smrg	if (!info.available_rings)
278188f8a8d2Smrg		printf("SKIP ... as there's no graphics ring\n");
27825324fb0dSmrg
27835324fb0dSmrg	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
27845324fb0dSmrg		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id);
27859bd392adSmrg		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, 0);
27869bd392adSmrg	}
27879bd392adSmrg}
27889bd392adSmrg
27899bd392adSmrgvoid amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
27909bd392adSmrg{
27919bd392adSmrg	int r;
27929bd392adSmrg	struct drm_amdgpu_info_hw_ip info;
27939bd392adSmrg	uint32_t ring_id;
27949bd392adSmrg
27959bd392adSmrg	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
27969bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
27979bd392adSmrg	if (!info.available_rings)
27989bd392adSmrg		printf("SKIP ... as there's no ring for ip %d\n", ip_type);
27999bd392adSmrg
28009bd392adSmrg	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
28019bd392adSmrg		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
28029bd392adSmrg		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 1);
28039bd392adSmrg		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
28049bd392adSmrg	}
28059bd392adSmrg}
28069bd392adSmrg
28079bd392adSmrgstatic void amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_handle,
28089bd392adSmrg						  uint32_t ip_type, uint32_t ring)
28099bd392adSmrg{
28109bd392adSmrg	amdgpu_context_handle context_handle;
28119bd392adSmrg	amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
28129bd392adSmrg	volatile unsigned char *ptr_dst;
28139bd392adSmrg	void *ptr_shader;
28149bd392adSmrg	unsigned char *ptr_src;
28159bd392adSmrg	uint32_t *ptr_cmd;
28169bd392adSmrg	uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
28179bd392adSmrg	amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
28189bd392adSmrg	int i, r;
28199bd392adSmrg	int bo_dst_size = 0x4000000;
28209bd392adSmrg	int bo_shader_size = 0x400000;
28219bd392adSmrg	int bo_cmd_size = 4096;
28229bd392adSmrg	struct amdgpu_cs_request ibs_request = {0};
28239bd392adSmrg	struct amdgpu_cs_ib_info ib_info= {0};
28249bd392adSmrg	uint32_t hang_state, hangs, expired;
28259bd392adSmrg	struct amdgpu_gpu_info gpu_info = {0};
28269bd392adSmrg	amdgpu_bo_list_handle bo_list;
28279bd392adSmrg	struct amdgpu_cs_fence fence_status = {0};
28289bd392adSmrg
28299bd392adSmrg	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
28309bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
28319bd392adSmrg
28329bd392adSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
28339bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
28349bd392adSmrg
28359bd392adSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
28369bd392adSmrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
28379bd392adSmrg				    &bo_cmd, (void **)&ptr_cmd,
28389bd392adSmrg				    &mc_address_cmd, &va_cmd);
28399bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
28409bd392adSmrg	memset(ptr_cmd, 0, bo_cmd_size);
28419bd392adSmrg
28429bd392adSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
28439bd392adSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
28449bd392adSmrg					&bo_shader, &ptr_shader,
28459bd392adSmrg					&mc_address_shader, &va_shader);
28469bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
28479bd392adSmrg	memset(ptr_shader, 0, bo_shader_size);
28489bd392adSmrg
28499bd392adSmrg	r = amdgpu_dispatch_load_cs_shader_hang_slow(ptr_shader, gpu_info.family_id);
28509bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
28519bd392adSmrg
28529bd392adSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
28539bd392adSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
28549bd392adSmrg					&bo_src, (void **)&ptr_src,
28559bd392adSmrg					&mc_address_src, &va_src);
28569bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
28579bd392adSmrg
28589bd392adSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
28599bd392adSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
28609bd392adSmrg					&bo_dst, (void **)&ptr_dst,
28619bd392adSmrg					&mc_address_dst, &va_dst);
28629bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
28639bd392adSmrg
28649bd392adSmrg	memset(ptr_src, 0x55, bo_dst_size);
28659bd392adSmrg
28669bd392adSmrg	i = 0;
28679bd392adSmrg	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);
28689bd392adSmrg
28699bd392adSmrg	/*  Issue commands to set cu mask used in current dispatch */
28709bd392adSmrg	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);
28719bd392adSmrg
28729bd392adSmrg	/* Writes shader state to HW */
28739bd392adSmrg	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);
28749bd392adSmrg
28759bd392adSmrg	/* Write constant data */
28769bd392adSmrg	/* Writes the texture resource constants data to the SGPRs */
28779bd392adSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
28789bd392adSmrg	ptr_cmd[i++] = 0x240;
28799bd392adSmrg	ptr_cmd[i++] = mc_address_src;
28809bd392adSmrg	ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
28819bd392adSmrg	ptr_cmd[i++] = 0x400000;
28829bd392adSmrg	ptr_cmd[i++] = 0x74fac;
28839bd392adSmrg
28849bd392adSmrg	/* Writes the UAV constant data to the SGPRs. */
28859bd392adSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
28869bd392adSmrg	ptr_cmd[i++] = 0x244;
28879bd392adSmrg	ptr_cmd[i++] = mc_address_dst;
28889bd392adSmrg	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
28899bd392adSmrg	ptr_cmd[i++] = 0x400000;
28909bd392adSmrg	ptr_cmd[i++] = 0x74fac;
28919bd392adSmrg
28929bd392adSmrg	/* clear mmCOMPUTE_RESOURCE_LIMITS */
28939bd392adSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
28949bd392adSmrg	ptr_cmd[i++] = 0x215;
28959bd392adSmrg	ptr_cmd[i++] = 0;
28969bd392adSmrg
28979bd392adSmrg	/* dispatch direct command */
28989bd392adSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
28999bd392adSmrg	ptr_cmd[i++] = 0x10000;
29009bd392adSmrg	ptr_cmd[i++] = 1;
29019bd392adSmrg	ptr_cmd[i++] = 1;
29029bd392adSmrg	ptr_cmd[i++] = 1;
29039bd392adSmrg
29049bd392adSmrg	while (i & 7)
29059bd392adSmrg		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
29069bd392adSmrg
29079bd392adSmrg	resources[0] = bo_shader;
29089bd392adSmrg	resources[1] = bo_src;
29099bd392adSmrg	resources[2] = bo_dst;
29109bd392adSmrg	resources[3] = bo_cmd;
29119bd392adSmrg	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
29129bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
29139bd392adSmrg
29149bd392adSmrg	ib_info.ib_mc_address = mc_address_cmd;
29159bd392adSmrg	ib_info.size = i;
29169bd392adSmrg	ibs_request.ip_type = ip_type;
29179bd392adSmrg	ibs_request.ring = ring;
29189bd392adSmrg	ibs_request.resources = bo_list;
29199bd392adSmrg	ibs_request.number_of_ibs = 1;
29209bd392adSmrg	ibs_request.ibs = &ib_info;
29219bd392adSmrg	ibs_request.fence_info.handle = NULL;
29229bd392adSmrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
29239bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
29249bd392adSmrg
29259bd392adSmrg	fence_status.ip_type = ip_type;
29269bd392adSmrg	fence_status.ip_instance = 0;
29279bd392adSmrg	fence_status.ring = ring;
29289bd392adSmrg	fence_status.context = context_handle;
29299bd392adSmrg	fence_status.fence = ibs_request.seq_no;
29309bd392adSmrg
29319bd392adSmrg	/* wait for IB accomplished */
29329bd392adSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
29339bd392adSmrg					 AMDGPU_TIMEOUT_INFINITE,
29349bd392adSmrg					 0, &expired);
29359bd392adSmrg
29369bd392adSmrg	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
29379bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
29389bd392adSmrg	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
29399bd392adSmrg
29409bd392adSmrg	r = amdgpu_bo_list_destroy(bo_list);
29419bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
29429bd392adSmrg
29439bd392adSmrg	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
29449bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
29459bd392adSmrg	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
29469bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
29479bd392adSmrg
29489bd392adSmrg	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
29499bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
29509bd392adSmrg
29519bd392adSmrg	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
29529bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
29539bd392adSmrg
29549bd392adSmrg	r = amdgpu_cs_ctx_free(context_handle);
29559bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
29569bd392adSmrg}
29579bd392adSmrg
29589bd392adSmrgvoid amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
29599bd392adSmrg{
29609bd392adSmrg	int r;
29619bd392adSmrg	struct drm_amdgpu_info_hw_ip info;
29629bd392adSmrg	uint32_t ring_id;
29639bd392adSmrg
29649bd392adSmrg	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
29659bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
29669bd392adSmrg	if (!info.available_rings)
29679bd392adSmrg		printf("SKIP ... as there's no ring for ip %d\n", ip_type);
29689bd392adSmrg
29699bd392adSmrg	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
29709bd392adSmrg		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
29719bd392adSmrg		amdgpu_memcpy_dispatch_hang_slow_test(device_handle, ip_type, ring_id);
29729bd392adSmrg		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
29739bd392adSmrg	}
29749bd392adSmrg}
29759bd392adSmrg
29769bd392adSmrgstatic int amdgpu_draw_load_ps_shader_hang_slow(uint32_t *ptr, int family)
29779bd392adSmrg{
29789bd392adSmrg	struct amdgpu_test_shader *shader;
29799bd392adSmrg	int i, loop = 0x40000;
29809bd392adSmrg
29819bd392adSmrg	switch (family) {
29829bd392adSmrg		case AMDGPU_FAMILY_AI:
29839bd392adSmrg		case AMDGPU_FAMILY_RV:
29849bd392adSmrg			shader = &memcpy_ps_hang_slow_ai;
29859bd392adSmrg			break;
29869bd392adSmrg		default:
29879bd392adSmrg			return -1;
29889bd392adSmrg			break;
29895324fb0dSmrg	}
29909bd392adSmrg
29919bd392adSmrg	memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
29929bd392adSmrg
29939bd392adSmrg	for (i = 0; i < loop; i++)
29949bd392adSmrg		memcpy(ptr + shader->header_length + shader->body_length * i,
29959bd392adSmrg			shader->shader + shader->header_length,
29969bd392adSmrg			shader->body_length * sizeof(uint32_t));
29979bd392adSmrg
29989bd392adSmrg	memcpy(ptr + shader->header_length + shader->body_length * loop,
29999bd392adSmrg		shader->shader + shader->header_length + shader->body_length,
30009bd392adSmrg		shader->foot_length * sizeof(uint32_t));
30019bd392adSmrg
30029bd392adSmrg	return 0;
30035324fb0dSmrg}
30045324fb0dSmrg
30055324fb0dSmrgstatic int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type)
30065324fb0dSmrg{
30075324fb0dSmrg	int i;
30085324fb0dSmrg	uint32_t shader_offset= 256;
30095324fb0dSmrg	uint32_t mem_offset, patch_code_offset;
30105324fb0dSmrg	uint32_t shader_size, patchinfo_code_size;
30115324fb0dSmrg	const uint32_t *shader;
30125324fb0dSmrg	const uint32_t *patchinfo_code;
30135324fb0dSmrg	const uint32_t *patchcode_offset;
30145324fb0dSmrg
30155324fb0dSmrg	switch (ps_type) {
30165324fb0dSmrg		case PS_CONST:
30175324fb0dSmrg			shader = ps_const_shader_gfx9;
30185324fb0dSmrg			shader_size = sizeof(ps_const_shader_gfx9);
30195324fb0dSmrg			patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9;
30205324fb0dSmrg			patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9;
30215324fb0dSmrg			patchcode_offset = ps_const_shader_patchinfo_offset_gfx9;
30225324fb0dSmrg			break;
30235324fb0dSmrg		case PS_TEX:
30245324fb0dSmrg			shader = ps_tex_shader_gfx9;
30255324fb0dSmrg			shader_size = sizeof(ps_tex_shader_gfx9);
30265324fb0dSmrg			patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx9;
30275324fb0dSmrg			patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9;
30285324fb0dSmrg			patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9;
30295324fb0dSmrg			break;
30309bd392adSmrg		case PS_HANG:
30319bd392adSmrg			shader = memcpy_ps_hang;
30329bd392adSmrg			shader_size = sizeof(memcpy_ps_hang);
30339bd392adSmrg
30349bd392adSmrg			memcpy(ptr, shader, shader_size);
30359bd392adSmrg			return 0;
30365324fb0dSmrg		default:
30375324fb0dSmrg			return -1;
30385324fb0dSmrg			break;
30395324fb0dSmrg	}
30405324fb0dSmrg
30415324fb0dSmrg	/* write main shader program */
30425324fb0dSmrg	for (i = 0 ; i < 10; i++) {
30435324fb0dSmrg		mem_offset = i * shader_offset;
30445324fb0dSmrg		memcpy(ptr + mem_offset, shader, shader_size);
30455324fb0dSmrg	}
30465324fb0dSmrg
30475324fb0dSmrg	/* overwrite patch codes */
30485324fb0dSmrg	for (i = 0 ; i < 10; i++) {
30495324fb0dSmrg		mem_offset = i * shader_offset + patchcode_offset[0] * sizeof(uint32_t);
30505324fb0dSmrg		patch_code_offset = i * patchinfo_code_size;
30515324fb0dSmrg		memcpy(ptr + mem_offset,
30525324fb0dSmrg			patchinfo_code + patch_code_offset,
30535324fb0dSmrg			patchinfo_code_size * sizeof(uint32_t));
30545324fb0dSmrg	}
30555324fb0dSmrg
30565324fb0dSmrg	return 0;
30575324fb0dSmrg}
30585324fb0dSmrg
30595324fb0dSmrg/* load RectPosTexFast_VS */
30605324fb0dSmrgstatic int amdgpu_draw_load_vs_shader(uint8_t *ptr)
30615324fb0dSmrg{
30625324fb0dSmrg	const uint32_t *shader;
30635324fb0dSmrg	uint32_t shader_size;
30645324fb0dSmrg
30655324fb0dSmrg	shader = vs_RectPosTexFast_shader_gfx9;
30665324fb0dSmrg	shader_size = sizeof(vs_RectPosTexFast_shader_gfx9);
30675324fb0dSmrg
30685324fb0dSmrg	memcpy(ptr, shader, shader_size);
30695324fb0dSmrg
30705324fb0dSmrg	return 0;
30715324fb0dSmrg}
30725324fb0dSmrg
30735324fb0dSmrgstatic int amdgpu_draw_init(uint32_t *ptr)
30745324fb0dSmrg{
30755324fb0dSmrg	int i = 0;
30765324fb0dSmrg	const uint32_t *preamblecache_ptr;
30775324fb0dSmrg	uint32_t preamblecache_size;
30785324fb0dSmrg
30795324fb0dSmrg	/* Write context control and load shadowing register if necessary */
30805324fb0dSmrg	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
30815324fb0dSmrg	ptr[i++] = 0x80000000;
30825324fb0dSmrg	ptr[i++] = 0x80000000;
30835324fb0dSmrg
30845324fb0dSmrg	preamblecache_ptr = preamblecache_gfx9;
30855324fb0dSmrg	preamblecache_size = sizeof(preamblecache_gfx9);
30865324fb0dSmrg
30875324fb0dSmrg	memcpy(ptr + i, preamblecache_ptr, preamblecache_size);
30885324fb0dSmrg	return i + preamblecache_size/sizeof(uint32_t);
30895324fb0dSmrg}
30905324fb0dSmrg
30915324fb0dSmrgstatic int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr,
30929bd392adSmrg							 uint64_t dst_addr,
30939bd392adSmrg							 int hang_slow)
30945324fb0dSmrg{
30955324fb0dSmrg	int i = 0;
30965324fb0dSmrg
30975324fb0dSmrg	/* setup color buffer */
30985324fb0dSmrg	/* offset   reg
30995324fb0dSmrg	   0xA318   CB_COLOR0_BASE
31005324fb0dSmrg	   0xA319   CB_COLOR0_BASE_EXT
31015324fb0dSmrg	   0xA31A   CB_COLOR0_ATTRIB2
31025324fb0dSmrg	   0xA31B   CB_COLOR0_VIEW
31035324fb0dSmrg	   0xA31C   CB_COLOR0_INFO
31045324fb0dSmrg	   0xA31D   CB_COLOR0_ATTRIB
31055324fb0dSmrg	   0xA31E   CB_COLOR0_DCC_CONTROL
31065324fb0dSmrg	   0xA31F   CB_COLOR0_CMASK
31075324fb0dSmrg	   0xA320   CB_COLOR0_CMASK_BASE_EXT
31085324fb0dSmrg	   0xA321   CB_COLOR0_FMASK
31095324fb0dSmrg	   0xA322   CB_COLOR0_FMASK_BASE_EXT
31105324fb0dSmrg	   0xA323   CB_COLOR0_CLEAR_WORD0
31115324fb0dSmrg	   0xA324   CB_COLOR0_CLEAR_WORD1
31125324fb0dSmrg	   0xA325   CB_COLOR0_DCC_BASE
31135324fb0dSmrg	   0xA326   CB_COLOR0_DCC_BASE_EXT */
31145324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15);
31155324fb0dSmrg	ptr[i++] = 0x318;
31165324fb0dSmrg	ptr[i++] = dst_addr >> 8;
31175324fb0dSmrg	ptr[i++] = dst_addr >> 40;
31189bd392adSmrg	ptr[i++] = hang_slow ? 0x1ffc7ff : 0x7c01f;
31195324fb0dSmrg	ptr[i++] = 0;
31205324fb0dSmrg	ptr[i++] = 0x50438;
31215324fb0dSmrg	ptr[i++] = 0x10140000;
31225324fb0dSmrg	i += 9;
31235324fb0dSmrg
31245324fb0dSmrg	/* mmCB_MRT0_EPITCH */
31255324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
31265324fb0dSmrg	ptr[i++] = 0x1e8;
31279bd392adSmrg	ptr[i++] = hang_slow ? 0x7ff : 0x1f;
31285324fb0dSmrg
31295324fb0dSmrg	/* 0xA32B   CB_COLOR1_BASE */
31305324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
31315324fb0dSmrg	ptr[i++] = 0x32b;
31325324fb0dSmrg	ptr[i++] = 0;
31335324fb0dSmrg
31345324fb0dSmrg	/* 0xA33A   CB_COLOR1_BASE */
31355324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
31365324fb0dSmrg	ptr[i++] = 0x33a;
31375324fb0dSmrg	ptr[i++] = 0;
31385324fb0dSmrg
31395324fb0dSmrg	/* SPI_SHADER_COL_FORMAT */
31405324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
31415324fb0dSmrg	ptr[i++] = 0x1c5;
31425324fb0dSmrg	ptr[i++] = 9;
31435324fb0dSmrg
31445324fb0dSmrg	/* Setup depth buffer */
31455324fb0dSmrg	/* mmDB_Z_INFO */
31465324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
31475324fb0dSmrg	ptr[i++] = 0xe;
31485324fb0dSmrg	i += 2;
31495324fb0dSmrg
31505324fb0dSmrg	return i;
31515324fb0dSmrg}
31525324fb0dSmrg
31539bd392adSmrgstatic int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr, int hang_slow)
31545324fb0dSmrg{
31555324fb0dSmrg	int i = 0;
31565324fb0dSmrg	const uint32_t *cached_cmd_ptr;
31575324fb0dSmrg	uint32_t cached_cmd_size;
31585324fb0dSmrg
31595324fb0dSmrg	/* mmPA_SC_TILE_STEERING_OVERRIDE */
31605324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
31615324fb0dSmrg	ptr[i++] = 0xd7;
31625324fb0dSmrg	ptr[i++] = 0;
31635324fb0dSmrg
31645324fb0dSmrg	ptr[i++] = 0xffff1000;
31655324fb0dSmrg	ptr[i++] = 0xc0021000;
31665324fb0dSmrg
31675324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
31685324fb0dSmrg	ptr[i++] = 0xd7;
31695324fb0dSmrg	ptr[i++] = 1;
31705324fb0dSmrg
31715324fb0dSmrg	/* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
31725324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16);
31735324fb0dSmrg	ptr[i++] = 0x2fe;
31745324fb0dSmrg	i += 16;
31755324fb0dSmrg
31765324fb0dSmrg	/* mmPA_SC_CENTROID_PRIORITY_0 */
31775324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
31785324fb0dSmrg	ptr[i++] = 0x2f5;
31795324fb0dSmrg	i += 2;
31805324fb0dSmrg
31815324fb0dSmrg	cached_cmd_ptr = cached_cmd_gfx9;
31825324fb0dSmrg	cached_cmd_size = sizeof(cached_cmd_gfx9);
31835324fb0dSmrg
31845324fb0dSmrg	memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size);
31859bd392adSmrg	if (hang_slow)
31869bd392adSmrg		*(ptr + i + 12) = 0x8000800;
31875324fb0dSmrg	i += cached_cmd_size/sizeof(uint32_t);
31885324fb0dSmrg
31895324fb0dSmrg	return i;
31905324fb0dSmrg}
31915324fb0dSmrg
31925324fb0dSmrgstatic int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr,
31935324fb0dSmrg						  int ps_type,
31949bd392adSmrg						  uint64_t shader_addr,
31959bd392adSmrg						  int hang_slow)
31965324fb0dSmrg{
31975324fb0dSmrg	int i = 0;
31985324fb0dSmrg
31995324fb0dSmrg	/* mmPA_CL_VS_OUT_CNTL */
32005324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
32015324fb0dSmrg	ptr[i++] = 0x207;
32025324fb0dSmrg	ptr[i++] = 0;
32035324fb0dSmrg
32045324fb0dSmrg	/* mmSPI_SHADER_PGM_RSRC3_VS */
32055324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
32065324fb0dSmrg	ptr[i++] = 0x46;
32075324fb0dSmrg	ptr[i++] = 0xffff;
32085324fb0dSmrg
32095324fb0dSmrg	/* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */
32105324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
32115324fb0dSmrg	ptr[i++] = 0x48;
32125324fb0dSmrg	ptr[i++] = shader_addr >> 8;
32135324fb0dSmrg	ptr[i++] = shader_addr >> 40;
32145324fb0dSmrg
32155324fb0dSmrg	/* mmSPI_SHADER_PGM_RSRC1_VS */
32165324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
32175324fb0dSmrg	ptr[i++] = 0x4a;
32185324fb0dSmrg	ptr[i++] = 0xc0081;
32195324fb0dSmrg	/* mmSPI_SHADER_PGM_RSRC2_VS */
32205324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
32215324fb0dSmrg	ptr[i++] = 0x4b;
32225324fb0dSmrg	ptr[i++] = 0x18;
32235324fb0dSmrg
32245324fb0dSmrg	/* mmSPI_VS_OUT_CONFIG */
32255324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
32265324fb0dSmrg	ptr[i++] = 0x1b1;
32275324fb0dSmrg	ptr[i++] = 2;
32285324fb0dSmrg
32295324fb0dSmrg	/* mmSPI_SHADER_POS_FORMAT */
32305324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
32315324fb0dSmrg	ptr[i++] = 0x1c3;
32325324fb0dSmrg	ptr[i++] = 4;
32335324fb0dSmrg
32345324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
32355324fb0dSmrg	ptr[i++] = 0x4c;
32365324fb0dSmrg	i += 2;
32379bd392adSmrg	ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;
32389bd392adSmrg	ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;
32395324fb0dSmrg
32405324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
32415324fb0dSmrg	ptr[i++] = 0x50;
32425324fb0dSmrg	i += 2;
32435324fb0dSmrg	if (ps_type == PS_CONST) {
32445324fb0dSmrg		i += 2;
32455324fb0dSmrg	} else if (ps_type == PS_TEX) {
32465324fb0dSmrg		ptr[i++] = 0x3f800000;
32475324fb0dSmrg		ptr[i++] = 0x3f800000;
32485324fb0dSmrg	}
32495324fb0dSmrg
32505324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
32515324fb0dSmrg	ptr[i++] = 0x54;
32525324fb0dSmrg	i += 4;
32535324fb0dSmrg
32545324fb0dSmrg	return i;
32555324fb0dSmrg}
32565324fb0dSmrg
32575324fb0dSmrgstatic int amdgpu_draw_ps_write2hw(uint32_t *ptr,
32585324fb0dSmrg				   int ps_type,
32595324fb0dSmrg				   uint64_t shader_addr)
32605324fb0dSmrg{
32615324fb0dSmrg	int i, j;
32625324fb0dSmrg	const uint32_t *sh_registers;
32635324fb0dSmrg	const uint32_t *context_registers;
32645324fb0dSmrg	uint32_t num_sh_reg, num_context_reg;
32655324fb0dSmrg
32665324fb0dSmrg	if (ps_type == PS_CONST) {
32675324fb0dSmrg		sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9;
32685324fb0dSmrg		context_registers = (const uint32_t *)ps_const_context_reg_gfx9;
32695324fb0dSmrg		num_sh_reg = ps_num_sh_registers_gfx9;
32705324fb0dSmrg		num_context_reg = ps_num_context_registers_gfx9;
32715324fb0dSmrg	} else if (ps_type == PS_TEX) {
32725324fb0dSmrg		sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9;
32735324fb0dSmrg		context_registers = (const uint32_t *)ps_tex_context_reg_gfx9;
32745324fb0dSmrg		num_sh_reg = ps_num_sh_registers_gfx9;
32755324fb0dSmrg		num_context_reg = ps_num_context_registers_gfx9;
32765324fb0dSmrg	}
32775324fb0dSmrg
32785324fb0dSmrg	i = 0;
32795324fb0dSmrg
32805324fb0dSmrg	/* 0x2c07   SPI_SHADER_PGM_RSRC3_PS
32815324fb0dSmrg	   0x2c08   SPI_SHADER_PGM_LO_PS
32825324fb0dSmrg	   0x2c09   SPI_SHADER_PGM_HI_PS */
32835324fb0dSmrg	shader_addr += 256 * 9;
32845324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
32855324fb0dSmrg	ptr[i++] = 0x7;
32865324fb0dSmrg	ptr[i++] = 0xffff;
32875324fb0dSmrg	ptr[i++] = shader_addr >> 8;
32885324fb0dSmrg	ptr[i++] = shader_addr >> 40;
32895324fb0dSmrg
32905324fb0dSmrg	for (j = 0; j < num_sh_reg; j++) {
32915324fb0dSmrg		ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
32925324fb0dSmrg		ptr[i++] = sh_registers[j * 2] - 0x2c00;
32935324fb0dSmrg		ptr[i++] = sh_registers[j * 2 + 1];
32945324fb0dSmrg	}
32955324fb0dSmrg
32965324fb0dSmrg	for (j = 0; j < num_context_reg; j++) {
32975324fb0dSmrg		if (context_registers[j * 2] != 0xA1C5) {
32985324fb0dSmrg			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
32995324fb0dSmrg			ptr[i++] = context_registers[j * 2] - 0xa000;
33005324fb0dSmrg			ptr[i++] = context_registers[j * 2 + 1];
33015324fb0dSmrg		}
33025324fb0dSmrg
33035324fb0dSmrg		if (context_registers[j * 2] == 0xA1B4) {
33045324fb0dSmrg			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
33055324fb0dSmrg			ptr[i++] = 0x1b3;
33065324fb0dSmrg			ptr[i++] = 2;
33075324fb0dSmrg		}
33085324fb0dSmrg	}
33095324fb0dSmrg
33105324fb0dSmrg	return i;
33115324fb0dSmrg}
33125324fb0dSmrg
33135324fb0dSmrgstatic int amdgpu_draw_draw(uint32_t *ptr)
33145324fb0dSmrg{
33155324fb0dSmrg	int i = 0;
33165324fb0dSmrg
33175324fb0dSmrg	/* mmIA_MULTI_VGT_PARAM */
33185324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
33195324fb0dSmrg	ptr[i++] = 0x40000258;
33205324fb0dSmrg	ptr[i++] = 0xd00ff;
33215324fb0dSmrg
33225324fb0dSmrg	/* mmVGT_PRIMITIVE_TYPE */
33235324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
33245324fb0dSmrg	ptr[i++] = 0x10000242;
33255324fb0dSmrg	ptr[i++] = 0x11;
33265324fb0dSmrg
33275324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1);
33285324fb0dSmrg	ptr[i++] = 3;
33295324fb0dSmrg	ptr[i++] = 2;
33305324fb0dSmrg
33315324fb0dSmrg	return i;
33325324fb0dSmrg}
33335324fb0dSmrg
33345324fb0dSmrgvoid amdgpu_memset_draw(amdgpu_device_handle device_handle,
33355324fb0dSmrg			amdgpu_bo_handle bo_shader_ps,
33365324fb0dSmrg			amdgpu_bo_handle bo_shader_vs,
33375324fb0dSmrg			uint64_t mc_address_shader_ps,
33385324fb0dSmrg			uint64_t mc_address_shader_vs,
33395324fb0dSmrg			uint32_t ring_id)
33405324fb0dSmrg{
33415324fb0dSmrg	amdgpu_context_handle context_handle;
33425324fb0dSmrg	amdgpu_bo_handle bo_dst, bo_cmd, resources[4];
33435324fb0dSmrg	volatile unsigned char *ptr_dst;
33445324fb0dSmrg	uint32_t *ptr_cmd;
33455324fb0dSmrg	uint64_t mc_address_dst, mc_address_cmd;
33465324fb0dSmrg	amdgpu_va_handle va_dst, va_cmd;
33475324fb0dSmrg	int i, r;
33485324fb0dSmrg	int bo_dst_size = 16384;
33495324fb0dSmrg	int bo_cmd_size = 4096;
33505324fb0dSmrg	struct amdgpu_cs_request ibs_request = {0};
33515324fb0dSmrg	struct amdgpu_cs_ib_info ib_info = {0};
33525324fb0dSmrg	struct amdgpu_cs_fence fence_status = {0};
33535324fb0dSmrg	uint32_t expired;
33545324fb0dSmrg	amdgpu_bo_list_handle bo_list;
33555324fb0dSmrg
33565324fb0dSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
33575324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
33585324fb0dSmrg
33595324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
33605324fb0dSmrg					AMDGPU_GEM_DOMAIN_GTT, 0,
33615324fb0dSmrg					&bo_cmd, (void **)&ptr_cmd,
33625324fb0dSmrg					&mc_address_cmd, &va_cmd);
33635324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
33645324fb0dSmrg	memset(ptr_cmd, 0, bo_cmd_size);
33655324fb0dSmrg
33665324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
33675324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
33685324fb0dSmrg					&bo_dst, (void **)&ptr_dst,
33695324fb0dSmrg					&mc_address_dst, &va_dst);
33705324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
33715324fb0dSmrg
33725324fb0dSmrg	i = 0;
33735324fb0dSmrg	i += amdgpu_draw_init(ptr_cmd + i);
33745324fb0dSmrg
33759bd392adSmrg	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0);
33765324fb0dSmrg
33779bd392adSmrg	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0);
33785324fb0dSmrg
33799bd392adSmrg	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs, 0);
33805324fb0dSmrg
33815324fb0dSmrg	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps);
33825324fb0dSmrg
33835324fb0dSmrg	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
33845324fb0dSmrg	ptr_cmd[i++] = 0xc;
33855324fb0dSmrg	ptr_cmd[i++] = 0x33333333;
33865324fb0dSmrg	ptr_cmd[i++] = 0x33333333;
33875324fb0dSmrg	ptr_cmd[i++] = 0x33333333;
33885324fb0dSmrg	ptr_cmd[i++] = 0x33333333;
33895324fb0dSmrg
33905324fb0dSmrg	i += amdgpu_draw_draw(ptr_cmd + i);
33915324fb0dSmrg
33925324fb0dSmrg	while (i & 7)
33935324fb0dSmrg		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
33945324fb0dSmrg
33955324fb0dSmrg	resources[0] = bo_dst;
33965324fb0dSmrg	resources[1] = bo_shader_ps;
33975324fb0dSmrg	resources[2] = bo_shader_vs;
33985324fb0dSmrg	resources[3] = bo_cmd;
33999bd392adSmrg	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
34005324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
34015324fb0dSmrg
34025324fb0dSmrg	ib_info.ib_mc_address = mc_address_cmd;
34035324fb0dSmrg	ib_info.size = i;
34045324fb0dSmrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
34055324fb0dSmrg	ibs_request.ring = ring_id;
34065324fb0dSmrg	ibs_request.resources = bo_list;
34075324fb0dSmrg	ibs_request.number_of_ibs = 1;
34085324fb0dSmrg	ibs_request.ibs = &ib_info;
34095324fb0dSmrg	ibs_request.fence_info.handle = NULL;
34105324fb0dSmrg
34115324fb0dSmrg	/* submit CS */
34125324fb0dSmrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
34135324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
34145324fb0dSmrg
34155324fb0dSmrg	r = amdgpu_bo_list_destroy(bo_list);
34165324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
34175324fb0dSmrg
34185324fb0dSmrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
34195324fb0dSmrg	fence_status.ip_instance = 0;
34205324fb0dSmrg	fence_status.ring = ring_id;
34215324fb0dSmrg	fence_status.context = context_handle;
34225324fb0dSmrg	fence_status.fence = ibs_request.seq_no;
34235324fb0dSmrg
34245324fb0dSmrg	/* wait for IB accomplished */
34255324fb0dSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
34265324fb0dSmrg					 AMDGPU_TIMEOUT_INFINITE,
34275324fb0dSmrg					 0, &expired);
34285324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
34295324fb0dSmrg	CU_ASSERT_EQUAL(expired, true);
34305324fb0dSmrg
34315324fb0dSmrg	/* verify if memset test result meets with expected */
34325324fb0dSmrg	i = 0;
34335324fb0dSmrg	while(i < bo_dst_size) {
34345324fb0dSmrg		CU_ASSERT_EQUAL(ptr_dst[i++], 0x33);
34355324fb0dSmrg	}
34365324fb0dSmrg
34375324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
34385324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
34395324fb0dSmrg
34405324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
34415324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
34425324fb0dSmrg
34435324fb0dSmrg	r = amdgpu_cs_ctx_free(context_handle);
34445324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
34455324fb0dSmrg}
34465324fb0dSmrg
34475324fb0dSmrgstatic void amdgpu_memset_draw_test(amdgpu_device_handle device_handle,
34485324fb0dSmrg				    uint32_t ring)
34495324fb0dSmrg{
34505324fb0dSmrg	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
34515324fb0dSmrg	void *ptr_shader_ps;
34525324fb0dSmrg	void *ptr_shader_vs;
34535324fb0dSmrg	uint64_t mc_address_shader_ps, mc_address_shader_vs;
34545324fb0dSmrg	amdgpu_va_handle va_shader_ps, va_shader_vs;
34555324fb0dSmrg	int r;
34565324fb0dSmrg	int bo_shader_size = 4096;
34575324fb0dSmrg
34585324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
34595324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
34605324fb0dSmrg					&bo_shader_ps, &ptr_shader_ps,
34615324fb0dSmrg					&mc_address_shader_ps, &va_shader_ps);
34625324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
346388f8a8d2Smrg	memset(ptr_shader_ps, 0, bo_shader_size);
34645324fb0dSmrg
34655324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
34665324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
34675324fb0dSmrg					&bo_shader_vs, &ptr_shader_vs,
34685324fb0dSmrg					&mc_address_shader_vs, &va_shader_vs);
34695324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
347088f8a8d2Smrg	memset(ptr_shader_vs, 0, bo_shader_size);
34715324fb0dSmrg
34725324fb0dSmrg	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_CONST);
34735324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
34745324fb0dSmrg
34755324fb0dSmrg	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
34765324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
34775324fb0dSmrg
34785324fb0dSmrg	amdgpu_memset_draw(device_handle, bo_shader_ps, bo_shader_vs,
34795324fb0dSmrg			mc_address_shader_ps, mc_address_shader_vs, ring);
34805324fb0dSmrg
34815324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
34825324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
34835324fb0dSmrg
34845324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
34855324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
34865324fb0dSmrg}
34875324fb0dSmrg
34885324fb0dSmrgstatic void amdgpu_memcpy_draw(amdgpu_device_handle device_handle,
34895324fb0dSmrg			       amdgpu_bo_handle bo_shader_ps,
34905324fb0dSmrg			       amdgpu_bo_handle bo_shader_vs,
34915324fb0dSmrg			       uint64_t mc_address_shader_ps,
34925324fb0dSmrg			       uint64_t mc_address_shader_vs,
34939bd392adSmrg			       uint32_t ring, int hang)
34945324fb0dSmrg{
34955324fb0dSmrg	amdgpu_context_handle context_handle;
34965324fb0dSmrg	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
34975324fb0dSmrg	volatile unsigned char *ptr_dst;
34985324fb0dSmrg	unsigned char *ptr_src;
34995324fb0dSmrg	uint32_t *ptr_cmd;
35005324fb0dSmrg	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
35015324fb0dSmrg	amdgpu_va_handle va_dst, va_src, va_cmd;
35025324fb0dSmrg	int i, r;
35035324fb0dSmrg	int bo_size = 16384;
35045324fb0dSmrg	int bo_cmd_size = 4096;
35055324fb0dSmrg	struct amdgpu_cs_request ibs_request = {0};
35065324fb0dSmrg	struct amdgpu_cs_ib_info ib_info= {0};
35079bd392adSmrg	uint32_t hang_state, hangs;
35089bd392adSmrg	uint32_t expired;
35095324fb0dSmrg	amdgpu_bo_list_handle bo_list;
35105324fb0dSmrg	struct amdgpu_cs_fence fence_status = {0};
35115324fb0dSmrg
35125324fb0dSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
35135324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
35145324fb0dSmrg
35155324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
35165324fb0dSmrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
35175324fb0dSmrg				    &bo_cmd, (void **)&ptr_cmd,
35185324fb0dSmrg				    &mc_address_cmd, &va_cmd);
35195324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
35205324fb0dSmrg	memset(ptr_cmd, 0, bo_cmd_size);
35215324fb0dSmrg
35225324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
35235324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
35245324fb0dSmrg					&bo_src, (void **)&ptr_src,
35255324fb0dSmrg					&mc_address_src, &va_src);
35265324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
35275324fb0dSmrg
35285324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
35295324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
35305324fb0dSmrg					&bo_dst, (void **)&ptr_dst,
35315324fb0dSmrg					&mc_address_dst, &va_dst);
35325324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
35335324fb0dSmrg
35345324fb0dSmrg	memset(ptr_src, 0x55, bo_size);
35355324fb0dSmrg
35365324fb0dSmrg	i = 0;
35375324fb0dSmrg	i += amdgpu_draw_init(ptr_cmd + i);
35385324fb0dSmrg
35399bd392adSmrg	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0);
35405324fb0dSmrg
35419bd392adSmrg	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0);
35425324fb0dSmrg
35439bd392adSmrg	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs, 0);
35445324fb0dSmrg
35455324fb0dSmrg	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);
35465324fb0dSmrg
35475324fb0dSmrg	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
35485324fb0dSmrg	ptr_cmd[i++] = 0xc;
35495324fb0dSmrg	ptr_cmd[i++] = mc_address_src >> 8;
35505324fb0dSmrg	ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
35515324fb0dSmrg	ptr_cmd[i++] = 0x7c01f;
35525324fb0dSmrg	ptr_cmd[i++] = 0x90500fac;
35535324fb0dSmrg	ptr_cmd[i++] = 0x3e000;
35545324fb0dSmrg	i += 3;
35555324fb0dSmrg
35565324fb0dSmrg	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
35575324fb0dSmrg	ptr_cmd[i++] = 0x14;
35585324fb0dSmrg	ptr_cmd[i++] = 0x92;
35595324fb0dSmrg	i += 3;
35605324fb0dSmrg
356188f8a8d2Smrg	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
35625324fb0dSmrg	ptr_cmd[i++] = 0x191;
35635324fb0dSmrg	ptr_cmd[i++] = 0;
35645324fb0dSmrg
35655324fb0dSmrg	i += amdgpu_draw_draw(ptr_cmd + i);
35665324fb0dSmrg
35675324fb0dSmrg	while (i & 7)
35685324fb0dSmrg		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
35695324fb0dSmrg
35705324fb0dSmrg	resources[0] = bo_dst;
35715324fb0dSmrg	resources[1] = bo_src;
35725324fb0dSmrg	resources[2] = bo_shader_ps;
35735324fb0dSmrg	resources[3] = bo_shader_vs;
35745324fb0dSmrg	resources[4] = bo_cmd;
35755324fb0dSmrg	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
35765324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
35775324fb0dSmrg
35785324fb0dSmrg	ib_info.ib_mc_address = mc_address_cmd;
35795324fb0dSmrg	ib_info.size = i;
35805324fb0dSmrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
35815324fb0dSmrg	ibs_request.ring = ring;
35825324fb0dSmrg	ibs_request.resources = bo_list;
35835324fb0dSmrg	ibs_request.number_of_ibs = 1;
35845324fb0dSmrg	ibs_request.ibs = &ib_info;
35855324fb0dSmrg	ibs_request.fence_info.handle = NULL;
35865324fb0dSmrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
35875324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
35885324fb0dSmrg
35895324fb0dSmrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
35905324fb0dSmrg	fence_status.ip_instance = 0;
35915324fb0dSmrg	fence_status.ring = ring;
35925324fb0dSmrg	fence_status.context = context_handle;
35935324fb0dSmrg	fence_status.fence = ibs_request.seq_no;
35945324fb0dSmrg
35955324fb0dSmrg	/* wait for IB accomplished */
35965324fb0dSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
35975324fb0dSmrg					 AMDGPU_TIMEOUT_INFINITE,
35985324fb0dSmrg					 0, &expired);
35999bd392adSmrg	if (!hang) {
36009bd392adSmrg		CU_ASSERT_EQUAL(r, 0);
36019bd392adSmrg		CU_ASSERT_EQUAL(expired, true);
36025324fb0dSmrg
36039bd392adSmrg		/* verify if memcpy test result meets with expected */
36049bd392adSmrg		i = 0;
36059bd392adSmrg		while(i < bo_size) {
36069bd392adSmrg			CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
36079bd392adSmrg			i++;
36089bd392adSmrg		}
36099bd392adSmrg	} else {
36109bd392adSmrg		r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
36119bd392adSmrg		CU_ASSERT_EQUAL(r, 0);
36129bd392adSmrg		CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
36135324fb0dSmrg	}
36145324fb0dSmrg
36155324fb0dSmrg	r = amdgpu_bo_list_destroy(bo_list);
36165324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
36175324fb0dSmrg
36185324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
36195324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
36205324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
36215324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
36225324fb0dSmrg
36235324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
36245324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
36255324fb0dSmrg
36265324fb0dSmrg	r = amdgpu_cs_ctx_free(context_handle);
36275324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
36285324fb0dSmrg}
36295324fb0dSmrg
36309bd392adSmrgvoid amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring,
36319bd392adSmrg			     int hang)
36325324fb0dSmrg{
36335324fb0dSmrg	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
36345324fb0dSmrg	void *ptr_shader_ps;
36355324fb0dSmrg	void *ptr_shader_vs;
36365324fb0dSmrg	uint64_t mc_address_shader_ps, mc_address_shader_vs;
36375324fb0dSmrg	amdgpu_va_handle va_shader_ps, va_shader_vs;
36385324fb0dSmrg	int bo_shader_size = 4096;
36399bd392adSmrg	enum ps_type ps_type = hang ? PS_HANG : PS_TEX;
36405324fb0dSmrg	int r;
36415324fb0dSmrg
36425324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
36435324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
36445324fb0dSmrg					&bo_shader_ps, &ptr_shader_ps,
36455324fb0dSmrg					&mc_address_shader_ps, &va_shader_ps);
36465324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
364788f8a8d2Smrg	memset(ptr_shader_ps, 0, bo_shader_size);
36485324fb0dSmrg
36495324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
36505324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
36515324fb0dSmrg					&bo_shader_vs, &ptr_shader_vs,
36525324fb0dSmrg					&mc_address_shader_vs, &va_shader_vs);
36535324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
365488f8a8d2Smrg	memset(ptr_shader_vs, 0, bo_shader_size);
36555324fb0dSmrg
36569bd392adSmrg	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, ps_type);
36575324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
36585324fb0dSmrg
36595324fb0dSmrg	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
36605324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
36615324fb0dSmrg
36625324fb0dSmrg	amdgpu_memcpy_draw(device_handle, bo_shader_ps, bo_shader_vs,
36639bd392adSmrg			mc_address_shader_ps, mc_address_shader_vs, ring, hang);
36645324fb0dSmrg
36655324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
36665324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
36675324fb0dSmrg
36685324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
36695324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
36705324fb0dSmrg}
36715324fb0dSmrg
36725324fb0dSmrgstatic void amdgpu_draw_test(void)
36735324fb0dSmrg{
36745324fb0dSmrg	int r;
36755324fb0dSmrg	struct drm_amdgpu_info_hw_ip info;
36765324fb0dSmrg	uint32_t ring_id;
36775324fb0dSmrg
36785324fb0dSmrg	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
36795324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
368088f8a8d2Smrg	if (!info.available_rings)
368188f8a8d2Smrg		printf("SKIP ... as there's no graphics ring\n");
36825324fb0dSmrg
36835324fb0dSmrg	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
36845324fb0dSmrg		amdgpu_memset_draw_test(device_handle, ring_id);
36859bd392adSmrg		amdgpu_memcpy_draw_test(device_handle, ring_id, 0);
36865324fb0dSmrg	}
36875324fb0dSmrg}
368888f8a8d2Smrg
36899bd392adSmrgvoid amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint32_t ring)
36909bd392adSmrg{
36919bd392adSmrg	amdgpu_context_handle context_handle;
36929bd392adSmrg	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
36939bd392adSmrg	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
36949bd392adSmrg	void *ptr_shader_ps;
36959bd392adSmrg	void *ptr_shader_vs;
36969bd392adSmrg	volatile unsigned char *ptr_dst;
36979bd392adSmrg	unsigned char *ptr_src;
36989bd392adSmrg	uint32_t *ptr_cmd;
36999bd392adSmrg	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
37009bd392adSmrg	uint64_t mc_address_shader_ps, mc_address_shader_vs;
37019bd392adSmrg	amdgpu_va_handle va_shader_ps, va_shader_vs;
37029bd392adSmrg	amdgpu_va_handle va_dst, va_src, va_cmd;
37039bd392adSmrg	struct amdgpu_gpu_info gpu_info = {0};
37049bd392adSmrg	int i, r;
37059bd392adSmrg	int bo_size = 0x4000000;
37069bd392adSmrg	int bo_shader_ps_size = 0x400000;
37079bd392adSmrg	int bo_shader_vs_size = 4096;
37089bd392adSmrg	int bo_cmd_size = 4096;
37099bd392adSmrg	struct amdgpu_cs_request ibs_request = {0};
37109bd392adSmrg	struct amdgpu_cs_ib_info ib_info= {0};
37119bd392adSmrg	uint32_t hang_state, hangs, expired;
37129bd392adSmrg	amdgpu_bo_list_handle bo_list;
37139bd392adSmrg	struct amdgpu_cs_fence fence_status = {0};
37149bd392adSmrg
37159bd392adSmrg	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
37169bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
37179bd392adSmrg
37189bd392adSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
37199bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
37209bd392adSmrg
37219bd392adSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
37229bd392adSmrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
37239bd392adSmrg				    &bo_cmd, (void **)&ptr_cmd,
37249bd392adSmrg				    &mc_address_cmd, &va_cmd);
37259bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
37269bd392adSmrg	memset(ptr_cmd, 0, bo_cmd_size);
37279bd392adSmrg
37289bd392adSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_ps_size, 4096,
37299bd392adSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
37309bd392adSmrg					&bo_shader_ps, &ptr_shader_ps,
37319bd392adSmrg					&mc_address_shader_ps, &va_shader_ps);
37329bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
37339bd392adSmrg	memset(ptr_shader_ps, 0, bo_shader_ps_size);
37349bd392adSmrg
37359bd392adSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_vs_size, 4096,
37369bd392adSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
37379bd392adSmrg					&bo_shader_vs, &ptr_shader_vs,
37389bd392adSmrg					&mc_address_shader_vs, &va_shader_vs);
37399bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
37409bd392adSmrg	memset(ptr_shader_vs, 0, bo_shader_vs_size);
37419bd392adSmrg
37429bd392adSmrg	r = amdgpu_draw_load_ps_shader_hang_slow(ptr_shader_ps, gpu_info.family_id);
37439bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
37449bd392adSmrg
37459bd392adSmrg	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
37469bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
37479bd392adSmrg
37489bd392adSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
37499bd392adSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
37509bd392adSmrg					&bo_src, (void **)&ptr_src,
37519bd392adSmrg					&mc_address_src, &va_src);
37529bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
37539bd392adSmrg
37549bd392adSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
37559bd392adSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
37569bd392adSmrg					&bo_dst, (void **)&ptr_dst,
37579bd392adSmrg					&mc_address_dst, &va_dst);
37589bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
37599bd392adSmrg
37609bd392adSmrg	memset(ptr_src, 0x55, bo_size);
37619bd392adSmrg
37629bd392adSmrg	i = 0;
37639bd392adSmrg	i += amdgpu_draw_init(ptr_cmd + i);
37649bd392adSmrg
37659bd392adSmrg	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 1);
37669bd392adSmrg
37679bd392adSmrg	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 1);
37689bd392adSmrg
37699bd392adSmrg	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX,
37709bd392adSmrg							mc_address_shader_vs, 1);
37719bd392adSmrg
37729bd392adSmrg	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);
37739bd392adSmrg
37749bd392adSmrg	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
37759bd392adSmrg	ptr_cmd[i++] = 0xc;
37769bd392adSmrg	ptr_cmd[i++] = mc_address_src >> 8;
37779bd392adSmrg	ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
37789bd392adSmrg	ptr_cmd[i++] = 0x1ffc7ff;
37799bd392adSmrg	ptr_cmd[i++] = 0x90500fac;
37809bd392adSmrg	ptr_cmd[i++] = 0xffe000;
37819bd392adSmrg	i += 3;
37829bd392adSmrg
37839bd392adSmrg	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
37849bd392adSmrg	ptr_cmd[i++] = 0x14;
37859bd392adSmrg	ptr_cmd[i++] = 0x92;
37869bd392adSmrg	i += 3;
37879bd392adSmrg
37889bd392adSmrg	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
37899bd392adSmrg	ptr_cmd[i++] = 0x191;
37909bd392adSmrg	ptr_cmd[i++] = 0;
37919bd392adSmrg
37929bd392adSmrg	i += amdgpu_draw_draw(ptr_cmd + i);
37939bd392adSmrg
37949bd392adSmrg	while (i & 7)
37959bd392adSmrg		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
37969bd392adSmrg
37979bd392adSmrg	resources[0] = bo_dst;
37989bd392adSmrg	resources[1] = bo_src;
37999bd392adSmrg	resources[2] = bo_shader_ps;
38009bd392adSmrg	resources[3] = bo_shader_vs;
38019bd392adSmrg	resources[4] = bo_cmd;
38029bd392adSmrg	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
38039bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
38049bd392adSmrg
38059bd392adSmrg	ib_info.ib_mc_address = mc_address_cmd;
38069bd392adSmrg	ib_info.size = i;
38079bd392adSmrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
38089bd392adSmrg	ibs_request.ring = ring;
38099bd392adSmrg	ibs_request.resources = bo_list;
38109bd392adSmrg	ibs_request.number_of_ibs = 1;
38119bd392adSmrg	ibs_request.ibs = &ib_info;
38129bd392adSmrg	ibs_request.fence_info.handle = NULL;
38139bd392adSmrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
38149bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
38159bd392adSmrg
38169bd392adSmrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
38179bd392adSmrg	fence_status.ip_instance = 0;
38189bd392adSmrg	fence_status.ring = ring;
38199bd392adSmrg	fence_status.context = context_handle;
38209bd392adSmrg	fence_status.fence = ibs_request.seq_no;
38219bd392adSmrg
38229bd392adSmrg	/* wait for IB accomplished */
38239bd392adSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
38249bd392adSmrg					 AMDGPU_TIMEOUT_INFINITE,
38259bd392adSmrg					 0, &expired);
38269bd392adSmrg
38279bd392adSmrg	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
38289bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
38299bd392adSmrg	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
38309bd392adSmrg
38319bd392adSmrg	r = amdgpu_bo_list_destroy(bo_list);
38329bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
38339bd392adSmrg
38349bd392adSmrg	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
38359bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
38369bd392adSmrg	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
38379bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
38389bd392adSmrg
38399bd392adSmrg	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
38409bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
38419bd392adSmrg
38429bd392adSmrg	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_ps_size);
38439bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
38449bd392adSmrg	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_vs_size);
38459bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
38469bd392adSmrg
38479bd392adSmrg	r = amdgpu_cs_ctx_free(context_handle);
38489bd392adSmrg	CU_ASSERT_EQUAL(r, 0);
38499bd392adSmrg}
38509bd392adSmrg
385188f8a8d2Smrgstatic void amdgpu_gpu_reset_test(void)
385288f8a8d2Smrg{
385388f8a8d2Smrg	int r;
385488f8a8d2Smrg	char debugfs_path[256], tmp[10];
385588f8a8d2Smrg	int fd;
385688f8a8d2Smrg	struct stat sbuf;
385788f8a8d2Smrg	amdgpu_context_handle context_handle;
385888f8a8d2Smrg	uint32_t hang_state, hangs;
385988f8a8d2Smrg
386088f8a8d2Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
386188f8a8d2Smrg	CU_ASSERT_EQUAL(r, 0);
386288f8a8d2Smrg
386388f8a8d2Smrg	r = fstat(drm_amdgpu[0], &sbuf);
386488f8a8d2Smrg	CU_ASSERT_EQUAL(r, 0);
386588f8a8d2Smrg
386688f8a8d2Smrg	sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/amdgpu_gpu_recover", minor(sbuf.st_rdev));
386788f8a8d2Smrg	fd = open(debugfs_path, O_RDONLY);
386888f8a8d2Smrg	CU_ASSERT(fd >= 0);
386988f8a8d2Smrg
387088f8a8d2Smrg	r = read(fd, tmp, sizeof(tmp)/sizeof(char));
387188f8a8d2Smrg	CU_ASSERT(r > 0);
387288f8a8d2Smrg
387388f8a8d2Smrg	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
387488f8a8d2Smrg	CU_ASSERT_EQUAL(r, 0);
387588f8a8d2Smrg	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
387688f8a8d2Smrg
387788f8a8d2Smrg	close(fd);
387888f8a8d2Smrg	r = amdgpu_cs_ctx_free(context_handle);
387988f8a8d2Smrg	CU_ASSERT_EQUAL(r, 0);
388088f8a8d2Smrg
388188f8a8d2Smrg	amdgpu_compute_dispatch_test();
388288f8a8d2Smrg	amdgpu_gfx_dispatch_test();
388388f8a8d2Smrg}
3884