basic_tests.c revision 5324fb0d
13f012e29Smrg/*
23f012e29Smrg * Copyright 2014 Advanced Micro Devices, Inc.
33f012e29Smrg *
43f012e29Smrg * Permission is hereby granted, free of charge, to any person obtaining a
53f012e29Smrg * copy of this software and associated documentation files (the "Software"),
63f012e29Smrg * to deal in the Software without restriction, including without limitation
73f012e29Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
83f012e29Smrg * and/or sell copies of the Software, and to permit persons to whom the
93f012e29Smrg * Software is furnished to do so, subject to the following conditions:
103f012e29Smrg *
113f012e29Smrg * The above copyright notice and this permission notice shall be included in
123f012e29Smrg * all copies or substantial portions of the Software.
133f012e29Smrg *
143f012e29Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
153f012e29Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
163f012e29Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
173f012e29Smrg * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
183f012e29Smrg * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
193f012e29Smrg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
203f012e29Smrg * OTHER DEALINGS IN THE SOFTWARE.
213f012e29Smrg *
223f012e29Smrg*/
233f012e29Smrg
243f012e29Smrg#include <stdio.h>
253f012e29Smrg#include <stdlib.h>
263f012e29Smrg#include <unistd.h>
273f012e29Smrg#ifdef HAVE_ALLOCA_H
283f012e29Smrg# include <alloca.h>
293f012e29Smrg#endif
3000a23bdaSmrg#include <sys/wait.h>
313f012e29Smrg
323f012e29Smrg#include "CUnit/Basic.h"
333f012e29Smrg
343f012e29Smrg#include "amdgpu_test.h"
353f012e29Smrg#include "amdgpu_drm.h"
367cdc0497Smrg#include "util_math.h"
373f012e29Smrg
383f012e29Smrgstatic  amdgpu_device_handle device_handle;
393f012e29Smrgstatic  uint32_t  major_version;
403f012e29Smrgstatic  uint32_t  minor_version;
41d8807b2fSmrgstatic  uint32_t  family_id;
423f012e29Smrg
433f012e29Smrgstatic void amdgpu_query_info_test(void);
443f012e29Smrgstatic void amdgpu_command_submission_gfx(void);
453f012e29Smrgstatic void amdgpu_command_submission_compute(void);
46d8807b2fSmrgstatic void amdgpu_command_submission_multi_fence(void);
473f012e29Smrgstatic void amdgpu_command_submission_sdma(void);
483f012e29Smrgstatic void amdgpu_userptr_test(void);
493f012e29Smrgstatic void amdgpu_semaphore_test(void);
5000a23bdaSmrgstatic void amdgpu_sync_dependency_test(void);
5100a23bdaSmrgstatic void amdgpu_bo_eviction_test(void);
525324fb0dSmrgstatic void amdgpu_dispatch_test(void);
535324fb0dSmrgstatic void amdgpu_draw_test(void);
543f012e29Smrg
553f012e29Smrgstatic void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
563f012e29Smrgstatic void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
573f012e29Smrgstatic void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
5800a23bdaSmrgstatic void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
5900a23bdaSmrg				       unsigned ip_type,
6000a23bdaSmrg				       int instance, int pm4_dw, uint32_t *pm4_src,
6100a23bdaSmrg				       int res_cnt, amdgpu_bo_handle *resources,
6200a23bdaSmrg				       struct amdgpu_cs_ib_info *ib_info,
6300a23bdaSmrg				       struct amdgpu_cs_request *ibs_request);
6400a23bdaSmrg
653f012e29SmrgCU_TestInfo basic_tests[] = {
663f012e29Smrg	{ "Query Info Test",  amdgpu_query_info_test },
673f012e29Smrg	{ "Userptr Test",  amdgpu_userptr_test },
6800a23bdaSmrg	{ "bo eviction Test",  amdgpu_bo_eviction_test },
693f012e29Smrg	{ "Command submission Test (GFX)",  amdgpu_command_submission_gfx },
703f012e29Smrg	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
71d8807b2fSmrg	{ "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
723f012e29Smrg	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
733f012e29Smrg	{ "SW semaphore Test",  amdgpu_semaphore_test },
7400a23bdaSmrg	{ "Sync dependency Test",  amdgpu_sync_dependency_test },
755324fb0dSmrg	{ "Dispatch Test",  amdgpu_dispatch_test },
765324fb0dSmrg	{ "Draw Test",  amdgpu_draw_test },
773f012e29Smrg	CU_TEST_INFO_NULL,
783f012e29Smrg};
793f012e29Smrg#define BUFFER_SIZE (8 * 1024)
803f012e29Smrg#define SDMA_PKT_HEADER_op_offset 0
813f012e29Smrg#define SDMA_PKT_HEADER_op_mask   0x000000FF
823f012e29Smrg#define SDMA_PKT_HEADER_op_shift  0
833f012e29Smrg#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
843f012e29Smrg#define SDMA_OPCODE_CONSTANT_FILL  11
853f012e29Smrg#       define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)           ((x) << 14)
863f012e29Smrg	/* 0 = byte fill
873f012e29Smrg	 * 2 = DW fill
883f012e29Smrg	 */
893f012e29Smrg#define SDMA_PACKET(op, sub_op, e)	((((e) & 0xFFFF) << 16) |	\
903f012e29Smrg					(((sub_op) & 0xFF) << 8) |	\
913f012e29Smrg					(((op) & 0xFF) << 0))
923f012e29Smrg#define	SDMA_OPCODE_WRITE				  2
933f012e29Smrg#       define SDMA_WRITE_SUB_OPCODE_LINEAR               0
943f012e29Smrg#       define SDMA_WRTIE_SUB_OPCODE_TILED                1
953f012e29Smrg
963f012e29Smrg#define	SDMA_OPCODE_COPY				  1
973f012e29Smrg#       define SDMA_COPY_SUB_OPCODE_LINEAR                0
983f012e29Smrg
993f012e29Smrg#define GFX_COMPUTE_NOP  0xffff1000
1003f012e29Smrg#define SDMA_NOP  0x0
1013f012e29Smrg
/* PM4 */
/* Packet type codes stored in bits 31:30 of a packet header dword. */
#define	PACKET_TYPE0	0
#define	PACKET_TYPE1	1
#define	PACKET_TYPE2	2
#define	PACKET_TYPE3	3

/* Field extractors for a packet header dword `h`. */
#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)

/* Type-0 header: register in bits 15:0, count in bits 29:16. */
#define PACKET0(reg, n)	(((uint32_t)PACKET_TYPE0 << 30) |		\
			 ((reg) & 0xFFFF) |				\
			 (((n) & 0x3FFF) << 16))
#define CP_PACKET2			0x80000000
#define		PACKET2_PAD_SHIFT		0
#define		PACKET2_PAD_MASK		(0x3fffffff << 0)

/* NOTE(review): REG_SET is not defined in the visible part of this file. */
#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))

/*
 * Type-3 header: opcode in bits 15:8, count in bits 29:16.
 * The type field is shifted as uint32_t because 3 << 30 does not fit in a
 * signed int.
 */
#define PACKET3(op, n)	(((uint32_t)PACKET_TYPE3 << 30) |		\
			 (((op) & 0xFF) << 8) |				\
			 (((n) & 0x3FFF) << 16))
/*
 * Type-3 header with bit 1 set (presumably the compute shader-type bit;
 * confirm against the PM4 spec).  Fully parenthesized so the macro can be
 * combined with other operators safely.
 */
#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | (1 << 1))
1253f012e29Smrg
1263f012e29Smrg/* Packet 3 types */
1273f012e29Smrg#define	PACKET3_NOP					0x10
1283f012e29Smrg
1293f012e29Smrg#define	PACKET3_WRITE_DATA				0x37
1303f012e29Smrg#define		WRITE_DATA_DST_SEL(x)                   ((x) << 8)
1313f012e29Smrg		/* 0 - register
1323f012e29Smrg		 * 1 - memory (sync - via GRBM)
1333f012e29Smrg		 * 2 - gl2
1343f012e29Smrg		 * 3 - gds
1353f012e29Smrg		 * 4 - reserved
1363f012e29Smrg		 * 5 - memory (async - direct)
1373f012e29Smrg		 */
1383f012e29Smrg#define		WR_ONE_ADDR                             (1 << 16)
1393f012e29Smrg#define		WR_CONFIRM                              (1 << 20)
1403f012e29Smrg#define		WRITE_DATA_CACHE_POLICY(x)              ((x) << 25)
1413f012e29Smrg		/* 0 - LRU
1423f012e29Smrg		 * 1 - Stream
1433f012e29Smrg		 */
1443f012e29Smrg#define		WRITE_DATA_ENGINE_SEL(x)                ((x) << 30)
1453f012e29Smrg		/* 0 - me
1463f012e29Smrg		 * 1 - pfp
1473f012e29Smrg		 * 2 - ce
1483f012e29Smrg		 */
1493f012e29Smrg
#define	PACKET3_DMA_DATA				0x50
/* Packet layout (one dword each):
 * 1. header
 * 2. CONTROL
 * 3. SRC_ADDR_LO or DATA [31:0]
 * 4. SRC_ADDR_HI [31:0]
 * 5. DST_ADDR_LO [31:0]
 * 6. DST_ADDR_HI [7:0]
 * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
 */
/* CONTROL */
#              define PACKET3_DMA_DATA_ENGINE(x)     ((x) << 0)
		/* 0 - ME
		 * 1 - PFP
		 */
#              define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#              define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15)
#              define PACKET3_DMA_DATA_DST_SEL(x)  ((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#              define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27)
#              define PACKET3_DMA_DATA_SRC_SEL(x)  ((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
		/* Bit 31: unsigned constant, since 1 << 31 overflows a signed int. */
#              define PACKET3_DMA_DATA_CP_SYNC     (1u << 31)
/* COMMAND */
#              define PACKET3_DMA_DATA_DIS_WC      (1 << 21)
#              define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#              define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#              define PACKET3_DMA_DATA_CMD_SAS     (1 << 26)
		/* 0 - memory
		 * 1 - register
		 */
#              define PACKET3_DMA_DATA_CMD_DAS     (1 << 27)
		/* 0 - memory
		 * 1 - register
		 */
#              define PACKET3_DMA_DATA_CMD_SAIC    (1 << 28)
#              define PACKET3_DMA_DATA_CMD_DAIC    (1 << 29)
#              define PACKET3_DMA_DATA_CMD_RAW_WAIT  (1 << 30)
2133f012e29Smrg
/*
 * SI-style SDMA packet header: opcode in bits 31:28, `b` at bit 26, `t` at
 * bit 23, `s` at bit 22 and a 20-bit count in bits 19:0.  The opcode is
 * shifted as uint32_t because opcodes >= 8 would overflow a signed int.
 */
#define SDMA_PACKET_SI(op, b, t, s, cnt)	((((uint32_t)(op) & 0xF) << 28) |	\
						(((b) & 0x1) << 26) |		\
						(((t) & 0x1) << 23) |		\
						(((s) & 0x1) << 22) |		\
						(((cnt) & 0xFFFFF) << 0))
#define	SDMA_OPCODE_COPY_SI	3
#define SDMA_OPCODE_CONSTANT_FILL_SI	13
#define SDMA_NOP_SI  0xf
#define GFX_COMPUTE_NOP_SI 0x80000000
#define	PACKET3_DMA_DATA_SI	0x41
#              define PACKET3_DMA_DATA_SI_ENGINE(x)     ((x) << 27)
		/* 0 - ME
		 * 1 - PFP
		 */
#              define PACKET3_DMA_DATA_SI_DST_SEL(x)  ((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_SI_SRC_SEL(x)  ((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
		/* Bit 31: unsigned constant, since 1 << 31 overflows a signed int. */
#              define PACKET3_DMA_DATA_SI_CP_SYNC     (1u << 31)
240d8807b2fSmrg
24100a23bdaSmrg
24200a23bdaSmrg#define PKT3_CONTEXT_CONTROL                   0x28
24300a23bdaSmrg#define     CONTEXT_CONTROL_LOAD_ENABLE(x)     (((unsigned)(x) & 0x1) << 31)
24400a23bdaSmrg#define     CONTEXT_CONTROL_LOAD_CE_RAM(x)     (((unsigned)(x) & 0x1) << 28)
24500a23bdaSmrg#define     CONTEXT_CONTROL_SHADOW_ENABLE(x)   (((unsigned)(x) & 0x1) << 31)
24600a23bdaSmrg
24700a23bdaSmrg#define PKT3_CLEAR_STATE                       0x12
24800a23bdaSmrg
24900a23bdaSmrg#define PKT3_SET_SH_REG                        0x76
25000a23bdaSmrg#define		PACKET3_SET_SH_REG_START			0x00002c00
25100a23bdaSmrg
25200a23bdaSmrg#define	PACKET3_DISPATCH_DIRECT				0x15
2535324fb0dSmrg#define PACKET3_EVENT_WRITE				0x46
2545324fb0dSmrg#define PACKET3_ACQUIRE_MEM				0x58
2555324fb0dSmrg#define PACKET3_SET_CONTEXT_REG				0x69
2565324fb0dSmrg#define PACKET3_SET_UCONFIG_REG				0x79
2575324fb0dSmrg#define PACKET3_DRAW_INDEX_AUTO				0x2D
25800a23bdaSmrg/* gfx 8 */
25900a23bdaSmrg#define mmCOMPUTE_PGM_LO                                                        0x2e0c
26000a23bdaSmrg#define mmCOMPUTE_PGM_RSRC1                                                     0x2e12
26100a23bdaSmrg#define mmCOMPUTE_TMPRING_SIZE                                                  0x2e18
26200a23bdaSmrg#define mmCOMPUTE_USER_DATA_0                                                   0x2e40
26300a23bdaSmrg#define mmCOMPUTE_USER_DATA_1                                                   0x2e41
26400a23bdaSmrg#define mmCOMPUTE_RESOURCE_LIMITS                                               0x2e15
26500a23bdaSmrg#define mmCOMPUTE_NUM_THREAD_X                                                  0x2e07
26600a23bdaSmrg
26700a23bdaSmrg
26800a23bdaSmrg
/*
 * Reverse the byte order of a 32-bit value.  The argument is parenthesized
 * and converted to uint32_t so that compound expressions expand correctly
 * and the low byte can be shifted into bits 31:24 without signed overflow.
 * The result is still an integer constant expression for constant input.
 */
#define SWAP_32(num) ((((uint32_t)(num) & 0xff000000) >> 24) | \
		      (((uint32_t)(num) & 0x0000ff00) << 8) | \
		      (((uint32_t)(num) & 0x00ff0000) >> 8) | \
		      (((uint32_t)(num) & 0x000000ff) << 24))
27300a23bdaSmrg
27400a23bdaSmrg
/* Shader code
 * void main()
 * {
 *	float x = some_input;
 *	for (unsigned i = 0; i < 1000000; i++)
 *		x = sin(x);
 *
 *	u[0] = 42u;
 * }
 */
28600a23bdaSmrg
28700a23bdaSmrgstatic  uint32_t shader_bin[] = {
28800a23bdaSmrg	SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
28900a23bdaSmrg	SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
29000a23bdaSmrg	SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
29100a23bdaSmrg	SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
29200a23bdaSmrg};
29300a23bdaSmrg
29400a23bdaSmrg#define CODE_OFFSET 512
29500a23bdaSmrg#define DATA_OFFSET 1024
29600a23bdaSmrg
2975324fb0dSmrgenum cs_type {
2985324fb0dSmrg	CS_BUFFERCLEAR,
2995324fb0dSmrg	CS_BUFFERCOPY
3005324fb0dSmrg};
3015324fb0dSmrg
3025324fb0dSmrgstatic const uint32_t bufferclear_cs_shader_gfx9[] = {
3035324fb0dSmrg    0xD1FD0000, 0x04010C08, 0x7E020204, 0x7E040205,
3045324fb0dSmrg    0x7E060206, 0x7E080207, 0xE01C2000, 0x80000100,
3055324fb0dSmrg    0xBF810000
3065324fb0dSmrg};
3075324fb0dSmrg
3085324fb0dSmrgstatic const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = {
3095324fb0dSmrg	{0x2e12, 0x000C0041},	//{ mmCOMPUTE_PGM_RSRC1,	  0x000C0041 },
3105324fb0dSmrg	{0x2e13, 0x00000090},	//{ mmCOMPUTE_PGM_RSRC2,	  0x00000090 },
3115324fb0dSmrg	{0x2e07, 0x00000040},	//{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 },
3125324fb0dSmrg	{0x2e08, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 },
3135324fb0dSmrg	{0x2e09, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
3145324fb0dSmrg};
3155324fb0dSmrg
3165324fb0dSmrgstatic const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5;
3175324fb0dSmrg
3185324fb0dSmrgstatic const uint32_t buffercopy_cs_shader_gfx9[] = {
3195324fb0dSmrg    0xD1FD0000, 0x04010C08, 0xE00C2000, 0x80000100,
3205324fb0dSmrg    0xBF8C0F70, 0xE01C2000, 0x80010100, 0xBF810000
3215324fb0dSmrg};
3225324fb0dSmrg
3235324fb0dSmrgstatic const uint32_t preamblecache_gfx9[] = {
3245324fb0dSmrg	0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
3255324fb0dSmrg	0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
3265324fb0dSmrg	0xc0026900, 0xb4,  0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
3275324fb0dSmrg	0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
3285324fb0dSmrg	0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
3295324fb0dSmrg	0xc0016900, 0x2d5, 0x10000, 0xc0016900,  0x2dc, 0x0,
3305324fb0dSmrg	0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
3315324fb0dSmrg	0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
3325324fb0dSmrg	0xc0026900, 0x311,  0x3, 0x0, 0xc0026900, 0x316, 0x1e, 0x20,
3335324fb0dSmrg	0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
3345324fb0dSmrg	0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0,
3355324fb0dSmrg	0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
3365324fb0dSmrg	0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
3375324fb0dSmrg	0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
3385324fb0dSmrg	0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
3395324fb0dSmrg	0xc0016900, 0x314, 0x0, 0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1,
3405324fb0dSmrg	0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0,
3415324fb0dSmrg	0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0,
3425324fb0dSmrg	0xc0017900, 0x24b, 0x0
3435324fb0dSmrg};
3445324fb0dSmrg
3455324fb0dSmrgenum ps_type {
3465324fb0dSmrg	PS_CONST,
3475324fb0dSmrg	PS_TEX
3485324fb0dSmrg};
3495324fb0dSmrg
3505324fb0dSmrgstatic const uint32_t ps_const_shader_gfx9[] = {
3515324fb0dSmrg    0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
3525324fb0dSmrg    0xD2960000, 0x00020300, 0xD2960001, 0x00020702,
3535324fb0dSmrg    0xC4001C0F, 0x00000100, 0xBF810000
3545324fb0dSmrg};
3555324fb0dSmrg
3565324fb0dSmrgstatic const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6;
3575324fb0dSmrg
3585324fb0dSmrgstatic const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = {
3595324fb0dSmrg    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
3605324fb0dSmrg     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 },
3615324fb0dSmrg     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 },
3625324fb0dSmrg     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 },
3635324fb0dSmrg     { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 },
3645324fb0dSmrg     { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 },
3655324fb0dSmrg     { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 },
3665324fb0dSmrg     { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 },
3675324fb0dSmrg     { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 },
3685324fb0dSmrg     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 }
3695324fb0dSmrg    }
3705324fb0dSmrg};
3715324fb0dSmrg
3725324fb0dSmrgstatic const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = {
3735324fb0dSmrg    0x00000004
3745324fb0dSmrg};
3755324fb0dSmrg
3765324fb0dSmrgstatic const uint32_t ps_num_sh_registers_gfx9 = 2;
3775324fb0dSmrg
3785324fb0dSmrgstatic const uint32_t ps_const_sh_registers_gfx9[][2] = {
3795324fb0dSmrg    {0x2C0A, 0x000C0040},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 },
3805324fb0dSmrg    {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 }
3815324fb0dSmrg};
3825324fb0dSmrg
3835324fb0dSmrgstatic const uint32_t ps_num_context_registers_gfx9 = 7;
3845324fb0dSmrg
3855324fb0dSmrgstatic const uint32_t ps_const_context_reg_gfx9[][2] = {
3865324fb0dSmrg    {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
3875324fb0dSmrg    {0xA1B6, 0x00000000}, //{ mmSPI_PS_IN_CONTROL,       0x00000000 },
3885324fb0dSmrg    {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
3895324fb0dSmrg    {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
3905324fb0dSmrg    {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
3915324fb0dSmrg    {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
3925324fb0dSmrg    {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004 }
3935324fb0dSmrg};
3945324fb0dSmrg
3955324fb0dSmrgstatic const uint32_t ps_tex_shader_gfx9[] = {
3965324fb0dSmrg    0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000,
3975324fb0dSmrg    0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00,
3985324fb0dSmrg    0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000,
3995324fb0dSmrg    0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F,
4005324fb0dSmrg    0x00000100, 0xBF810000
4015324fb0dSmrg};
4025324fb0dSmrg
4035324fb0dSmrgstatic const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = {
4045324fb0dSmrg    0x0000000B
4055324fb0dSmrg};
4065324fb0dSmrg
4075324fb0dSmrgstatic const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6;
4085324fb0dSmrg
4095324fb0dSmrgstatic const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = {
4105324fb0dSmrg    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
4115324fb0dSmrg     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 },
4125324fb0dSmrg     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 },
4135324fb0dSmrg     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 },
4145324fb0dSmrg     { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 },
4155324fb0dSmrg     { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 },
4165324fb0dSmrg     { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 },
4175324fb0dSmrg     { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 },
4185324fb0dSmrg     { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 },
4195324fb0dSmrg     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 }
4205324fb0dSmrg    }
4215324fb0dSmrg};
4225324fb0dSmrg
4235324fb0dSmrgstatic const uint32_t ps_tex_sh_registers_gfx9[][2] = {
4245324fb0dSmrg    {0x2C0A, 0x000C0081},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 },
4255324fb0dSmrg    {0x2C0B, 0x00000018}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 }
4265324fb0dSmrg};
4275324fb0dSmrg
4285324fb0dSmrgstatic const uint32_t ps_tex_context_reg_gfx9[][2] = {
4295324fb0dSmrg    {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
4305324fb0dSmrg    {0xA1B6, 0x00000001}, //{ mmSPI_PS_IN_CONTROL,       0x00000001 },
4315324fb0dSmrg    {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
4325324fb0dSmrg    {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
4335324fb0dSmrg    {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
4345324fb0dSmrg    {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
4355324fb0dSmrg    {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004  }
4365324fb0dSmrg};
4375324fb0dSmrg
4385324fb0dSmrgstatic const uint32_t vs_RectPosTexFast_shader_gfx9[] = {
4395324fb0dSmrg    0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100,
4405324fb0dSmrg    0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206,
4415324fb0dSmrg    0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080,
4425324fb0dSmrg    0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003,
4435324fb0dSmrg    0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101,
4445324fb0dSmrg    0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903,
4455324fb0dSmrg    0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100,
4465324fb0dSmrg    0xC400020F, 0x05060403, 0xBF810000
4475324fb0dSmrg};
4485324fb0dSmrg
4495324fb0dSmrgstatic const uint32_t cached_cmd_gfx9[] = {
4505324fb0dSmrg	0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0,
4515324fb0dSmrg	0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
4525324fb0dSmrg	0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf,
4535324fb0dSmrg	0xc0056900, 0x105, 0x0, 0x0,  0x0, 0x0, 0x12,
4545324fb0dSmrg	0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0,
4555324fb0dSmrg	0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
4565324fb0dSmrg	0xc0026900, 0x292, 0x20, 0x60201b8,
4575324fb0dSmrg	0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
4585324fb0dSmrg};
45900a23bdaSmrg
4607cdc0497Smrgint amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size,
4617cdc0497Smrg			unsigned alignment, unsigned heap, uint64_t alloc_flags,
4627cdc0497Smrg			uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu,
4637cdc0497Smrg			uint64_t *mc_address,
4647cdc0497Smrg			amdgpu_va_handle *va_handle)
4657cdc0497Smrg{
4667cdc0497Smrg	struct amdgpu_bo_alloc_request request = {};
4677cdc0497Smrg	amdgpu_bo_handle buf_handle;
4687cdc0497Smrg	amdgpu_va_handle handle;
4697cdc0497Smrg	uint64_t vmc_addr;
4707cdc0497Smrg	int r;
4717cdc0497Smrg
4727cdc0497Smrg	request.alloc_size = size;
4737cdc0497Smrg	request.phys_alignment = alignment;
4747cdc0497Smrg	request.preferred_heap = heap;
4757cdc0497Smrg	request.flags = alloc_flags;
4767cdc0497Smrg
4777cdc0497Smrg	r = amdgpu_bo_alloc(dev, &request, &buf_handle);
4787cdc0497Smrg	if (r)
4797cdc0497Smrg		return r;
4807cdc0497Smrg
4817cdc0497Smrg	r = amdgpu_va_range_alloc(dev,
4827cdc0497Smrg				  amdgpu_gpu_va_range_general,
4837cdc0497Smrg				  size, alignment, 0, &vmc_addr,
4847cdc0497Smrg				  &handle, 0);
4857cdc0497Smrg	if (r)
4867cdc0497Smrg		goto error_va_alloc;
4877cdc0497Smrg
4887cdc0497Smrg	r = amdgpu_bo_va_op_raw(dev, buf_handle, 0,  ALIGN(size, getpagesize()), vmc_addr,
4897cdc0497Smrg				   AMDGPU_VM_PAGE_READABLE |
4907cdc0497Smrg				   AMDGPU_VM_PAGE_WRITEABLE |
4917cdc0497Smrg				   AMDGPU_VM_PAGE_EXECUTABLE |
4927cdc0497Smrg				   mapping_flags,
4937cdc0497Smrg				   AMDGPU_VA_OP_MAP);
4947cdc0497Smrg	if (r)
4957cdc0497Smrg		goto error_va_map;
4967cdc0497Smrg
4977cdc0497Smrg	r = amdgpu_bo_cpu_map(buf_handle, cpu);
4987cdc0497Smrg	if (r)
4997cdc0497Smrg		goto error_cpu_map;
5007cdc0497Smrg
5017cdc0497Smrg	*bo = buf_handle;
5027cdc0497Smrg	*mc_address = vmc_addr;
5037cdc0497Smrg	*va_handle = handle;
5047cdc0497Smrg
5057cdc0497Smrg	return 0;
5067cdc0497Smrg
5077cdc0497Smrg error_cpu_map:
5087cdc0497Smrg	amdgpu_bo_cpu_unmap(buf_handle);
5097cdc0497Smrg
5107cdc0497Smrg error_va_map:
5117cdc0497Smrg	amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);
5127cdc0497Smrg
5137cdc0497Smrg error_va_alloc:
5147cdc0497Smrg	amdgpu_bo_free(buf_handle);
5157cdc0497Smrg	return r;
5167cdc0497Smrg}
5177cdc0497Smrg
5187cdc0497Smrg
5197cdc0497Smrg
5203f012e29Smrgint suite_basic_tests_init(void)
5213f012e29Smrg{
522d8807b2fSmrg	struct amdgpu_gpu_info gpu_info = {0};
5233f012e29Smrg	int r;
5243f012e29Smrg
5253f012e29Smrg	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
5263f012e29Smrg				   &minor_version, &device_handle);
5273f012e29Smrg
528d8807b2fSmrg	if (r) {
529037b3c26Smrg		if ((r == -EACCES) && (errno == EACCES))
530037b3c26Smrg			printf("\n\nError:%s. "
531037b3c26Smrg				"Hint:Try to run this test program as root.",
532037b3c26Smrg				strerror(errno));
5333f012e29Smrg		return CUE_SINIT_FAILED;
534037b3c26Smrg	}
535d8807b2fSmrg
536d8807b2fSmrg	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
537d8807b2fSmrg	if (r)
538d8807b2fSmrg		return CUE_SINIT_FAILED;
539d8807b2fSmrg
540d8807b2fSmrg	family_id = gpu_info.family_id;
541d8807b2fSmrg
542d8807b2fSmrg	return CUE_SUCCESS;
5433f012e29Smrg}
5443f012e29Smrg
5453f012e29Smrgint suite_basic_tests_clean(void)
5463f012e29Smrg{
5473f012e29Smrg	int r = amdgpu_device_deinitialize(device_handle);
5483f012e29Smrg
5493f012e29Smrg	if (r == 0)
5503f012e29Smrg		return CUE_SUCCESS;
5513f012e29Smrg	else
5523f012e29Smrg		return CUE_SCLEAN_FAILED;
5533f012e29Smrg}
5543f012e29Smrg
5553f012e29Smrgstatic void amdgpu_query_info_test(void)
5563f012e29Smrg{
5573f012e29Smrg	struct amdgpu_gpu_info gpu_info = {0};
5583f012e29Smrg	uint32_t version, feature;
5593f012e29Smrg	int r;
5603f012e29Smrg
5613f012e29Smrg	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
5623f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
5633f012e29Smrg
5643f012e29Smrg	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
5653f012e29Smrg					  0, &version, &feature);
5663f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
5673f012e29Smrg}
5683f012e29Smrg
5693f012e29Smrgstatic void amdgpu_command_submission_gfx_separate_ibs(void)
5703f012e29Smrg{
5713f012e29Smrg	amdgpu_context_handle context_handle;
5723f012e29Smrg	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
5733f012e29Smrg	void *ib_result_cpu, *ib_result_ce_cpu;
5743f012e29Smrg	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
5753f012e29Smrg	struct amdgpu_cs_request ibs_request = {0};
5763f012e29Smrg	struct amdgpu_cs_ib_info ib_info[2];
5773f012e29Smrg	struct amdgpu_cs_fence fence_status = {0};
5783f012e29Smrg	uint32_t *ptr;
5793f012e29Smrg	uint32_t expired;
5803f012e29Smrg	amdgpu_bo_list_handle bo_list;
5813f012e29Smrg	amdgpu_va_handle va_handle, va_handle_ce;
582d8807b2fSmrg	int r, i = 0;
5833f012e29Smrg
5843f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
5853f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
5863f012e29Smrg
5873f012e29Smrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
5883f012e29Smrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
5893f012e29Smrg				    &ib_result_handle, &ib_result_cpu,
5903f012e29Smrg				    &ib_result_mc_address, &va_handle);
5913f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
5923f012e29Smrg
5933f012e29Smrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
5943f012e29Smrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
5953f012e29Smrg				    &ib_result_ce_handle, &ib_result_ce_cpu,
5963f012e29Smrg				    &ib_result_ce_mc_address, &va_handle_ce);
5973f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
5983f012e29Smrg
5993f012e29Smrg	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
6003f012e29Smrg			       ib_result_ce_handle, &bo_list);
6013f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
6023f012e29Smrg
6033f012e29Smrg	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
6043f012e29Smrg
6053f012e29Smrg	/* IT_SET_CE_DE_COUNTERS */
6063f012e29Smrg	ptr = ib_result_ce_cpu;
607d8807b2fSmrg	if (family_id != AMDGPU_FAMILY_SI) {
608d8807b2fSmrg		ptr[i++] = 0xc0008900;
609d8807b2fSmrg		ptr[i++] = 0;
610d8807b2fSmrg	}
611d8807b2fSmrg	ptr[i++] = 0xc0008400;
612d8807b2fSmrg	ptr[i++] = 1;
6133f012e29Smrg	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
614d8807b2fSmrg	ib_info[0].size = i;
6153f012e29Smrg	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
6163f012e29Smrg
6173f012e29Smrg	/* IT_WAIT_ON_CE_COUNTER */
6183f012e29Smrg	ptr = ib_result_cpu;
6193f012e29Smrg	ptr[0] = 0xc0008600;
6203f012e29Smrg	ptr[1] = 0x00000001;
6213f012e29Smrg	ib_info[1].ib_mc_address = ib_result_mc_address;
6223f012e29Smrg	ib_info[1].size = 2;
6233f012e29Smrg
6243f012e29Smrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
6253f012e29Smrg	ibs_request.number_of_ibs = 2;
6263f012e29Smrg	ibs_request.ibs = ib_info;
6273f012e29Smrg	ibs_request.resources = bo_list;
6283f012e29Smrg	ibs_request.fence_info.handle = NULL;
6293f012e29Smrg
6303f012e29Smrg	r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
6313f012e29Smrg
6323f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
6333f012e29Smrg
6343f012e29Smrg	fence_status.context = context_handle;
6353f012e29Smrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
6363f012e29Smrg	fence_status.ip_instance = 0;
6373f012e29Smrg	fence_status.fence = ibs_request.seq_no;
6383f012e29Smrg
6393f012e29Smrg	r = amdgpu_cs_query_fence_status(&fence_status,
6403f012e29Smrg					 AMDGPU_TIMEOUT_INFINITE,
6413f012e29Smrg					 0, &expired);
6423f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
6433f012e29Smrg
6443f012e29Smrg	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
6453f012e29Smrg				     ib_result_mc_address, 4096);
6463f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
6473f012e29Smrg
6483f012e29Smrg	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
6493f012e29Smrg				     ib_result_ce_mc_address, 4096);
6503f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
6513f012e29Smrg
6523f012e29Smrg	r = amdgpu_bo_list_destroy(bo_list);
6533f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
6543f012e29Smrg
6553f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
6563f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
6573f012e29Smrg
6583f012e29Smrg}
6593f012e29Smrg
6603f012e29Smrgstatic void amdgpu_command_submission_gfx_shared_ib(void)
6613f012e29Smrg{
6623f012e29Smrg	amdgpu_context_handle context_handle;
6633f012e29Smrg	amdgpu_bo_handle ib_result_handle;
6643f012e29Smrg	void *ib_result_cpu;
6653f012e29Smrg	uint64_t ib_result_mc_address;
6663f012e29Smrg	struct amdgpu_cs_request ibs_request = {0};
6673f012e29Smrg	struct amdgpu_cs_ib_info ib_info[2];
6683f012e29Smrg	struct amdgpu_cs_fence fence_status = {0};
6693f012e29Smrg	uint32_t *ptr;
6703f012e29Smrg	uint32_t expired;
6713f012e29Smrg	amdgpu_bo_list_handle bo_list;
6723f012e29Smrg	amdgpu_va_handle va_handle;
673d8807b2fSmrg	int r, i = 0;
6743f012e29Smrg
6753f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
6763f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
6773f012e29Smrg
6783f012e29Smrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
6793f012e29Smrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
6803f012e29Smrg				    &ib_result_handle, &ib_result_cpu,
6813f012e29Smrg				    &ib_result_mc_address, &va_handle);
6823f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
6833f012e29Smrg
6843f012e29Smrg	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
6853f012e29Smrg			       &bo_list);
6863f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
6873f012e29Smrg
6883f012e29Smrg	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
6893f012e29Smrg
6903f012e29Smrg	/* IT_SET_CE_DE_COUNTERS */
6913f012e29Smrg	ptr = ib_result_cpu;
692d8807b2fSmrg	if (family_id != AMDGPU_FAMILY_SI) {
693d8807b2fSmrg		ptr[i++] = 0xc0008900;
694d8807b2fSmrg		ptr[i++] = 0;
695d8807b2fSmrg	}
696d8807b2fSmrg	ptr[i++] = 0xc0008400;
697d8807b2fSmrg	ptr[i++] = 1;
6983f012e29Smrg	ib_info[0].ib_mc_address = ib_result_mc_address;
699d8807b2fSmrg	ib_info[0].size = i;
7003f012e29Smrg	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
7013f012e29Smrg
7023f012e29Smrg	ptr = (uint32_t *)ib_result_cpu + 4;
7033f012e29Smrg	ptr[0] = 0xc0008600;
7043f012e29Smrg	ptr[1] = 0x00000001;
7053f012e29Smrg	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
7063f012e29Smrg	ib_info[1].size = 2;
7073f012e29Smrg
7083f012e29Smrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
7093f012e29Smrg	ibs_request.number_of_ibs = 2;
7103f012e29Smrg	ibs_request.ibs = ib_info;
7113f012e29Smrg	ibs_request.resources = bo_list;
7123f012e29Smrg	ibs_request.fence_info.handle = NULL;
7133f012e29Smrg
7143f012e29Smrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
7153f012e29Smrg
7163f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7173f012e29Smrg
7183f012e29Smrg	fence_status.context = context_handle;
7193f012e29Smrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
7203f012e29Smrg	fence_status.ip_instance = 0;
7213f012e29Smrg	fence_status.fence = ibs_request.seq_no;
7223f012e29Smrg
7233f012e29Smrg	r = amdgpu_cs_query_fence_status(&fence_status,
7243f012e29Smrg					 AMDGPU_TIMEOUT_INFINITE,
7253f012e29Smrg					 0, &expired);
7263f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7273f012e29Smrg
7283f012e29Smrg	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
7293f012e29Smrg				     ib_result_mc_address, 4096);
7303f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7313f012e29Smrg
7323f012e29Smrg	r = amdgpu_bo_list_destroy(bo_list);
7333f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7343f012e29Smrg
7353f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
7363f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
7373f012e29Smrg}
7383f012e29Smrg
7393f012e29Smrgstatic void amdgpu_command_submission_gfx_cp_write_data(void)
7403f012e29Smrg{
7413f012e29Smrg	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
7423f012e29Smrg}
7433f012e29Smrg
7443f012e29Smrgstatic void amdgpu_command_submission_gfx_cp_const_fill(void)
7453f012e29Smrg{
7463f012e29Smrg	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
7473f012e29Smrg}
7483f012e29Smrg
7493f012e29Smrgstatic void amdgpu_command_submission_gfx_cp_copy_data(void)
7503f012e29Smrg{
7513f012e29Smrg	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
7523f012e29Smrg}
7533f012e29Smrg
75400a23bdaSmrgstatic void amdgpu_bo_eviction_test(void)
75500a23bdaSmrg{
75600a23bdaSmrg	const int sdma_write_length = 1024;
75700a23bdaSmrg	const int pm4_dw = 256;
75800a23bdaSmrg	amdgpu_context_handle context_handle;
75900a23bdaSmrg	amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
76000a23bdaSmrg	amdgpu_bo_handle *resources;
76100a23bdaSmrg	uint32_t *pm4;
76200a23bdaSmrg	struct amdgpu_cs_ib_info *ib_info;
76300a23bdaSmrg	struct amdgpu_cs_request *ibs_request;
76400a23bdaSmrg	uint64_t bo1_mc, bo2_mc;
76500a23bdaSmrg	volatile unsigned char *bo1_cpu, *bo2_cpu;
76600a23bdaSmrg	int i, j, r, loop1, loop2;
76700a23bdaSmrg	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
76800a23bdaSmrg	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
76900a23bdaSmrg	struct amdgpu_heap_info vram_info, gtt_info;
77000a23bdaSmrg
77100a23bdaSmrg	pm4 = calloc(pm4_dw, sizeof(*pm4));
77200a23bdaSmrg	CU_ASSERT_NOT_EQUAL(pm4, NULL);
77300a23bdaSmrg
77400a23bdaSmrg	ib_info = calloc(1, sizeof(*ib_info));
77500a23bdaSmrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
77600a23bdaSmrg
77700a23bdaSmrg	ibs_request = calloc(1, sizeof(*ibs_request));
77800a23bdaSmrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
77900a23bdaSmrg
78000a23bdaSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
78100a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
78200a23bdaSmrg
78300a23bdaSmrg	/* prepare resource */
78400a23bdaSmrg	resources = calloc(4, sizeof(amdgpu_bo_handle));
78500a23bdaSmrg	CU_ASSERT_NOT_EQUAL(resources, NULL);
78600a23bdaSmrg
78700a23bdaSmrg	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
78800a23bdaSmrg				   0, &vram_info);
78900a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
79000a23bdaSmrg
79100a23bdaSmrg	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
79200a23bdaSmrg				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
79300a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
79400a23bdaSmrg	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
79500a23bdaSmrg				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
79600a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
79700a23bdaSmrg
79800a23bdaSmrg	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
79900a23bdaSmrg				   0, &gtt_info);
80000a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
80100a23bdaSmrg
80200a23bdaSmrg	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
80300a23bdaSmrg				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
80400a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
80500a23bdaSmrg	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
80600a23bdaSmrg				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
80700a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
80800a23bdaSmrg
80900a23bdaSmrg
81000a23bdaSmrg
81100a23bdaSmrg	loop1 = loop2 = 0;
81200a23bdaSmrg	/* run 9 circle to test all mapping combination */
81300a23bdaSmrg	while(loop1 < 2) {
81400a23bdaSmrg		while(loop2 < 2) {
81500a23bdaSmrg			/* allocate UC bo1for sDMA use */
81600a23bdaSmrg			r = amdgpu_bo_alloc_and_map(device_handle,
81700a23bdaSmrg						    sdma_write_length, 4096,
81800a23bdaSmrg						    AMDGPU_GEM_DOMAIN_GTT,
81900a23bdaSmrg						    gtt_flags[loop1], &bo1,
82000a23bdaSmrg						    (void**)&bo1_cpu, &bo1_mc,
82100a23bdaSmrg						    &bo1_va_handle);
82200a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
82300a23bdaSmrg
82400a23bdaSmrg			/* set bo1 */
82500a23bdaSmrg			memset((void*)bo1_cpu, 0xaa, sdma_write_length);
82600a23bdaSmrg
82700a23bdaSmrg			/* allocate UC bo2 for sDMA use */
82800a23bdaSmrg			r = amdgpu_bo_alloc_and_map(device_handle,
82900a23bdaSmrg						    sdma_write_length, 4096,
83000a23bdaSmrg						    AMDGPU_GEM_DOMAIN_GTT,
83100a23bdaSmrg						    gtt_flags[loop2], &bo2,
83200a23bdaSmrg						    (void**)&bo2_cpu, &bo2_mc,
83300a23bdaSmrg						    &bo2_va_handle);
83400a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
83500a23bdaSmrg
83600a23bdaSmrg			/* clear bo2 */
83700a23bdaSmrg			memset((void*)bo2_cpu, 0, sdma_write_length);
83800a23bdaSmrg
83900a23bdaSmrg			resources[0] = bo1;
84000a23bdaSmrg			resources[1] = bo2;
84100a23bdaSmrg			resources[2] = vram_max[loop2];
84200a23bdaSmrg			resources[3] = gtt_max[loop2];
84300a23bdaSmrg
84400a23bdaSmrg			/* fulfill PM4: test DMA copy linear */
84500a23bdaSmrg			i = j = 0;
84600a23bdaSmrg			if (family_id == AMDGPU_FAMILY_SI) {
84700a23bdaSmrg				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
84800a23bdaSmrg							  sdma_write_length);
84900a23bdaSmrg				pm4[i++] = 0xffffffff & bo2_mc;
85000a23bdaSmrg				pm4[i++] = 0xffffffff & bo1_mc;
85100a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
85200a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
85300a23bdaSmrg			} else {
85400a23bdaSmrg				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
85500a23bdaSmrg				if (family_id >= AMDGPU_FAMILY_AI)
85600a23bdaSmrg					pm4[i++] = sdma_write_length - 1;
85700a23bdaSmrg				else
85800a23bdaSmrg					pm4[i++] = sdma_write_length;
85900a23bdaSmrg				pm4[i++] = 0;
86000a23bdaSmrg				pm4[i++] = 0xffffffff & bo1_mc;
86100a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
86200a23bdaSmrg				pm4[i++] = 0xffffffff & bo2_mc;
86300a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
86400a23bdaSmrg			}
86500a23bdaSmrg
86600a23bdaSmrg			amdgpu_test_exec_cs_helper(context_handle,
86700a23bdaSmrg						   AMDGPU_HW_IP_DMA, 0,
86800a23bdaSmrg						   i, pm4,
86900a23bdaSmrg						   4, resources,
87000a23bdaSmrg						   ib_info, ibs_request);
87100a23bdaSmrg
87200a23bdaSmrg			/* verify if SDMA test result meets with expected */
87300a23bdaSmrg			i = 0;
87400a23bdaSmrg			while(i < sdma_write_length) {
87500a23bdaSmrg				CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
87600a23bdaSmrg			}
87700a23bdaSmrg			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
87800a23bdaSmrg						     sdma_write_length);
87900a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
88000a23bdaSmrg			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
88100a23bdaSmrg						     sdma_write_length);
88200a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
88300a23bdaSmrg			loop2++;
88400a23bdaSmrg		}
88500a23bdaSmrg		loop2 = 0;
88600a23bdaSmrg		loop1++;
88700a23bdaSmrg	}
88800a23bdaSmrg	amdgpu_bo_free(vram_max[0]);
88900a23bdaSmrg	amdgpu_bo_free(vram_max[1]);
89000a23bdaSmrg	amdgpu_bo_free(gtt_max[0]);
89100a23bdaSmrg	amdgpu_bo_free(gtt_max[1]);
89200a23bdaSmrg	/* clean resources */
89300a23bdaSmrg	free(resources);
89400a23bdaSmrg	free(ibs_request);
89500a23bdaSmrg	free(ib_info);
89600a23bdaSmrg	free(pm4);
89700a23bdaSmrg
89800a23bdaSmrg	/* end of test */
89900a23bdaSmrg	r = amdgpu_cs_ctx_free(context_handle);
90000a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
90100a23bdaSmrg}
90200a23bdaSmrg
90300a23bdaSmrg
9043f012e29Smrgstatic void amdgpu_command_submission_gfx(void)
9053f012e29Smrg{
9063f012e29Smrg	/* write data using the CP */
9073f012e29Smrg	amdgpu_command_submission_gfx_cp_write_data();
9083f012e29Smrg	/* const fill using the CP */
9093f012e29Smrg	amdgpu_command_submission_gfx_cp_const_fill();
9103f012e29Smrg	/* copy data using the CP */
9113f012e29Smrg	amdgpu_command_submission_gfx_cp_copy_data();
9123f012e29Smrg	/* separate IB buffers for multi-IB submission */
9133f012e29Smrg	amdgpu_command_submission_gfx_separate_ibs();
9143f012e29Smrg	/* shared IB buffer for multi-IB submission */
9153f012e29Smrg	amdgpu_command_submission_gfx_shared_ib();
9163f012e29Smrg}
9173f012e29Smrg
9183f012e29Smrgstatic void amdgpu_semaphore_test(void)
9193f012e29Smrg{
9203f012e29Smrg	amdgpu_context_handle context_handle[2];
9213f012e29Smrg	amdgpu_semaphore_handle sem;
9223f012e29Smrg	amdgpu_bo_handle ib_result_handle[2];
9233f012e29Smrg	void *ib_result_cpu[2];
9243f012e29Smrg	uint64_t ib_result_mc_address[2];
9253f012e29Smrg	struct amdgpu_cs_request ibs_request[2] = {0};
9263f012e29Smrg	struct amdgpu_cs_ib_info ib_info[2] = {0};
9273f012e29Smrg	struct amdgpu_cs_fence fence_status = {0};
9283f012e29Smrg	uint32_t *ptr;
9293f012e29Smrg	uint32_t expired;
930d8807b2fSmrg	uint32_t sdma_nop, gfx_nop;
9313f012e29Smrg	amdgpu_bo_list_handle bo_list[2];
9323f012e29Smrg	amdgpu_va_handle va_handle[2];
9333f012e29Smrg	int r, i;
9343f012e29Smrg
935d8807b2fSmrg	if (family_id == AMDGPU_FAMILY_SI) {
936d8807b2fSmrg		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
937d8807b2fSmrg		gfx_nop = GFX_COMPUTE_NOP_SI;
938d8807b2fSmrg	} else {
939d8807b2fSmrg		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
940d8807b2fSmrg		gfx_nop = GFX_COMPUTE_NOP;
941d8807b2fSmrg	}
942d8807b2fSmrg
9433f012e29Smrg	r = amdgpu_cs_create_semaphore(&sem);
9443f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
9453f012e29Smrg	for (i = 0; i < 2; i++) {
9463f012e29Smrg		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
9473f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
9483f012e29Smrg
9493f012e29Smrg		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
9503f012e29Smrg					    AMDGPU_GEM_DOMAIN_GTT, 0,
9513f012e29Smrg					    &ib_result_handle[i], &ib_result_cpu[i],
9523f012e29Smrg					    &ib_result_mc_address[i], &va_handle[i]);
9533f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
9543f012e29Smrg
9553f012e29Smrg		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
9563f012e29Smrg				       NULL, &bo_list[i]);
9573f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
9583f012e29Smrg	}
9593f012e29Smrg
9603f012e29Smrg	/* 1. same context different engine */
9613f012e29Smrg	ptr = ib_result_cpu[0];
962d8807b2fSmrg	ptr[0] = sdma_nop;
9633f012e29Smrg	ib_info[0].ib_mc_address = ib_result_mc_address[0];
9643f012e29Smrg	ib_info[0].size = 1;
9653f012e29Smrg
9663f012e29Smrg	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
9673f012e29Smrg	ibs_request[0].number_of_ibs = 1;
9683f012e29Smrg	ibs_request[0].ibs = &ib_info[0];
9693f012e29Smrg	ibs_request[0].resources = bo_list[0];
9703f012e29Smrg	ibs_request[0].fence_info.handle = NULL;
9713f012e29Smrg	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
9723f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
9733f012e29Smrg	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
9743f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
9753f012e29Smrg
9763f012e29Smrg	r = amdgpu_cs_wait_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
9773f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
9783f012e29Smrg	ptr = ib_result_cpu[1];
979d8807b2fSmrg	ptr[0] = gfx_nop;
9803f012e29Smrg	ib_info[1].ib_mc_address = ib_result_mc_address[1];
9813f012e29Smrg	ib_info[1].size = 1;
9823f012e29Smrg
9833f012e29Smrg	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
9843f012e29Smrg	ibs_request[1].number_of_ibs = 1;
9853f012e29Smrg	ibs_request[1].ibs = &ib_info[1];
9863f012e29Smrg	ibs_request[1].resources = bo_list[1];
9873f012e29Smrg	ibs_request[1].fence_info.handle = NULL;
9883f012e29Smrg
9893f012e29Smrg	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[1], 1);
9903f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
9913f012e29Smrg
9923f012e29Smrg	fence_status.context = context_handle[0];
9933f012e29Smrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
9943f012e29Smrg	fence_status.ip_instance = 0;
9953f012e29Smrg	fence_status.fence = ibs_request[1].seq_no;
9963f012e29Smrg	r = amdgpu_cs_query_fence_status(&fence_status,
9973f012e29Smrg					 500000000, 0, &expired);
9983f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
9993f012e29Smrg	CU_ASSERT_EQUAL(expired, true);
10003f012e29Smrg
10013f012e29Smrg	/* 2. same engine different context */
10023f012e29Smrg	ptr = ib_result_cpu[0];
1003d8807b2fSmrg	ptr[0] = gfx_nop;
10043f012e29Smrg	ib_info[0].ib_mc_address = ib_result_mc_address[0];
10053f012e29Smrg	ib_info[0].size = 1;
10063f012e29Smrg
10073f012e29Smrg	ibs_request[0].ip_type = AMDGPU_HW_IP_GFX;
10083f012e29Smrg	ibs_request[0].number_of_ibs = 1;
10093f012e29Smrg	ibs_request[0].ibs = &ib_info[0];
10103f012e29Smrg	ibs_request[0].resources = bo_list[0];
10113f012e29Smrg	ibs_request[0].fence_info.handle = NULL;
10123f012e29Smrg	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
10133f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
10143f012e29Smrg	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
10153f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
10163f012e29Smrg
10173f012e29Smrg	r = amdgpu_cs_wait_semaphore(context_handle[1], AMDGPU_HW_IP_GFX, 0, 0, sem);
10183f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
10193f012e29Smrg	ptr = ib_result_cpu[1];
1020d8807b2fSmrg	ptr[0] = gfx_nop;
10213f012e29Smrg	ib_info[1].ib_mc_address = ib_result_mc_address[1];
10223f012e29Smrg	ib_info[1].size = 1;
10233f012e29Smrg
10243f012e29Smrg	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
10253f012e29Smrg	ibs_request[1].number_of_ibs = 1;
10263f012e29Smrg	ibs_request[1].ibs = &ib_info[1];
10273f012e29Smrg	ibs_request[1].resources = bo_list[1];
10283f012e29Smrg	ibs_request[1].fence_info.handle = NULL;
10293f012e29Smrg	r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request[1], 1);
10303f012e29Smrg
10313f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
10323f012e29Smrg
10333f012e29Smrg	fence_status.context = context_handle[1];
10343f012e29Smrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
10353f012e29Smrg	fence_status.ip_instance = 0;
10363f012e29Smrg	fence_status.fence = ibs_request[1].seq_no;
10373f012e29Smrg	r = amdgpu_cs_query_fence_status(&fence_status,
10383f012e29Smrg					 500000000, 0, &expired);
10393f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
10403f012e29Smrg	CU_ASSERT_EQUAL(expired, true);
1041d8807b2fSmrg
10423f012e29Smrg	for (i = 0; i < 2; i++) {
10433f012e29Smrg		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
10443f012e29Smrg					     ib_result_mc_address[i], 4096);
10453f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
10463f012e29Smrg
10473f012e29Smrg		r = amdgpu_bo_list_destroy(bo_list[i]);
10483f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
10493f012e29Smrg
10503f012e29Smrg		r = amdgpu_cs_ctx_free(context_handle[i]);
10513f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
10523f012e29Smrg	}
10533f012e29Smrg
10543f012e29Smrg	r = amdgpu_cs_destroy_semaphore(sem);
10553f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
10563f012e29Smrg}
10573f012e29Smrg
10583f012e29Smrgstatic void amdgpu_command_submission_compute_nop(void)
10593f012e29Smrg{
10603f012e29Smrg	amdgpu_context_handle context_handle;
10613f012e29Smrg	amdgpu_bo_handle ib_result_handle;
10623f012e29Smrg	void *ib_result_cpu;
10633f012e29Smrg	uint64_t ib_result_mc_address;
10643f012e29Smrg	struct amdgpu_cs_request ibs_request;
10653f012e29Smrg	struct amdgpu_cs_ib_info ib_info;
10663f012e29Smrg	struct amdgpu_cs_fence fence_status;
10673f012e29Smrg	uint32_t *ptr;
10683f012e29Smrg	uint32_t expired;
106900a23bdaSmrg	int r, instance;
10703f012e29Smrg	amdgpu_bo_list_handle bo_list;
10713f012e29Smrg	amdgpu_va_handle va_handle;
1072d8807b2fSmrg	struct drm_amdgpu_info_hw_ip info;
1073d8807b2fSmrg
1074d8807b2fSmrg	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
1075d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
10763f012e29Smrg
10773f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
10783f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
10793f012e29Smrg
1080d8807b2fSmrg	for (instance = 0; (1 << instance) & info.available_rings; instance++) {
10813f012e29Smrg		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
10823f012e29Smrg					    AMDGPU_GEM_DOMAIN_GTT, 0,
10833f012e29Smrg					    &ib_result_handle, &ib_result_cpu,
10843f012e29Smrg					    &ib_result_mc_address, &va_handle);
10853f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
10863f012e29Smrg
10873f012e29Smrg		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
10883f012e29Smrg				       &bo_list);
10893f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
10903f012e29Smrg
10913f012e29Smrg		ptr = ib_result_cpu;
1092d8807b2fSmrg		memset(ptr, 0, 16);
1093d8807b2fSmrg		ptr[0]=PACKET3(PACKET3_NOP, 14);
10943f012e29Smrg
10953f012e29Smrg		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
10963f012e29Smrg		ib_info.ib_mc_address = ib_result_mc_address;
10973f012e29Smrg		ib_info.size = 16;
10983f012e29Smrg
10993f012e29Smrg		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
11003f012e29Smrg		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
11013f012e29Smrg		ibs_request.ring = instance;
11023f012e29Smrg		ibs_request.number_of_ibs = 1;
11033f012e29Smrg		ibs_request.ibs = &ib_info;
11043f012e29Smrg		ibs_request.resources = bo_list;
11053f012e29Smrg		ibs_request.fence_info.handle = NULL;
11063f012e29Smrg
11073f012e29Smrg		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
11083f012e29Smrg		r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
11093f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
11103f012e29Smrg
11113f012e29Smrg		fence_status.context = context_handle;
11123f012e29Smrg		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
11133f012e29Smrg		fence_status.ip_instance = 0;
11143f012e29Smrg		fence_status.ring = instance;
11153f012e29Smrg		fence_status.fence = ibs_request.seq_no;
11163f012e29Smrg
11173f012e29Smrg		r = amdgpu_cs_query_fence_status(&fence_status,
11183f012e29Smrg						 AMDGPU_TIMEOUT_INFINITE,
11193f012e29Smrg						 0, &expired);
11203f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
11213f012e29Smrg
11223f012e29Smrg		r = amdgpu_bo_list_destroy(bo_list);
11233f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
11243f012e29Smrg
11253f012e29Smrg		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
11263f012e29Smrg					     ib_result_mc_address, 4096);
11273f012e29Smrg		CU_ASSERT_EQUAL(r, 0);
11283f012e29Smrg	}
11293f012e29Smrg
11303f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
11313f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
11323f012e29Smrg}
11333f012e29Smrg
11343f012e29Smrgstatic void amdgpu_command_submission_compute_cp_write_data(void)
11353f012e29Smrg{
11363f012e29Smrg	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
11373f012e29Smrg}
11383f012e29Smrg
11393f012e29Smrgstatic void amdgpu_command_submission_compute_cp_const_fill(void)
11403f012e29Smrg{
11413f012e29Smrg	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
11423f012e29Smrg}
11433f012e29Smrg
11443f012e29Smrgstatic void amdgpu_command_submission_compute_cp_copy_data(void)
11453f012e29Smrg{
11463f012e29Smrg	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
11473f012e29Smrg}
11483f012e29Smrg
11493f012e29Smrgstatic void amdgpu_command_submission_compute(void)
11503f012e29Smrg{
11513f012e29Smrg	/* write data using the CP */
11523f012e29Smrg	amdgpu_command_submission_compute_cp_write_data();
11533f012e29Smrg	/* const fill using the CP */
11543f012e29Smrg	amdgpu_command_submission_compute_cp_const_fill();
11553f012e29Smrg	/* copy data using the CP */
11563f012e29Smrg	amdgpu_command_submission_compute_cp_copy_data();
11573f012e29Smrg	/* nop test */
11583f012e29Smrg	amdgpu_command_submission_compute_nop();
11593f012e29Smrg}
11603f012e29Smrg
11613f012e29Smrg/*
11623f012e29Smrg * caller need create/release:
11633f012e29Smrg * pm4_src, resources, ib_info, and ibs_request
11643f012e29Smrg * submit command stream described in ibs_request and wait for this IB accomplished
11653f012e29Smrg */
11663f012e29Smrgstatic void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
11673f012e29Smrg				       unsigned ip_type,
11683f012e29Smrg				       int instance, int pm4_dw, uint32_t *pm4_src,
11693f012e29Smrg				       int res_cnt, amdgpu_bo_handle *resources,
11703f012e29Smrg				       struct amdgpu_cs_ib_info *ib_info,
11713f012e29Smrg				       struct amdgpu_cs_request *ibs_request)
11723f012e29Smrg{
11733f012e29Smrg	int r;
11743f012e29Smrg	uint32_t expired;
11753f012e29Smrg	uint32_t *ring_ptr;
11763f012e29Smrg	amdgpu_bo_handle ib_result_handle;
11773f012e29Smrg	void *ib_result_cpu;
11783f012e29Smrg	uint64_t ib_result_mc_address;
11793f012e29Smrg	struct amdgpu_cs_fence fence_status = {0};
11803f012e29Smrg	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
11813f012e29Smrg	amdgpu_va_handle va_handle;
11823f012e29Smrg
11833f012e29Smrg	/* prepare CS */
11843f012e29Smrg	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
11853f012e29Smrg	CU_ASSERT_NOT_EQUAL(resources, NULL);
11863f012e29Smrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
11873f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
11883f012e29Smrg	CU_ASSERT_TRUE(pm4_dw <= 1024);
11893f012e29Smrg
11903f012e29Smrg	/* allocate IB */
11913f012e29Smrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
11923f012e29Smrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
11933f012e29Smrg				    &ib_result_handle, &ib_result_cpu,
11943f012e29Smrg				    &ib_result_mc_address, &va_handle);
11953f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
11963f012e29Smrg
11973f012e29Smrg	/* copy PM4 packet to ring from caller */
11983f012e29Smrg	ring_ptr = ib_result_cpu;
11993f012e29Smrg	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));
12003f012e29Smrg
12013f012e29Smrg	ib_info->ib_mc_address = ib_result_mc_address;
12023f012e29Smrg	ib_info->size = pm4_dw;
12033f012e29Smrg
12043f012e29Smrg	ibs_request->ip_type = ip_type;
12053f012e29Smrg	ibs_request->ring = instance;
12063f012e29Smrg	ibs_request->number_of_ibs = 1;
12073f012e29Smrg	ibs_request->ibs = ib_info;
12083f012e29Smrg	ibs_request->fence_info.handle = NULL;
12093f012e29Smrg
12103f012e29Smrg	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
12113f012e29Smrg	all_res[res_cnt] = ib_result_handle;
12123f012e29Smrg
12133f012e29Smrg	r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res,
12143f012e29Smrg				  NULL, &ibs_request->resources);
12153f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
12163f012e29Smrg
12173f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
12183f012e29Smrg
12193f012e29Smrg	/* submit CS */
12203f012e29Smrg	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
12213f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
12223f012e29Smrg
12233f012e29Smrg	r = amdgpu_bo_list_destroy(ibs_request->resources);
12243f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
12253f012e29Smrg
12263f012e29Smrg	fence_status.ip_type = ip_type;
12273f012e29Smrg	fence_status.ip_instance = 0;
12283f012e29Smrg	fence_status.ring = ibs_request->ring;
12293f012e29Smrg	fence_status.context = context_handle;
12303f012e29Smrg	fence_status.fence = ibs_request->seq_no;
12313f012e29Smrg
12323f012e29Smrg	/* wait for IB accomplished */
12333f012e29Smrg	r = amdgpu_cs_query_fence_status(&fence_status,
12343f012e29Smrg					 AMDGPU_TIMEOUT_INFINITE,
12353f012e29Smrg					 0, &expired);
12363f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
12373f012e29Smrg	CU_ASSERT_EQUAL(expired, true);
12383f012e29Smrg
12393f012e29Smrg	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
12403f012e29Smrg				     ib_result_mc_address, 4096);
12413f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
12423f012e29Smrg}
12433f012e29Smrg
12443f012e29Smrgstatic void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
12453f012e29Smrg{
12463f012e29Smrg	const int sdma_write_length = 128;
12473f012e29Smrg	const int pm4_dw = 256;
12483f012e29Smrg	amdgpu_context_handle context_handle;
12493f012e29Smrg	amdgpu_bo_handle bo;
12503f012e29Smrg	amdgpu_bo_handle *resources;
12513f012e29Smrg	uint32_t *pm4;
12523f012e29Smrg	struct amdgpu_cs_ib_info *ib_info;
12533f012e29Smrg	struct amdgpu_cs_request *ibs_request;
12543f012e29Smrg	uint64_t bo_mc;
12553f012e29Smrg	volatile uint32_t *bo_cpu;
125600a23bdaSmrg	int i, j, r, loop, ring_id;
12573f012e29Smrg	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
12583f012e29Smrg	amdgpu_va_handle va_handle;
125900a23bdaSmrg	struct drm_amdgpu_info_hw_ip hw_ip_info;
12603f012e29Smrg
12613f012e29Smrg	pm4 = calloc(pm4_dw, sizeof(*pm4));
12623f012e29Smrg	CU_ASSERT_NOT_EQUAL(pm4, NULL);
12633f012e29Smrg
12643f012e29Smrg	ib_info = calloc(1, sizeof(*ib_info));
12653f012e29Smrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
12663f012e29Smrg
12673f012e29Smrg	ibs_request = calloc(1, sizeof(*ibs_request));
12683f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
12693f012e29Smrg
127000a23bdaSmrg	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
127100a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
127200a23bdaSmrg
12733f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
12743f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
12753f012e29Smrg
12763f012e29Smrg	/* prepare resource */
12773f012e29Smrg	resources = calloc(1, sizeof(amdgpu_bo_handle));
12783f012e29Smrg	CU_ASSERT_NOT_EQUAL(resources, NULL);
12793f012e29Smrg
128000a23bdaSmrg	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
128100a23bdaSmrg		loop = 0;
128200a23bdaSmrg		while(loop < 2) {
128300a23bdaSmrg			/* allocate UC bo for sDMA use */
128400a23bdaSmrg			r = amdgpu_bo_alloc_and_map(device_handle,
128500a23bdaSmrg						    sdma_write_length * sizeof(uint32_t),
128600a23bdaSmrg						    4096, AMDGPU_GEM_DOMAIN_GTT,
128700a23bdaSmrg						    gtt_flags[loop], &bo, (void**)&bo_cpu,
128800a23bdaSmrg						    &bo_mc, &va_handle);
128900a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
12903f012e29Smrg
129100a23bdaSmrg			/* clear bo */
129200a23bdaSmrg			memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));
12933f012e29Smrg
129400a23bdaSmrg			resources[0] = bo;
12953f012e29Smrg
129600a23bdaSmrg			/* fulfill PM4: test DMA write-linear */
129700a23bdaSmrg			i = j = 0;
129800a23bdaSmrg			if (ip_type == AMDGPU_HW_IP_DMA) {
129900a23bdaSmrg				if (family_id == AMDGPU_FAMILY_SI)
130000a23bdaSmrg					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
130100a23bdaSmrg								  sdma_write_length);
130200a23bdaSmrg				else
130300a23bdaSmrg					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
130400a23bdaSmrg							       SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
130500a23bdaSmrg				pm4[i++] = 0xffffffff & bo_mc;
130600a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
130700a23bdaSmrg				if (family_id >= AMDGPU_FAMILY_AI)
130800a23bdaSmrg					pm4[i++] = sdma_write_length - 1;
130900a23bdaSmrg				else if (family_id != AMDGPU_FAMILY_SI)
131000a23bdaSmrg					pm4[i++] = sdma_write_length;
131100a23bdaSmrg				while(j++ < sdma_write_length)
131200a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
131300a23bdaSmrg			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
131400a23bdaSmrg				    (ip_type == AMDGPU_HW_IP_COMPUTE)) {
131500a23bdaSmrg				pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
131600a23bdaSmrg				pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
131700a23bdaSmrg				pm4[i++] = 0xfffffffc & bo_mc;
131800a23bdaSmrg				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
131900a23bdaSmrg				while(j++ < sdma_write_length)
132000a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
132100a23bdaSmrg			}
13223f012e29Smrg
132300a23bdaSmrg			amdgpu_test_exec_cs_helper(context_handle,
132400a23bdaSmrg						   ip_type, ring_id,
132500a23bdaSmrg						   i, pm4,
132600a23bdaSmrg						   1, resources,
132700a23bdaSmrg						   ib_info, ibs_request);
13283f012e29Smrg
132900a23bdaSmrg			/* verify if SDMA test result meets with expected */
133000a23bdaSmrg			i = 0;
133100a23bdaSmrg			while(i < sdma_write_length) {
133200a23bdaSmrg				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
133300a23bdaSmrg			}
13343f012e29Smrg
133500a23bdaSmrg			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
133600a23bdaSmrg						     sdma_write_length * sizeof(uint32_t));
133700a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
133800a23bdaSmrg			loop++;
13393f012e29Smrg		}
13403f012e29Smrg	}
13413f012e29Smrg	/* clean resources */
13423f012e29Smrg	free(resources);
13433f012e29Smrg	free(ibs_request);
13443f012e29Smrg	free(ib_info);
13453f012e29Smrg	free(pm4);
13463f012e29Smrg
13473f012e29Smrg	/* end of test */
13483f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
13493f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
13503f012e29Smrg}
13513f012e29Smrg
13523f012e29Smrgstatic void amdgpu_command_submission_sdma_write_linear(void)
13533f012e29Smrg{
13543f012e29Smrg	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
13553f012e29Smrg}
13563f012e29Smrg
13573f012e29Smrgstatic void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
13583f012e29Smrg{
13593f012e29Smrg	const int sdma_write_length = 1024 * 1024;
13603f012e29Smrg	const int pm4_dw = 256;
13613f012e29Smrg	amdgpu_context_handle context_handle;
13623f012e29Smrg	amdgpu_bo_handle bo;
13633f012e29Smrg	amdgpu_bo_handle *resources;
13643f012e29Smrg	uint32_t *pm4;
13653f012e29Smrg	struct amdgpu_cs_ib_info *ib_info;
13663f012e29Smrg	struct amdgpu_cs_request *ibs_request;
13673f012e29Smrg	uint64_t bo_mc;
13683f012e29Smrg	volatile uint32_t *bo_cpu;
136900a23bdaSmrg	int i, j, r, loop, ring_id;
13703f012e29Smrg	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
13713f012e29Smrg	amdgpu_va_handle va_handle;
137200a23bdaSmrg	struct drm_amdgpu_info_hw_ip hw_ip_info;
13733f012e29Smrg
13743f012e29Smrg	pm4 = calloc(pm4_dw, sizeof(*pm4));
13753f012e29Smrg	CU_ASSERT_NOT_EQUAL(pm4, NULL);
13763f012e29Smrg
13773f012e29Smrg	ib_info = calloc(1, sizeof(*ib_info));
13783f012e29Smrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
13793f012e29Smrg
13803f012e29Smrg	ibs_request = calloc(1, sizeof(*ibs_request));
13813f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
13823f012e29Smrg
138300a23bdaSmrg	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
138400a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
138500a23bdaSmrg
13863f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
13873f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
13883f012e29Smrg
13893f012e29Smrg	/* prepare resource */
13903f012e29Smrg	resources = calloc(1, sizeof(amdgpu_bo_handle));
13913f012e29Smrg	CU_ASSERT_NOT_EQUAL(resources, NULL);
13923f012e29Smrg
139300a23bdaSmrg	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
139400a23bdaSmrg		loop = 0;
139500a23bdaSmrg		while(loop < 2) {
139600a23bdaSmrg			/* allocate UC bo for sDMA use */
139700a23bdaSmrg			r = amdgpu_bo_alloc_and_map(device_handle,
139800a23bdaSmrg						    sdma_write_length, 4096,
139900a23bdaSmrg						    AMDGPU_GEM_DOMAIN_GTT,
140000a23bdaSmrg						    gtt_flags[loop], &bo, (void**)&bo_cpu,
140100a23bdaSmrg						    &bo_mc, &va_handle);
140200a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
14033f012e29Smrg
140400a23bdaSmrg			/* clear bo */
140500a23bdaSmrg			memset((void*)bo_cpu, 0, sdma_write_length);
14063f012e29Smrg
140700a23bdaSmrg			resources[0] = bo;
14083f012e29Smrg
140900a23bdaSmrg			/* fulfill PM4: test DMA const fill */
141000a23bdaSmrg			i = j = 0;
141100a23bdaSmrg			if (ip_type == AMDGPU_HW_IP_DMA) {
141200a23bdaSmrg				if (family_id == AMDGPU_FAMILY_SI) {
141300a23bdaSmrg					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
141400a23bdaSmrg								  0, 0, 0,
141500a23bdaSmrg								  sdma_write_length / 4);
141600a23bdaSmrg					pm4[i++] = 0xfffffffc & bo_mc;
141700a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
141800a23bdaSmrg					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
141900a23bdaSmrg				} else {
142000a23bdaSmrg					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
142100a23bdaSmrg							       SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
142200a23bdaSmrg					pm4[i++] = 0xffffffff & bo_mc;
142300a23bdaSmrg					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
142400a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
142500a23bdaSmrg					if (family_id >= AMDGPU_FAMILY_AI)
142600a23bdaSmrg						pm4[i++] = sdma_write_length - 1;
142700a23bdaSmrg					else
142800a23bdaSmrg						pm4[i++] = sdma_write_length;
142900a23bdaSmrg				}
143000a23bdaSmrg			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
143100a23bdaSmrg				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
143200a23bdaSmrg				if (family_id == AMDGPU_FAMILY_SI) {
143300a23bdaSmrg					pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
143400a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
143500a23bdaSmrg					pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
143600a23bdaSmrg						   PACKET3_DMA_DATA_SI_DST_SEL(0) |
143700a23bdaSmrg						   PACKET3_DMA_DATA_SI_SRC_SEL(2) |
143800a23bdaSmrg						   PACKET3_DMA_DATA_SI_CP_SYNC;
143900a23bdaSmrg					pm4[i++] = 0xffffffff & bo_mc;
144000a23bdaSmrg					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1441d8807b2fSmrg					pm4[i++] = sdma_write_length;
144200a23bdaSmrg				} else {
144300a23bdaSmrg					pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
144400a23bdaSmrg					pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
144500a23bdaSmrg						   PACKET3_DMA_DATA_DST_SEL(0) |
144600a23bdaSmrg						   PACKET3_DMA_DATA_SRC_SEL(2) |
144700a23bdaSmrg						   PACKET3_DMA_DATA_CP_SYNC;
144800a23bdaSmrg					pm4[i++] = 0xdeadbeaf;
144900a23bdaSmrg					pm4[i++] = 0;
145000a23bdaSmrg					pm4[i++] = 0xfffffffc & bo_mc;
145100a23bdaSmrg					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
145200a23bdaSmrg					pm4[i++] = sdma_write_length;
145300a23bdaSmrg				}
1454d8807b2fSmrg			}
14553f012e29Smrg
145600a23bdaSmrg			amdgpu_test_exec_cs_helper(context_handle,
145700a23bdaSmrg						   ip_type, ring_id,
145800a23bdaSmrg						   i, pm4,
145900a23bdaSmrg						   1, resources,
146000a23bdaSmrg						   ib_info, ibs_request);
14613f012e29Smrg
146200a23bdaSmrg			/* verify if SDMA test result meets with expected */
146300a23bdaSmrg			i = 0;
146400a23bdaSmrg			while(i < (sdma_write_length / 4)) {
146500a23bdaSmrg				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
146600a23bdaSmrg			}
14673f012e29Smrg
146800a23bdaSmrg			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
146900a23bdaSmrg						     sdma_write_length);
147000a23bdaSmrg			CU_ASSERT_EQUAL(r, 0);
147100a23bdaSmrg			loop++;
147200a23bdaSmrg		}
14733f012e29Smrg	}
14743f012e29Smrg	/* clean resources */
14753f012e29Smrg	free(resources);
14763f012e29Smrg	free(ibs_request);
14773f012e29Smrg	free(ib_info);
14783f012e29Smrg	free(pm4);
14793f012e29Smrg
14803f012e29Smrg	/* end of test */
14813f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
14823f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
14833f012e29Smrg}
14843f012e29Smrg
14853f012e29Smrgstatic void amdgpu_command_submission_sdma_const_fill(void)
14863f012e29Smrg{
14873f012e29Smrg	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
14883f012e29Smrg}
14893f012e29Smrg
14903f012e29Smrgstatic void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
14913f012e29Smrg{
14923f012e29Smrg	const int sdma_write_length = 1024;
14933f012e29Smrg	const int pm4_dw = 256;
14943f012e29Smrg	amdgpu_context_handle context_handle;
14953f012e29Smrg	amdgpu_bo_handle bo1, bo2;
14963f012e29Smrg	amdgpu_bo_handle *resources;
14973f012e29Smrg	uint32_t *pm4;
14983f012e29Smrg	struct amdgpu_cs_ib_info *ib_info;
14993f012e29Smrg	struct amdgpu_cs_request *ibs_request;
15003f012e29Smrg	uint64_t bo1_mc, bo2_mc;
15013f012e29Smrg	volatile unsigned char *bo1_cpu, *bo2_cpu;
150200a23bdaSmrg	int i, j, r, loop1, loop2, ring_id;
15033f012e29Smrg	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
15043f012e29Smrg	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
150500a23bdaSmrg	struct drm_amdgpu_info_hw_ip hw_ip_info;
15063f012e29Smrg
15073f012e29Smrg	pm4 = calloc(pm4_dw, sizeof(*pm4));
15083f012e29Smrg	CU_ASSERT_NOT_EQUAL(pm4, NULL);
15093f012e29Smrg
15103f012e29Smrg	ib_info = calloc(1, sizeof(*ib_info));
15113f012e29Smrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
15123f012e29Smrg
15133f012e29Smrg	ibs_request = calloc(1, sizeof(*ibs_request));
15143f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
15153f012e29Smrg
151600a23bdaSmrg	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
151700a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
151800a23bdaSmrg
15193f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
15203f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
15213f012e29Smrg
15223f012e29Smrg	/* prepare resource */
15233f012e29Smrg	resources = calloc(2, sizeof(amdgpu_bo_handle));
15243f012e29Smrg	CU_ASSERT_NOT_EQUAL(resources, NULL);
15253f012e29Smrg
152600a23bdaSmrg	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
152700a23bdaSmrg		loop1 = loop2 = 0;
152800a23bdaSmrg		/* run 9 circle to test all mapping combination */
152900a23bdaSmrg		while(loop1 < 2) {
153000a23bdaSmrg			while(loop2 < 2) {
153100a23bdaSmrg				/* allocate UC bo1for sDMA use */
153200a23bdaSmrg				r = amdgpu_bo_alloc_and_map(device_handle,
153300a23bdaSmrg							    sdma_write_length, 4096,
153400a23bdaSmrg							    AMDGPU_GEM_DOMAIN_GTT,
153500a23bdaSmrg							    gtt_flags[loop1], &bo1,
153600a23bdaSmrg							    (void**)&bo1_cpu, &bo1_mc,
153700a23bdaSmrg							    &bo1_va_handle);
153800a23bdaSmrg				CU_ASSERT_EQUAL(r, 0);
153900a23bdaSmrg
154000a23bdaSmrg				/* set bo1 */
154100a23bdaSmrg				memset((void*)bo1_cpu, 0xaa, sdma_write_length);
154200a23bdaSmrg
154300a23bdaSmrg				/* allocate UC bo2 for sDMA use */
154400a23bdaSmrg				r = amdgpu_bo_alloc_and_map(device_handle,
154500a23bdaSmrg							    sdma_write_length, 4096,
154600a23bdaSmrg							    AMDGPU_GEM_DOMAIN_GTT,
154700a23bdaSmrg							    gtt_flags[loop2], &bo2,
154800a23bdaSmrg							    (void**)&bo2_cpu, &bo2_mc,
154900a23bdaSmrg							    &bo2_va_handle);
155000a23bdaSmrg				CU_ASSERT_EQUAL(r, 0);
155100a23bdaSmrg
155200a23bdaSmrg				/* clear bo2 */
155300a23bdaSmrg				memset((void*)bo2_cpu, 0, sdma_write_length);
155400a23bdaSmrg
155500a23bdaSmrg				resources[0] = bo1;
155600a23bdaSmrg				resources[1] = bo2;
155700a23bdaSmrg
155800a23bdaSmrg				/* fulfill PM4: test DMA copy linear */
155900a23bdaSmrg				i = j = 0;
156000a23bdaSmrg				if (ip_type == AMDGPU_HW_IP_DMA) {
156100a23bdaSmrg					if (family_id == AMDGPU_FAMILY_SI) {
156200a23bdaSmrg						pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
156300a23bdaSmrg									  0, 0, 0,
156400a23bdaSmrg									  sdma_write_length);
156500a23bdaSmrg						pm4[i++] = 0xffffffff & bo2_mc;
156600a23bdaSmrg						pm4[i++] = 0xffffffff & bo1_mc;
156700a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
156800a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
156900a23bdaSmrg					} else {
157000a23bdaSmrg						pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
157100a23bdaSmrg								       SDMA_COPY_SUB_OPCODE_LINEAR,
157200a23bdaSmrg								       0);
157300a23bdaSmrg						if (family_id >= AMDGPU_FAMILY_AI)
157400a23bdaSmrg							pm4[i++] = sdma_write_length - 1;
157500a23bdaSmrg						else
157600a23bdaSmrg							pm4[i++] = sdma_write_length;
157700a23bdaSmrg						pm4[i++] = 0;
157800a23bdaSmrg						pm4[i++] = 0xffffffff & bo1_mc;
157900a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
158000a23bdaSmrg						pm4[i++] = 0xffffffff & bo2_mc;
158100a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
158200a23bdaSmrg					}
158300a23bdaSmrg				} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
158400a23bdaSmrg					   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
158500a23bdaSmrg					if (family_id == AMDGPU_FAMILY_SI) {
158600a23bdaSmrg						pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
158700a23bdaSmrg						pm4[i++] = 0xfffffffc & bo1_mc;
158800a23bdaSmrg						pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
158900a23bdaSmrg							   PACKET3_DMA_DATA_SI_DST_SEL(0) |
159000a23bdaSmrg							   PACKET3_DMA_DATA_SI_SRC_SEL(0) |
159100a23bdaSmrg							   PACKET3_DMA_DATA_SI_CP_SYNC |
159200a23bdaSmrg							   (0xffff00000000 & bo1_mc) >> 32;
159300a23bdaSmrg						pm4[i++] = 0xfffffffc & bo2_mc;
159400a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1595d8807b2fSmrg						pm4[i++] = sdma_write_length;
159600a23bdaSmrg					} else {
159700a23bdaSmrg						pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
159800a23bdaSmrg						pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
159900a23bdaSmrg							   PACKET3_DMA_DATA_DST_SEL(0) |
160000a23bdaSmrg							   PACKET3_DMA_DATA_SRC_SEL(0) |
160100a23bdaSmrg							   PACKET3_DMA_DATA_CP_SYNC;
160200a23bdaSmrg						pm4[i++] = 0xfffffffc & bo1_mc;
160300a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
160400a23bdaSmrg						pm4[i++] = 0xfffffffc & bo2_mc;
160500a23bdaSmrg						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
160600a23bdaSmrg						pm4[i++] = sdma_write_length;
160700a23bdaSmrg					}
1608d8807b2fSmrg				}
16093f012e29Smrg
161000a23bdaSmrg				amdgpu_test_exec_cs_helper(context_handle,
161100a23bdaSmrg							   ip_type, ring_id,
161200a23bdaSmrg							   i, pm4,
161300a23bdaSmrg							   2, resources,
161400a23bdaSmrg							   ib_info, ibs_request);
16153f012e29Smrg
161600a23bdaSmrg				/* verify if SDMA test result meets with expected */
161700a23bdaSmrg				i = 0;
161800a23bdaSmrg				while(i < sdma_write_length) {
161900a23bdaSmrg					CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
162000a23bdaSmrg				}
162100a23bdaSmrg				r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
162200a23bdaSmrg							     sdma_write_length);
162300a23bdaSmrg				CU_ASSERT_EQUAL(r, 0);
162400a23bdaSmrg				r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
162500a23bdaSmrg							     sdma_write_length);
162600a23bdaSmrg				CU_ASSERT_EQUAL(r, 0);
162700a23bdaSmrg				loop2++;
16283f012e29Smrg			}
162900a23bdaSmrg			loop1++;
16303f012e29Smrg		}
16313f012e29Smrg	}
16323f012e29Smrg	/* clean resources */
16333f012e29Smrg	free(resources);
16343f012e29Smrg	free(ibs_request);
16353f012e29Smrg	free(ib_info);
16363f012e29Smrg	free(pm4);
16373f012e29Smrg
16383f012e29Smrg	/* end of test */
16393f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
16403f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
16413f012e29Smrg}
16423f012e29Smrg
16433f012e29Smrgstatic void amdgpu_command_submission_sdma_copy_linear(void)
16443f012e29Smrg{
16453f012e29Smrg	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
16463f012e29Smrg}
16473f012e29Smrg
16483f012e29Smrgstatic void amdgpu_command_submission_sdma(void)
16493f012e29Smrg{
16503f012e29Smrg	amdgpu_command_submission_sdma_write_linear();
16513f012e29Smrg	amdgpu_command_submission_sdma_const_fill();
16523f012e29Smrg	amdgpu_command_submission_sdma_copy_linear();
16533f012e29Smrg}
16543f012e29Smrg
1655d8807b2fSmrgstatic void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
1656d8807b2fSmrg{
1657d8807b2fSmrg	amdgpu_context_handle context_handle;
1658d8807b2fSmrg	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
1659d8807b2fSmrg	void *ib_result_cpu, *ib_result_ce_cpu;
1660d8807b2fSmrg	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
1661d8807b2fSmrg	struct amdgpu_cs_request ibs_request[2] = {0};
1662d8807b2fSmrg	struct amdgpu_cs_ib_info ib_info[2];
1663d8807b2fSmrg	struct amdgpu_cs_fence fence_status[2] = {0};
1664d8807b2fSmrg	uint32_t *ptr;
1665d8807b2fSmrg	uint32_t expired;
1666d8807b2fSmrg	amdgpu_bo_list_handle bo_list;
1667d8807b2fSmrg	amdgpu_va_handle va_handle, va_handle_ce;
1668d8807b2fSmrg	int r;
1669d8807b2fSmrg	int i = 0, ib_cs_num = 2;
1670d8807b2fSmrg
1671d8807b2fSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1672d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1673d8807b2fSmrg
1674d8807b2fSmrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1675d8807b2fSmrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
1676d8807b2fSmrg				    &ib_result_handle, &ib_result_cpu,
1677d8807b2fSmrg				    &ib_result_mc_address, &va_handle);
1678d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1679d8807b2fSmrg
1680d8807b2fSmrg	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1681d8807b2fSmrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
1682d8807b2fSmrg				    &ib_result_ce_handle, &ib_result_ce_cpu,
1683d8807b2fSmrg				    &ib_result_ce_mc_address, &va_handle_ce);
1684d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1685d8807b2fSmrg
1686d8807b2fSmrg	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
1687d8807b2fSmrg			       ib_result_ce_handle, &bo_list);
1688d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1689d8807b2fSmrg
1690d8807b2fSmrg	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
1691d8807b2fSmrg
1692d8807b2fSmrg	/* IT_SET_CE_DE_COUNTERS */
1693d8807b2fSmrg	ptr = ib_result_ce_cpu;
1694d8807b2fSmrg	if (family_id != AMDGPU_FAMILY_SI) {
1695d8807b2fSmrg		ptr[i++] = 0xc0008900;
1696d8807b2fSmrg		ptr[i++] = 0;
1697d8807b2fSmrg	}
1698d8807b2fSmrg	ptr[i++] = 0xc0008400;
1699d8807b2fSmrg	ptr[i++] = 1;
1700d8807b2fSmrg	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
1701d8807b2fSmrg	ib_info[0].size = i;
1702d8807b2fSmrg	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
1703d8807b2fSmrg
1704d8807b2fSmrg	/* IT_WAIT_ON_CE_COUNTER */
1705d8807b2fSmrg	ptr = ib_result_cpu;
1706d8807b2fSmrg	ptr[0] = 0xc0008600;
1707d8807b2fSmrg	ptr[1] = 0x00000001;
1708d8807b2fSmrg	ib_info[1].ib_mc_address = ib_result_mc_address;
1709d8807b2fSmrg	ib_info[1].size = 2;
1710d8807b2fSmrg
1711d8807b2fSmrg	for (i = 0; i < ib_cs_num; i++) {
1712d8807b2fSmrg		ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
1713d8807b2fSmrg		ibs_request[i].number_of_ibs = 2;
1714d8807b2fSmrg		ibs_request[i].ibs = ib_info;
1715d8807b2fSmrg		ibs_request[i].resources = bo_list;
1716d8807b2fSmrg		ibs_request[i].fence_info.handle = NULL;
1717d8807b2fSmrg	}
1718d8807b2fSmrg
1719d8807b2fSmrg	r = amdgpu_cs_submit(context_handle, 0,ibs_request, ib_cs_num);
1720d8807b2fSmrg
1721d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1722d8807b2fSmrg
1723d8807b2fSmrg	for (i = 0; i < ib_cs_num; i++) {
1724d8807b2fSmrg		fence_status[i].context = context_handle;
1725d8807b2fSmrg		fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
1726d8807b2fSmrg		fence_status[i].fence = ibs_request[i].seq_no;
1727d8807b2fSmrg	}
1728d8807b2fSmrg
1729d8807b2fSmrg	r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
1730d8807b2fSmrg				AMDGPU_TIMEOUT_INFINITE,
1731d8807b2fSmrg				&expired, NULL);
1732d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1733d8807b2fSmrg
1734d8807b2fSmrg	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1735d8807b2fSmrg				     ib_result_mc_address, 4096);
1736d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1737d8807b2fSmrg
1738d8807b2fSmrg	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
1739d8807b2fSmrg				     ib_result_ce_mc_address, 4096);
1740d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1741d8807b2fSmrg
1742d8807b2fSmrg	r = amdgpu_bo_list_destroy(bo_list);
1743d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1744d8807b2fSmrg
1745d8807b2fSmrg	r = amdgpu_cs_ctx_free(context_handle);
1746d8807b2fSmrg	CU_ASSERT_EQUAL(r, 0);
1747d8807b2fSmrg}
1748d8807b2fSmrg
/*
 * Multi-fence test entry point: runs the multi-fence submission once with
 * wait_all = true and then once with wait_all = false.
 */
static void amdgpu_command_submission_multi_fence(void)
{
	static const bool wait_all_modes[] = { true, false };
	unsigned n;

	for (n = 0; n < sizeof(wait_all_modes) / sizeof(wait_all_modes[0]); n++)
		amdgpu_command_submission_multi_fence_wait_all(wait_all_modes[n]);
}
1754d8807b2fSmrg
17553f012e29Smrgstatic void amdgpu_userptr_test(void)
17563f012e29Smrg{
17573f012e29Smrg	int i, r, j;
17583f012e29Smrg	uint32_t *pm4 = NULL;
17593f012e29Smrg	uint64_t bo_mc;
17603f012e29Smrg	void *ptr = NULL;
17613f012e29Smrg	int pm4_dw = 256;
17623f012e29Smrg	int sdma_write_length = 4;
17633f012e29Smrg	amdgpu_bo_handle handle;
17643f012e29Smrg	amdgpu_context_handle context_handle;
17653f012e29Smrg	struct amdgpu_cs_ib_info *ib_info;
17663f012e29Smrg	struct amdgpu_cs_request *ibs_request;
17673f012e29Smrg	amdgpu_bo_handle buf_handle;
17683f012e29Smrg	amdgpu_va_handle va_handle;
17693f012e29Smrg
17703f012e29Smrg	pm4 = calloc(pm4_dw, sizeof(*pm4));
17713f012e29Smrg	CU_ASSERT_NOT_EQUAL(pm4, NULL);
17723f012e29Smrg
17733f012e29Smrg	ib_info = calloc(1, sizeof(*ib_info));
17743f012e29Smrg	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
17753f012e29Smrg
17763f012e29Smrg	ibs_request = calloc(1, sizeof(*ibs_request));
17773f012e29Smrg	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
17783f012e29Smrg
17793f012e29Smrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
17803f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
17813f012e29Smrg
17823f012e29Smrg	posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
17833f012e29Smrg	CU_ASSERT_NOT_EQUAL(ptr, NULL);
17843f012e29Smrg	memset(ptr, 0, BUFFER_SIZE);
17853f012e29Smrg
17863f012e29Smrg	r = amdgpu_create_bo_from_user_mem(device_handle,
17873f012e29Smrg					   ptr, BUFFER_SIZE, &buf_handle);
17883f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
17893f012e29Smrg
17903f012e29Smrg	r = amdgpu_va_range_alloc(device_handle,
17913f012e29Smrg				  amdgpu_gpu_va_range_general,
17923f012e29Smrg				  BUFFER_SIZE, 1, 0, &bo_mc,
17933f012e29Smrg				  &va_handle, 0);
17943f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
17953f012e29Smrg
17963f012e29Smrg	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
17973f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
17983f012e29Smrg
17993f012e29Smrg	handle = buf_handle;
18003f012e29Smrg
18013f012e29Smrg	j = i = 0;
1802d8807b2fSmrg
1803d8807b2fSmrg	if (family_id == AMDGPU_FAMILY_SI)
1804d8807b2fSmrg		pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
1805d8807b2fSmrg				sdma_write_length);
1806d8807b2fSmrg	else
1807d8807b2fSmrg		pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
1808d8807b2fSmrg				SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
18093f012e29Smrg	pm4[i++] = 0xffffffff & bo_mc;
18103f012e29Smrg	pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1811d8807b2fSmrg	if (family_id >= AMDGPU_FAMILY_AI)
1812d8807b2fSmrg		pm4[i++] = sdma_write_length - 1;
1813d8807b2fSmrg	else if (family_id != AMDGPU_FAMILY_SI)
1814d8807b2fSmrg		pm4[i++] = sdma_write_length;
18153f012e29Smrg
18163f012e29Smrg	while (j++ < sdma_write_length)
18173f012e29Smrg		pm4[i++] = 0xdeadbeaf;
18183f012e29Smrg
181900a23bdaSmrg	if (!fork()) {
182000a23bdaSmrg		pm4[0] = 0x0;
182100a23bdaSmrg		exit(0);
182200a23bdaSmrg	}
182300a23bdaSmrg
18243f012e29Smrg	amdgpu_test_exec_cs_helper(context_handle,
18253f012e29Smrg				   AMDGPU_HW_IP_DMA, 0,
18263f012e29Smrg				   i, pm4,
18273f012e29Smrg				   1, &handle,
18283f012e29Smrg				   ib_info, ibs_request);
18293f012e29Smrg	i = 0;
18303f012e29Smrg	while (i < sdma_write_length) {
18313f012e29Smrg		CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
18323f012e29Smrg	}
18333f012e29Smrg	free(ibs_request);
18343f012e29Smrg	free(ib_info);
18353f012e29Smrg	free(pm4);
18363f012e29Smrg
18373f012e29Smrg	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
18383f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
18393f012e29Smrg	r = amdgpu_va_range_free(va_handle);
18403f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
18413f012e29Smrg	r = amdgpu_bo_free(buf_handle);
18423f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
18433f012e29Smrg	free(ptr);
18443f012e29Smrg
18453f012e29Smrg	r = amdgpu_cs_ctx_free(context_handle);
18463f012e29Smrg	CU_ASSERT_EQUAL(r, 0);
184700a23bdaSmrg
184800a23bdaSmrg	wait(NULL);
184900a23bdaSmrg}
185000a23bdaSmrg
185100a23bdaSmrgstatic void amdgpu_sync_dependency_test(void)
185200a23bdaSmrg{
185300a23bdaSmrg	amdgpu_context_handle context_handle[2];
185400a23bdaSmrg	amdgpu_bo_handle ib_result_handle;
185500a23bdaSmrg	void *ib_result_cpu;
185600a23bdaSmrg	uint64_t ib_result_mc_address;
185700a23bdaSmrg	struct amdgpu_cs_request ibs_request;
185800a23bdaSmrg	struct amdgpu_cs_ib_info ib_info;
185900a23bdaSmrg	struct amdgpu_cs_fence fence_status;
186000a23bdaSmrg	uint32_t expired;
186100a23bdaSmrg	int i, j, r;
186200a23bdaSmrg	amdgpu_bo_list_handle bo_list;
186300a23bdaSmrg	amdgpu_va_handle va_handle;
186400a23bdaSmrg	static uint32_t *ptr;
186500a23bdaSmrg	uint64_t seq_no;
186600a23bdaSmrg
186700a23bdaSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
186800a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
186900a23bdaSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
187000a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
187100a23bdaSmrg
187200a23bdaSmrg	r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
187300a23bdaSmrg			AMDGPU_GEM_DOMAIN_GTT, 0,
187400a23bdaSmrg						    &ib_result_handle, &ib_result_cpu,
187500a23bdaSmrg						    &ib_result_mc_address, &va_handle);
187600a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
187700a23bdaSmrg
187800a23bdaSmrg	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
187900a23bdaSmrg			       &bo_list);
188000a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
188100a23bdaSmrg
188200a23bdaSmrg	ptr = ib_result_cpu;
188300a23bdaSmrg	i = 0;
188400a23bdaSmrg
188500a23bdaSmrg	memcpy(ptr + CODE_OFFSET , shader_bin, sizeof(shader_bin));
188600a23bdaSmrg
188700a23bdaSmrg	/* Dispatch minimal init config and verify it's executed */
188800a23bdaSmrg	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
188900a23bdaSmrg	ptr[i++] = 0x80000000;
189000a23bdaSmrg	ptr[i++] = 0x80000000;
189100a23bdaSmrg
189200a23bdaSmrg	ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
189300a23bdaSmrg	ptr[i++] = 0x80000000;
189400a23bdaSmrg
189500a23bdaSmrg
189600a23bdaSmrg	/* Program compute regs */
189700a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
189800a23bdaSmrg	ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
189900a23bdaSmrg	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
190000a23bdaSmrg	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;
190100a23bdaSmrg
190200a23bdaSmrg
190300a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
190400a23bdaSmrg	ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
190500a23bdaSmrg	/*
190600a23bdaSmrg	 * 002c0040         COMPUTE_PGM_RSRC1 <- VGPRS = 0
190700a23bdaSmrg	                                      SGPRS = 1
190800a23bdaSmrg	                                      PRIORITY = 0
190900a23bdaSmrg	                                      FLOAT_MODE = 192 (0xc0)
191000a23bdaSmrg	                                      PRIV = 0
191100a23bdaSmrg	                                      DX10_CLAMP = 1
191200a23bdaSmrg	                                      DEBUG_MODE = 0
191300a23bdaSmrg	                                      IEEE_MODE = 0
191400a23bdaSmrg	                                      BULKY = 0
191500a23bdaSmrg	                                      CDBG_USER = 0
191600a23bdaSmrg	 *
191700a23bdaSmrg	 */
191800a23bdaSmrg	ptr[i++] = 0x002c0040;
191900a23bdaSmrg
192000a23bdaSmrg
192100a23bdaSmrg	/*
192200a23bdaSmrg	 * 00000010         COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
192300a23bdaSmrg	                                      USER_SGPR = 8
192400a23bdaSmrg	                                      TRAP_PRESENT = 0
192500a23bdaSmrg	                                      TGID_X_EN = 0
192600a23bdaSmrg	                                      TGID_Y_EN = 0
192700a23bdaSmrg	                                      TGID_Z_EN = 0
192800a23bdaSmrg	                                      TG_SIZE_EN = 0
192900a23bdaSmrg	                                      TIDIG_COMP_CNT = 0
193000a23bdaSmrg	                                      EXCP_EN_MSB = 0
193100a23bdaSmrg	                                      LDS_SIZE = 0
193200a23bdaSmrg	                                      EXCP_EN = 0
193300a23bdaSmrg	 *
193400a23bdaSmrg	 */
193500a23bdaSmrg	ptr[i++] = 0x00000010;
193600a23bdaSmrg
193700a23bdaSmrg
193800a23bdaSmrg/*
193900a23bdaSmrg * 00000100         COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
194000a23bdaSmrg                                         WAVESIZE = 0
194100a23bdaSmrg *
194200a23bdaSmrg */
194300a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
194400a23bdaSmrg	ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
194500a23bdaSmrg	ptr[i++] = 0x00000100;
194600a23bdaSmrg
194700a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
194800a23bdaSmrg	ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
194900a23bdaSmrg	ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
195000a23bdaSmrg	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
195100a23bdaSmrg
195200a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
195300a23bdaSmrg	ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
195400a23bdaSmrg	ptr[i++] = 0;
195500a23bdaSmrg
195600a23bdaSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
195700a23bdaSmrg	ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
195800a23bdaSmrg	ptr[i++] = 1;
195900a23bdaSmrg	ptr[i++] = 1;
196000a23bdaSmrg	ptr[i++] = 1;
196100a23bdaSmrg
196200a23bdaSmrg
196300a23bdaSmrg	/* Dispatch */
196400a23bdaSmrg	ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
196500a23bdaSmrg	ptr[i++] = 1;
196600a23bdaSmrg	ptr[i++] = 1;
196700a23bdaSmrg	ptr[i++] = 1;
196800a23bdaSmrg	ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */
196900a23bdaSmrg
197000a23bdaSmrg
197100a23bdaSmrg	while (i & 7)
197200a23bdaSmrg		ptr[i++] =  0xffff1000; /* type3 nop packet */
197300a23bdaSmrg
197400a23bdaSmrg	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
197500a23bdaSmrg	ib_info.ib_mc_address = ib_result_mc_address;
197600a23bdaSmrg	ib_info.size = i;
197700a23bdaSmrg
197800a23bdaSmrg	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
197900a23bdaSmrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
198000a23bdaSmrg	ibs_request.ring = 0;
198100a23bdaSmrg	ibs_request.number_of_ibs = 1;
198200a23bdaSmrg	ibs_request.ibs = &ib_info;
198300a23bdaSmrg	ibs_request.resources = bo_list;
198400a23bdaSmrg	ibs_request.fence_info.handle = NULL;
198500a23bdaSmrg
198600a23bdaSmrg	r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request, 1);
198700a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
198800a23bdaSmrg	seq_no = ibs_request.seq_no;
198900a23bdaSmrg
199000a23bdaSmrg
199100a23bdaSmrg
199200a23bdaSmrg	/* Prepare second command with dependency on the first */
199300a23bdaSmrg	j = i;
199400a23bdaSmrg	ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
199500a23bdaSmrg	ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
199600a23bdaSmrg	ptr[i++] =          0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
199700a23bdaSmrg	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
199800a23bdaSmrg	ptr[i++] = 99;
199900a23bdaSmrg
200000a23bdaSmrg	while (i & 7)
200100a23bdaSmrg		ptr[i++] =  0xffff1000; /* type3 nop packet */
200200a23bdaSmrg
200300a23bdaSmrg	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
200400a23bdaSmrg	ib_info.ib_mc_address = ib_result_mc_address + j * 4;
200500a23bdaSmrg	ib_info.size = i - j;
200600a23bdaSmrg
200700a23bdaSmrg	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
200800a23bdaSmrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
200900a23bdaSmrg	ibs_request.ring = 0;
201000a23bdaSmrg	ibs_request.number_of_ibs = 1;
201100a23bdaSmrg	ibs_request.ibs = &ib_info;
201200a23bdaSmrg	ibs_request.resources = bo_list;
201300a23bdaSmrg	ibs_request.fence_info.handle = NULL;
201400a23bdaSmrg
201500a23bdaSmrg	ibs_request.number_of_dependencies = 1;
201600a23bdaSmrg
201700a23bdaSmrg	ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
201800a23bdaSmrg	ibs_request.dependencies[0].context = context_handle[1];
201900a23bdaSmrg	ibs_request.dependencies[0].ip_instance = 0;
202000a23bdaSmrg	ibs_request.dependencies[0].ring = 0;
202100a23bdaSmrg	ibs_request.dependencies[0].fence = seq_no;
202200a23bdaSmrg
202300a23bdaSmrg
202400a23bdaSmrg	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request, 1);
202500a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
202600a23bdaSmrg
202700a23bdaSmrg
202800a23bdaSmrg	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
202900a23bdaSmrg	fence_status.context = context_handle[0];
203000a23bdaSmrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
203100a23bdaSmrg	fence_status.ip_instance = 0;
203200a23bdaSmrg	fence_status.ring = 0;
203300a23bdaSmrg	fence_status.fence = ibs_request.seq_no;
203400a23bdaSmrg
203500a23bdaSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
203600a23bdaSmrg		       AMDGPU_TIMEOUT_INFINITE,0, &expired);
203700a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
203800a23bdaSmrg
203900a23bdaSmrg	/* Expect the second command to wait for shader to complete */
204000a23bdaSmrg	CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);
204100a23bdaSmrg
204200a23bdaSmrg	r = amdgpu_bo_list_destroy(bo_list);
204300a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
204400a23bdaSmrg
204500a23bdaSmrg	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
204600a23bdaSmrg				     ib_result_mc_address, 4096);
204700a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
204800a23bdaSmrg
204900a23bdaSmrg	r = amdgpu_cs_ctx_free(context_handle[0]);
205000a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
205100a23bdaSmrg	r = amdgpu_cs_ctx_free(context_handle[1]);
205200a23bdaSmrg	CU_ASSERT_EQUAL(r, 0);
205300a23bdaSmrg
205400a23bdaSmrg	free(ibs_request.dependencies);
20553f012e29Smrg}
20565324fb0dSmrg
20575324fb0dSmrgstatic int amdgpu_dispatch_load_cs_shader(uint8_t *ptr,
20585324fb0dSmrg					   int cs_type)
20595324fb0dSmrg{
20605324fb0dSmrg	uint32_t shader_size;
20615324fb0dSmrg	const uint32_t *shader;
20625324fb0dSmrg
20635324fb0dSmrg	switch (cs_type) {
20645324fb0dSmrg		case CS_BUFFERCLEAR:
20655324fb0dSmrg			shader = bufferclear_cs_shader_gfx9;
20665324fb0dSmrg			shader_size = sizeof(bufferclear_cs_shader_gfx9);
20675324fb0dSmrg			break;
20685324fb0dSmrg		case CS_BUFFERCOPY:
20695324fb0dSmrg			shader = buffercopy_cs_shader_gfx9;
20705324fb0dSmrg			shader_size = sizeof(buffercopy_cs_shader_gfx9);
20715324fb0dSmrg			break;
20725324fb0dSmrg		default:
20735324fb0dSmrg			return -1;
20745324fb0dSmrg			break;
20755324fb0dSmrg	}
20765324fb0dSmrg
20775324fb0dSmrg	memcpy(ptr, shader, shader_size);
20785324fb0dSmrg	return 0;
20795324fb0dSmrg}
20805324fb0dSmrg
20815324fb0dSmrgstatic int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type)
20825324fb0dSmrg{
20835324fb0dSmrg	int i = 0;
20845324fb0dSmrg
20855324fb0dSmrg	/* Write context control and load shadowing register if necessary */
20865324fb0dSmrg	if (ip_type == AMDGPU_HW_IP_GFX) {
20875324fb0dSmrg		ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
20885324fb0dSmrg		ptr[i++] = 0x80000000;
20895324fb0dSmrg		ptr[i++] = 0x80000000;
20905324fb0dSmrg	}
20915324fb0dSmrg
20925324fb0dSmrg	/* Issue commands to set default compute state. */
20935324fb0dSmrg	/* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */
20945324fb0dSmrg	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 3);
20955324fb0dSmrg	ptr[i++] = 0x204;
20965324fb0dSmrg	i += 3;
20975324fb0dSmrg	/* clear mmCOMPUTE_RESOURCE_LIMITS */
20985324fb0dSmrg	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
20995324fb0dSmrg	ptr[i++] = 0x215;
21005324fb0dSmrg	ptr[i++] = 0;
21015324fb0dSmrg	/* clear mmCOMPUTE_TMPRING_SIZE */
21025324fb0dSmrg	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
21035324fb0dSmrg	ptr[i++] = 0x218;
21045324fb0dSmrg	ptr[i++] = 0;
21055324fb0dSmrg
21065324fb0dSmrg	return i;
21075324fb0dSmrg}
21085324fb0dSmrg
21095324fb0dSmrgstatic int amdgpu_dispatch_write_cumask(uint32_t *ptr)
21105324fb0dSmrg{
21115324fb0dSmrg	int i = 0;
21125324fb0dSmrg
21135324fb0dSmrg	/*  Issue commands to set cu mask used in current dispatch */
21145324fb0dSmrg	/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */
21155324fb0dSmrg	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
21165324fb0dSmrg	ptr[i++] = 0x216;
21175324fb0dSmrg	ptr[i++] = 0xffffffff;
21185324fb0dSmrg	ptr[i++] = 0xffffffff;
21195324fb0dSmrg	/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */
21205324fb0dSmrg	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
21215324fb0dSmrg	ptr[i++] = 0x219;
21225324fb0dSmrg	ptr[i++] = 0xffffffff;
21235324fb0dSmrg	ptr[i++] = 0xffffffff;
21245324fb0dSmrg
21255324fb0dSmrg	return i;
21265324fb0dSmrg}
21275324fb0dSmrg
21285324fb0dSmrgstatic int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr)
21295324fb0dSmrg{
21305324fb0dSmrg	int i, j;
21315324fb0dSmrg
21325324fb0dSmrg	i = 0;
21335324fb0dSmrg
21345324fb0dSmrg	/* Writes shader state to HW */
21355324fb0dSmrg	/* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */
21365324fb0dSmrg	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
21375324fb0dSmrg	ptr[i++] = 0x20c;
21385324fb0dSmrg	ptr[i++] = (shader_addr >> 8);
21395324fb0dSmrg	ptr[i++] = (shader_addr >> 40);
21405324fb0dSmrg	/* write sh regs*/
21415324fb0dSmrg	for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) {
21425324fb0dSmrg		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
21435324fb0dSmrg		/* - Gfx9ShRegBase */
21445324fb0dSmrg		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00;
21455324fb0dSmrg		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1];
21465324fb0dSmrg	}
21475324fb0dSmrg
21485324fb0dSmrg	return i;
21495324fb0dSmrg}
21505324fb0dSmrg
21515324fb0dSmrgstatic void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
21525324fb0dSmrg					 uint32_t ip_type,
21535324fb0dSmrg					 uint32_t ring)
21545324fb0dSmrg{
21555324fb0dSmrg	amdgpu_context_handle context_handle;
21565324fb0dSmrg	amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3];
21575324fb0dSmrg	volatile unsigned char *ptr_dst;
21585324fb0dSmrg	void *ptr_shader;
21595324fb0dSmrg	uint32_t *ptr_cmd;
21605324fb0dSmrg	uint64_t mc_address_dst, mc_address_shader, mc_address_cmd;
21615324fb0dSmrg	amdgpu_va_handle va_dst, va_shader, va_cmd;
21625324fb0dSmrg	int i, r;
21635324fb0dSmrg	int bo_dst_size = 16384;
21645324fb0dSmrg	int bo_shader_size = 4096;
21655324fb0dSmrg	int bo_cmd_size = 4096;
21665324fb0dSmrg	struct amdgpu_cs_request ibs_request = {0};
21675324fb0dSmrg	struct amdgpu_cs_ib_info ib_info= {0};
21685324fb0dSmrg	amdgpu_bo_list_handle bo_list;
21695324fb0dSmrg	struct amdgpu_cs_fence fence_status = {0};
21705324fb0dSmrg	uint32_t expired;
21715324fb0dSmrg
21725324fb0dSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
21735324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
21745324fb0dSmrg
21755324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
21765324fb0dSmrg					AMDGPU_GEM_DOMAIN_GTT, 0,
21775324fb0dSmrg					&bo_cmd, (void **)&ptr_cmd,
21785324fb0dSmrg					&mc_address_cmd, &va_cmd);
21795324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
21805324fb0dSmrg	memset(ptr_cmd, 0, bo_cmd_size);
21815324fb0dSmrg
21825324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
21835324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
21845324fb0dSmrg					&bo_shader, &ptr_shader,
21855324fb0dSmrg					&mc_address_shader, &va_shader);
21865324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
21875324fb0dSmrg
21885324fb0dSmrg	r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR);
21895324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
21905324fb0dSmrg
21915324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
21925324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
21935324fb0dSmrg					&bo_dst, (void **)&ptr_dst,
21945324fb0dSmrg					&mc_address_dst, &va_dst);
21955324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
21965324fb0dSmrg
21975324fb0dSmrg	i = 0;
21985324fb0dSmrg	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);
21995324fb0dSmrg
22005324fb0dSmrg	/*  Issue commands to set cu mask used in current dispatch */
22015324fb0dSmrg	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);
22025324fb0dSmrg
22035324fb0dSmrg	/* Writes shader state to HW */
22045324fb0dSmrg	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);
22055324fb0dSmrg
22065324fb0dSmrg	/* Write constant data */
22075324fb0dSmrg	/* Writes the UAV constant data to the SGPRs. */
22085324fb0dSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
22095324fb0dSmrg	ptr_cmd[i++] = 0x240;
22105324fb0dSmrg	ptr_cmd[i++] = mc_address_dst;
22115324fb0dSmrg	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
22125324fb0dSmrg	ptr_cmd[i++] = 0x400;
22135324fb0dSmrg	ptr_cmd[i++] = 0x74fac;
22145324fb0dSmrg
22155324fb0dSmrg	/* Sets a range of pixel shader constants */
22165324fb0dSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
22175324fb0dSmrg	ptr_cmd[i++] = 0x244;
22185324fb0dSmrg	ptr_cmd[i++] = 0x22222222;
22195324fb0dSmrg	ptr_cmd[i++] = 0x22222222;
22205324fb0dSmrg	ptr_cmd[i++] = 0x22222222;
22215324fb0dSmrg	ptr_cmd[i++] = 0x22222222;
22225324fb0dSmrg
22235324fb0dSmrg	/* dispatch direct command */
22245324fb0dSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
22255324fb0dSmrg	ptr_cmd[i++] = 0x10;
22265324fb0dSmrg	ptr_cmd[i++] = 1;
22275324fb0dSmrg	ptr_cmd[i++] = 1;
22285324fb0dSmrg	ptr_cmd[i++] = 1;
22295324fb0dSmrg
22305324fb0dSmrg	while (i & 7)
22315324fb0dSmrg		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
22325324fb0dSmrg
22335324fb0dSmrg	resources[0] = bo_dst;
22345324fb0dSmrg	resources[1] = bo_shader;
22355324fb0dSmrg	resources[2] = bo_cmd;
22365324fb0dSmrg	r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list);
22375324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
22385324fb0dSmrg
22395324fb0dSmrg	ib_info.ib_mc_address = mc_address_cmd;
22405324fb0dSmrg	ib_info.size = i;
22415324fb0dSmrg	ibs_request.ip_type = ip_type;
22425324fb0dSmrg	ibs_request.ring = ring;
22435324fb0dSmrg	ibs_request.resources = bo_list;
22445324fb0dSmrg	ibs_request.number_of_ibs = 1;
22455324fb0dSmrg	ibs_request.ibs = &ib_info;
22465324fb0dSmrg	ibs_request.fence_info.handle = NULL;
22475324fb0dSmrg
22485324fb0dSmrg	/* submit CS */
22495324fb0dSmrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
22505324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
22515324fb0dSmrg
22525324fb0dSmrg	r = amdgpu_bo_list_destroy(bo_list);
22535324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
22545324fb0dSmrg
22555324fb0dSmrg	fence_status.ip_type = ip_type;
22565324fb0dSmrg	fence_status.ip_instance = 0;
22575324fb0dSmrg	fence_status.ring = ring;
22585324fb0dSmrg	fence_status.context = context_handle;
22595324fb0dSmrg	fence_status.fence = ibs_request.seq_no;
22605324fb0dSmrg
22615324fb0dSmrg	/* wait for IB accomplished */
22625324fb0dSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
22635324fb0dSmrg					 AMDGPU_TIMEOUT_INFINITE,
22645324fb0dSmrg					 0, &expired);
22655324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
22665324fb0dSmrg	CU_ASSERT_EQUAL(expired, true);
22675324fb0dSmrg
22685324fb0dSmrg	/* verify if memset test result meets with expected */
22695324fb0dSmrg	i = 0;
22705324fb0dSmrg	while(i < bo_dst_size) {
22715324fb0dSmrg		CU_ASSERT_EQUAL(ptr_dst[i++], 0x22);
22725324fb0dSmrg	}
22735324fb0dSmrg
22745324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
22755324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
22765324fb0dSmrg
22775324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
22785324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
22795324fb0dSmrg
22805324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
22815324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
22825324fb0dSmrg
22835324fb0dSmrg	r = amdgpu_cs_ctx_free(context_handle);
22845324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
22855324fb0dSmrg}
22865324fb0dSmrg
22875324fb0dSmrgstatic void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
22885324fb0dSmrg					uint32_t ip_type,
22895324fb0dSmrg					uint32_t ring)
22905324fb0dSmrg{
22915324fb0dSmrg	amdgpu_context_handle context_handle;
22925324fb0dSmrg	amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
22935324fb0dSmrg	volatile unsigned char *ptr_dst;
22945324fb0dSmrg	void *ptr_shader;
22955324fb0dSmrg	unsigned char *ptr_src;
22965324fb0dSmrg	uint32_t *ptr_cmd;
22975324fb0dSmrg	uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
22985324fb0dSmrg	amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
22995324fb0dSmrg	int i, r;
23005324fb0dSmrg	int bo_dst_size = 16384;
23015324fb0dSmrg	int bo_shader_size = 4096;
23025324fb0dSmrg	int bo_cmd_size = 4096;
23035324fb0dSmrg	struct amdgpu_cs_request ibs_request = {0};
23045324fb0dSmrg	struct amdgpu_cs_ib_info ib_info= {0};
23055324fb0dSmrg	uint32_t expired;
23065324fb0dSmrg	amdgpu_bo_list_handle bo_list;
23075324fb0dSmrg	struct amdgpu_cs_fence fence_status = {0};
23085324fb0dSmrg
23095324fb0dSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
23105324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
23115324fb0dSmrg
23125324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
23135324fb0dSmrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
23145324fb0dSmrg				    &bo_cmd, (void **)&ptr_cmd,
23155324fb0dSmrg				    &mc_address_cmd, &va_cmd);
23165324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
23175324fb0dSmrg	memset(ptr_cmd, 0, bo_cmd_size);
23185324fb0dSmrg
23195324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
23205324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
23215324fb0dSmrg					&bo_shader, &ptr_shader,
23225324fb0dSmrg					&mc_address_shader, &va_shader);
23235324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
23245324fb0dSmrg
23255324fb0dSmrg	r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCOPY );
23265324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
23275324fb0dSmrg
23285324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
23295324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
23305324fb0dSmrg					&bo_src, (void **)&ptr_src,
23315324fb0dSmrg					&mc_address_src, &va_src);
23325324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
23335324fb0dSmrg
23345324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
23355324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
23365324fb0dSmrg					&bo_dst, (void **)&ptr_dst,
23375324fb0dSmrg					&mc_address_dst, &va_dst);
23385324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
23395324fb0dSmrg
23405324fb0dSmrg	memset(ptr_src, 0x55, bo_dst_size);
23415324fb0dSmrg
23425324fb0dSmrg	i = 0;
23435324fb0dSmrg	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);
23445324fb0dSmrg
23455324fb0dSmrg	/*  Issue commands to set cu mask used in current dispatch */
23465324fb0dSmrg	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);
23475324fb0dSmrg
23485324fb0dSmrg	/* Writes shader state to HW */
23495324fb0dSmrg	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);
23505324fb0dSmrg
23515324fb0dSmrg	/* Write constant data */
23525324fb0dSmrg	/* Writes the texture resource constants data to the SGPRs */
23535324fb0dSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
23545324fb0dSmrg	ptr_cmd[i++] = 0x240;
23555324fb0dSmrg	ptr_cmd[i++] = mc_address_src;
23565324fb0dSmrg	ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
23575324fb0dSmrg	ptr_cmd[i++] = 0x400;
23585324fb0dSmrg	ptr_cmd[i++] = 0x74fac;
23595324fb0dSmrg
23605324fb0dSmrg	/* Writes the UAV constant data to the SGPRs. */
23615324fb0dSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
23625324fb0dSmrg	ptr_cmd[i++] = 0x244;
23635324fb0dSmrg	ptr_cmd[i++] = mc_address_dst;
23645324fb0dSmrg	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
23655324fb0dSmrg	ptr_cmd[i++] = 0x400;
23665324fb0dSmrg	ptr_cmd[i++] = 0x74fac;
23675324fb0dSmrg
23685324fb0dSmrg	/* dispatch direct command */
23695324fb0dSmrg	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
23705324fb0dSmrg	ptr_cmd[i++] = 0x10;
23715324fb0dSmrg	ptr_cmd[i++] = 1;
23725324fb0dSmrg	ptr_cmd[i++] = 1;
23735324fb0dSmrg	ptr_cmd[i++] = 1;
23745324fb0dSmrg
23755324fb0dSmrg	while (i & 7)
23765324fb0dSmrg		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
23775324fb0dSmrg
23785324fb0dSmrg	resources[0] = bo_shader;
23795324fb0dSmrg	resources[1] = bo_src;
23805324fb0dSmrg	resources[2] = bo_dst;
23815324fb0dSmrg	resources[3] = bo_cmd;
23825324fb0dSmrg	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
23835324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
23845324fb0dSmrg
23855324fb0dSmrg	ib_info.ib_mc_address = mc_address_cmd;
23865324fb0dSmrg	ib_info.size = i;
23875324fb0dSmrg	ibs_request.ip_type = ip_type;
23885324fb0dSmrg	ibs_request.ring = ring;
23895324fb0dSmrg	ibs_request.resources = bo_list;
23905324fb0dSmrg	ibs_request.number_of_ibs = 1;
23915324fb0dSmrg	ibs_request.ibs = &ib_info;
23925324fb0dSmrg	ibs_request.fence_info.handle = NULL;
23935324fb0dSmrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
23945324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
23955324fb0dSmrg
23965324fb0dSmrg	fence_status.ip_type = ip_type;
23975324fb0dSmrg	fence_status.ip_instance = 0;
23985324fb0dSmrg	fence_status.ring = ring;
23995324fb0dSmrg	fence_status.context = context_handle;
24005324fb0dSmrg	fence_status.fence = ibs_request.seq_no;
24015324fb0dSmrg
24025324fb0dSmrg	/* wait for IB accomplished */
24035324fb0dSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
24045324fb0dSmrg					 AMDGPU_TIMEOUT_INFINITE,
24055324fb0dSmrg					 0, &expired);
24065324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
24075324fb0dSmrg	CU_ASSERT_EQUAL(expired, true);
24085324fb0dSmrg
24095324fb0dSmrg	/* verify if memcpy test result meets with expected */
24105324fb0dSmrg	i = 0;
24115324fb0dSmrg	while(i < bo_dst_size) {
24125324fb0dSmrg		CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
24135324fb0dSmrg		i++;
24145324fb0dSmrg	}
24155324fb0dSmrg
24165324fb0dSmrg	r = amdgpu_bo_list_destroy(bo_list);
24175324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
24185324fb0dSmrg
24195324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
24205324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
24215324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
24225324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
24235324fb0dSmrg
24245324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
24255324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
24265324fb0dSmrg
24275324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
24285324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
24295324fb0dSmrg
24305324fb0dSmrg	r = amdgpu_cs_ctx_free(context_handle);
24315324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
24325324fb0dSmrg}
24335324fb0dSmrgstatic void amdgpu_dispatch_test(void)
24345324fb0dSmrg{
24355324fb0dSmrg	int r;
24365324fb0dSmrg	struct drm_amdgpu_info_hw_ip info;
24375324fb0dSmrg	uint32_t ring_id;
24385324fb0dSmrg
24395324fb0dSmrg	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
24405324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
24415324fb0dSmrg
24425324fb0dSmrg	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
24435324fb0dSmrg		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id);
24445324fb0dSmrg		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id);
24455324fb0dSmrg	}
24465324fb0dSmrg
24475324fb0dSmrg	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
24485324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
24495324fb0dSmrg
24505324fb0dSmrg	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
24515324fb0dSmrg		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id);
24525324fb0dSmrg		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id);
24535324fb0dSmrg	}
24545324fb0dSmrg}
24555324fb0dSmrg
24565324fb0dSmrgstatic int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type)
24575324fb0dSmrg{
24585324fb0dSmrg	int i;
24595324fb0dSmrg	uint32_t shader_offset= 256;
24605324fb0dSmrg	uint32_t mem_offset, patch_code_offset;
24615324fb0dSmrg	uint32_t shader_size, patchinfo_code_size;
24625324fb0dSmrg	const uint32_t *shader;
24635324fb0dSmrg	const uint32_t *patchinfo_code;
24645324fb0dSmrg	const uint32_t *patchcode_offset;
24655324fb0dSmrg
24665324fb0dSmrg	switch (ps_type) {
24675324fb0dSmrg		case PS_CONST:
24685324fb0dSmrg			shader = ps_const_shader_gfx9;
24695324fb0dSmrg			shader_size = sizeof(ps_const_shader_gfx9);
24705324fb0dSmrg			patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9;
24715324fb0dSmrg			patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9;
24725324fb0dSmrg			patchcode_offset = ps_const_shader_patchinfo_offset_gfx9;
24735324fb0dSmrg			break;
24745324fb0dSmrg		case PS_TEX:
24755324fb0dSmrg			shader = ps_tex_shader_gfx9;
24765324fb0dSmrg			shader_size = sizeof(ps_tex_shader_gfx9);
24775324fb0dSmrg			patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx9;
24785324fb0dSmrg			patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9;
24795324fb0dSmrg			patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9;
24805324fb0dSmrg			break;
24815324fb0dSmrg		default:
24825324fb0dSmrg			return -1;
24835324fb0dSmrg			break;
24845324fb0dSmrg	}
24855324fb0dSmrg
24865324fb0dSmrg	/* write main shader program */
24875324fb0dSmrg	for (i = 0 ; i < 10; i++) {
24885324fb0dSmrg		mem_offset = i * shader_offset;
24895324fb0dSmrg		memcpy(ptr + mem_offset, shader, shader_size);
24905324fb0dSmrg	}
24915324fb0dSmrg
24925324fb0dSmrg	/* overwrite patch codes */
24935324fb0dSmrg	for (i = 0 ; i < 10; i++) {
24945324fb0dSmrg		mem_offset = i * shader_offset + patchcode_offset[0] * sizeof(uint32_t);
24955324fb0dSmrg		patch_code_offset = i * patchinfo_code_size;
24965324fb0dSmrg		memcpy(ptr + mem_offset,
24975324fb0dSmrg			patchinfo_code + patch_code_offset,
24985324fb0dSmrg			patchinfo_code_size * sizeof(uint32_t));
24995324fb0dSmrg	}
25005324fb0dSmrg
25015324fb0dSmrg	return 0;
25025324fb0dSmrg}
25035324fb0dSmrg
25045324fb0dSmrg/* load RectPosTexFast_VS */
25055324fb0dSmrgstatic int amdgpu_draw_load_vs_shader(uint8_t *ptr)
25065324fb0dSmrg{
25075324fb0dSmrg	const uint32_t *shader;
25085324fb0dSmrg	uint32_t shader_size;
25095324fb0dSmrg
25105324fb0dSmrg	shader = vs_RectPosTexFast_shader_gfx9;
25115324fb0dSmrg	shader_size = sizeof(vs_RectPosTexFast_shader_gfx9);
25125324fb0dSmrg
25135324fb0dSmrg	memcpy(ptr, shader, shader_size);
25145324fb0dSmrg
25155324fb0dSmrg	return 0;
25165324fb0dSmrg}
25175324fb0dSmrg
25185324fb0dSmrgstatic int amdgpu_draw_init(uint32_t *ptr)
25195324fb0dSmrg{
25205324fb0dSmrg	int i = 0;
25215324fb0dSmrg	const uint32_t *preamblecache_ptr;
25225324fb0dSmrg	uint32_t preamblecache_size;
25235324fb0dSmrg
25245324fb0dSmrg	/* Write context control and load shadowing register if necessary */
25255324fb0dSmrg	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
25265324fb0dSmrg	ptr[i++] = 0x80000000;
25275324fb0dSmrg	ptr[i++] = 0x80000000;
25285324fb0dSmrg
25295324fb0dSmrg	preamblecache_ptr = preamblecache_gfx9;
25305324fb0dSmrg	preamblecache_size = sizeof(preamblecache_gfx9);
25315324fb0dSmrg
25325324fb0dSmrg	memcpy(ptr + i, preamblecache_ptr, preamblecache_size);
25335324fb0dSmrg	return i + preamblecache_size/sizeof(uint32_t);
25345324fb0dSmrg}
25355324fb0dSmrg
25365324fb0dSmrgstatic int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr,
25375324fb0dSmrg							 uint64_t dst_addr)
25385324fb0dSmrg{
25395324fb0dSmrg	int i = 0;
25405324fb0dSmrg
25415324fb0dSmrg	/* setup color buffer */
25425324fb0dSmrg	/* offset   reg
25435324fb0dSmrg	   0xA318   CB_COLOR0_BASE
25445324fb0dSmrg	   0xA319   CB_COLOR0_BASE_EXT
25455324fb0dSmrg	   0xA31A   CB_COLOR0_ATTRIB2
25465324fb0dSmrg	   0xA31B   CB_COLOR0_VIEW
25475324fb0dSmrg	   0xA31C   CB_COLOR0_INFO
25485324fb0dSmrg	   0xA31D   CB_COLOR0_ATTRIB
25495324fb0dSmrg	   0xA31E   CB_COLOR0_DCC_CONTROL
25505324fb0dSmrg	   0xA31F   CB_COLOR0_CMASK
25515324fb0dSmrg	   0xA320   CB_COLOR0_CMASK_BASE_EXT
25525324fb0dSmrg	   0xA321   CB_COLOR0_FMASK
25535324fb0dSmrg	   0xA322   CB_COLOR0_FMASK_BASE_EXT
25545324fb0dSmrg	   0xA323   CB_COLOR0_CLEAR_WORD0
25555324fb0dSmrg	   0xA324   CB_COLOR0_CLEAR_WORD1
25565324fb0dSmrg	   0xA325   CB_COLOR0_DCC_BASE
25575324fb0dSmrg	   0xA326   CB_COLOR0_DCC_BASE_EXT */
25585324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15);
25595324fb0dSmrg	ptr[i++] = 0x318;
25605324fb0dSmrg	ptr[i++] = dst_addr >> 8;
25615324fb0dSmrg	ptr[i++] = dst_addr >> 40;
25625324fb0dSmrg	ptr[i++] = 0x7c01f;
25635324fb0dSmrg	ptr[i++] = 0;
25645324fb0dSmrg	ptr[i++] = 0x50438;
25655324fb0dSmrg	ptr[i++] = 0x10140000;
25665324fb0dSmrg	i += 9;
25675324fb0dSmrg
25685324fb0dSmrg	/* mmCB_MRT0_EPITCH */
25695324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
25705324fb0dSmrg	ptr[i++] = 0x1e8;
25715324fb0dSmrg	ptr[i++] = 0x1f;
25725324fb0dSmrg
25735324fb0dSmrg	/* 0xA32B   CB_COLOR1_BASE */
25745324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
25755324fb0dSmrg	ptr[i++] = 0x32b;
25765324fb0dSmrg	ptr[i++] = 0;
25775324fb0dSmrg
25785324fb0dSmrg	/* 0xA33A   CB_COLOR1_BASE */
25795324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
25805324fb0dSmrg	ptr[i++] = 0x33a;
25815324fb0dSmrg	ptr[i++] = 0;
25825324fb0dSmrg
25835324fb0dSmrg	/* SPI_SHADER_COL_FORMAT */
25845324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
25855324fb0dSmrg	ptr[i++] = 0x1c5;
25865324fb0dSmrg	ptr[i++] = 9;
25875324fb0dSmrg
25885324fb0dSmrg	/* Setup depth buffer */
25895324fb0dSmrg	/* mmDB_Z_INFO */
25905324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
25915324fb0dSmrg	ptr[i++] = 0xe;
25925324fb0dSmrg	i += 2;
25935324fb0dSmrg
25945324fb0dSmrg	return i;
25955324fb0dSmrg}
25965324fb0dSmrg
25975324fb0dSmrgstatic int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr)
25985324fb0dSmrg{
25995324fb0dSmrg	int i = 0;
26005324fb0dSmrg	const uint32_t *cached_cmd_ptr;
26015324fb0dSmrg	uint32_t cached_cmd_size;
26025324fb0dSmrg
26035324fb0dSmrg	/* mmPA_SC_TILE_STEERING_OVERRIDE */
26045324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
26055324fb0dSmrg	ptr[i++] = 0xd7;
26065324fb0dSmrg	ptr[i++] = 0;
26075324fb0dSmrg
26085324fb0dSmrg	ptr[i++] = 0xffff1000;
26095324fb0dSmrg	ptr[i++] = 0xc0021000;
26105324fb0dSmrg
26115324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
26125324fb0dSmrg	ptr[i++] = 0xd7;
26135324fb0dSmrg	ptr[i++] = 1;
26145324fb0dSmrg
26155324fb0dSmrg	/* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
26165324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16);
26175324fb0dSmrg	ptr[i++] = 0x2fe;
26185324fb0dSmrg	i += 16;
26195324fb0dSmrg
26205324fb0dSmrg	/* mmPA_SC_CENTROID_PRIORITY_0 */
26215324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
26225324fb0dSmrg	ptr[i++] = 0x2f5;
26235324fb0dSmrg	i += 2;
26245324fb0dSmrg
26255324fb0dSmrg	cached_cmd_ptr = cached_cmd_gfx9;
26265324fb0dSmrg	cached_cmd_size = sizeof(cached_cmd_gfx9);
26275324fb0dSmrg
26285324fb0dSmrg	memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size);
26295324fb0dSmrg	i += cached_cmd_size/sizeof(uint32_t);
26305324fb0dSmrg
26315324fb0dSmrg	return i;
26325324fb0dSmrg}
26335324fb0dSmrg
26345324fb0dSmrgstatic int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr,
26355324fb0dSmrg						  int ps_type,
26365324fb0dSmrg						  uint64_t shader_addr)
26375324fb0dSmrg{
26385324fb0dSmrg	int i = 0;
26395324fb0dSmrg
26405324fb0dSmrg	/* mmPA_CL_VS_OUT_CNTL */
26415324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
26425324fb0dSmrg	ptr[i++] = 0x207;
26435324fb0dSmrg	ptr[i++] = 0;
26445324fb0dSmrg
26455324fb0dSmrg	/* mmSPI_SHADER_PGM_RSRC3_VS */
26465324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
26475324fb0dSmrg	ptr[i++] = 0x46;
26485324fb0dSmrg	ptr[i++] = 0xffff;
26495324fb0dSmrg
26505324fb0dSmrg	/* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */
26515324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
26525324fb0dSmrg	ptr[i++] = 0x48;
26535324fb0dSmrg	ptr[i++] = shader_addr >> 8;
26545324fb0dSmrg	ptr[i++] = shader_addr >> 40;
26555324fb0dSmrg
26565324fb0dSmrg	/* mmSPI_SHADER_PGM_RSRC1_VS */
26575324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
26585324fb0dSmrg	ptr[i++] = 0x4a;
26595324fb0dSmrg	ptr[i++] = 0xc0081;
26605324fb0dSmrg	/* mmSPI_SHADER_PGM_RSRC2_VS */
26615324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
26625324fb0dSmrg	ptr[i++] = 0x4b;
26635324fb0dSmrg	ptr[i++] = 0x18;
26645324fb0dSmrg
26655324fb0dSmrg	/* mmSPI_VS_OUT_CONFIG */
26665324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
26675324fb0dSmrg	ptr[i++] = 0x1b1;
26685324fb0dSmrg	ptr[i++] = 2;
26695324fb0dSmrg
26705324fb0dSmrg	/* mmSPI_SHADER_POS_FORMAT */
26715324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
26725324fb0dSmrg	ptr[i++] = 0x1c3;
26735324fb0dSmrg	ptr[i++] = 4;
26745324fb0dSmrg
26755324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
26765324fb0dSmrg	ptr[i++] = 0x4c;
26775324fb0dSmrg	i += 2;
26785324fb0dSmrg	ptr[i++] = 0x42000000;
26795324fb0dSmrg	ptr[i++] = 0x42000000;
26805324fb0dSmrg
26815324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
26825324fb0dSmrg	ptr[i++] = 0x50;
26835324fb0dSmrg	i += 2;
26845324fb0dSmrg	if (ps_type == PS_CONST) {
26855324fb0dSmrg		i += 2;
26865324fb0dSmrg	} else if (ps_type == PS_TEX) {
26875324fb0dSmrg		ptr[i++] = 0x3f800000;
26885324fb0dSmrg		ptr[i++] = 0x3f800000;
26895324fb0dSmrg	}
26905324fb0dSmrg
26915324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
26925324fb0dSmrg	ptr[i++] = 0x54;
26935324fb0dSmrg	i += 4;
26945324fb0dSmrg
26955324fb0dSmrg	return i;
26965324fb0dSmrg}
26975324fb0dSmrg
26985324fb0dSmrgstatic int amdgpu_draw_ps_write2hw(uint32_t *ptr,
26995324fb0dSmrg				   int ps_type,
27005324fb0dSmrg				   uint64_t shader_addr)
27015324fb0dSmrg{
27025324fb0dSmrg	int i, j;
27035324fb0dSmrg	const uint32_t *sh_registers;
27045324fb0dSmrg	const uint32_t *context_registers;
27055324fb0dSmrg	uint32_t num_sh_reg, num_context_reg;
27065324fb0dSmrg
27075324fb0dSmrg	if (ps_type == PS_CONST) {
27085324fb0dSmrg		sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9;
27095324fb0dSmrg		context_registers = (const uint32_t *)ps_const_context_reg_gfx9;
27105324fb0dSmrg		num_sh_reg = ps_num_sh_registers_gfx9;
27115324fb0dSmrg		num_context_reg = ps_num_context_registers_gfx9;
27125324fb0dSmrg	} else if (ps_type == PS_TEX) {
27135324fb0dSmrg		sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9;
27145324fb0dSmrg		context_registers = (const uint32_t *)ps_tex_context_reg_gfx9;
27155324fb0dSmrg		num_sh_reg = ps_num_sh_registers_gfx9;
27165324fb0dSmrg		num_context_reg = ps_num_context_registers_gfx9;
27175324fb0dSmrg	}
27185324fb0dSmrg
27195324fb0dSmrg	i = 0;
27205324fb0dSmrg
27215324fb0dSmrg	/* 0x2c07   SPI_SHADER_PGM_RSRC3_PS
27225324fb0dSmrg	   0x2c08   SPI_SHADER_PGM_LO_PS
27235324fb0dSmrg	   0x2c09   SPI_SHADER_PGM_HI_PS */
27245324fb0dSmrg	shader_addr += 256 * 9;
27255324fb0dSmrg	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
27265324fb0dSmrg	ptr[i++] = 0x7;
27275324fb0dSmrg	ptr[i++] = 0xffff;
27285324fb0dSmrg	ptr[i++] = shader_addr >> 8;
27295324fb0dSmrg	ptr[i++] = shader_addr >> 40;
27305324fb0dSmrg
27315324fb0dSmrg	for (j = 0; j < num_sh_reg; j++) {
27325324fb0dSmrg		ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
27335324fb0dSmrg		ptr[i++] = sh_registers[j * 2] - 0x2c00;
27345324fb0dSmrg		ptr[i++] = sh_registers[j * 2 + 1];
27355324fb0dSmrg	}
27365324fb0dSmrg
27375324fb0dSmrg	for (j = 0; j < num_context_reg; j++) {
27385324fb0dSmrg		if (context_registers[j * 2] != 0xA1C5) {
27395324fb0dSmrg			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
27405324fb0dSmrg			ptr[i++] = context_registers[j * 2] - 0xa000;
27415324fb0dSmrg			ptr[i++] = context_registers[j * 2 + 1];
27425324fb0dSmrg		}
27435324fb0dSmrg
27445324fb0dSmrg		if (context_registers[j * 2] == 0xA1B4) {
27455324fb0dSmrg			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
27465324fb0dSmrg			ptr[i++] = 0x1b3;
27475324fb0dSmrg			ptr[i++] = 2;
27485324fb0dSmrg		}
27495324fb0dSmrg	}
27505324fb0dSmrg
27515324fb0dSmrg	return i;
27525324fb0dSmrg}
27535324fb0dSmrg
27545324fb0dSmrgstatic int amdgpu_draw_draw(uint32_t *ptr)
27555324fb0dSmrg{
27565324fb0dSmrg	int i = 0;
27575324fb0dSmrg
27585324fb0dSmrg	/* mmIA_MULTI_VGT_PARAM */
27595324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
27605324fb0dSmrg	ptr[i++] = 0x40000258;
27615324fb0dSmrg	ptr[i++] = 0xd00ff;
27625324fb0dSmrg
27635324fb0dSmrg	/* mmVGT_PRIMITIVE_TYPE */
27645324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
27655324fb0dSmrg	ptr[i++] = 0x10000242;
27665324fb0dSmrg	ptr[i++] = 0x11;
27675324fb0dSmrg
27685324fb0dSmrg	ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1);
27695324fb0dSmrg	ptr[i++] = 3;
27705324fb0dSmrg	ptr[i++] = 2;
27715324fb0dSmrg
27725324fb0dSmrg	return i;
27735324fb0dSmrg}
27745324fb0dSmrg
27755324fb0dSmrgvoid amdgpu_memset_draw(amdgpu_device_handle device_handle,
27765324fb0dSmrg			amdgpu_bo_handle bo_shader_ps,
27775324fb0dSmrg			amdgpu_bo_handle bo_shader_vs,
27785324fb0dSmrg			uint64_t mc_address_shader_ps,
27795324fb0dSmrg			uint64_t mc_address_shader_vs,
27805324fb0dSmrg			uint32_t ring_id)
27815324fb0dSmrg{
27825324fb0dSmrg	amdgpu_context_handle context_handle;
27835324fb0dSmrg	amdgpu_bo_handle bo_dst, bo_cmd, resources[4];
27845324fb0dSmrg	volatile unsigned char *ptr_dst;
27855324fb0dSmrg	uint32_t *ptr_cmd;
27865324fb0dSmrg	uint64_t mc_address_dst, mc_address_cmd;
27875324fb0dSmrg	amdgpu_va_handle va_dst, va_cmd;
27885324fb0dSmrg	int i, r;
27895324fb0dSmrg	int bo_dst_size = 16384;
27905324fb0dSmrg	int bo_cmd_size = 4096;
27915324fb0dSmrg	struct amdgpu_cs_request ibs_request = {0};
27925324fb0dSmrg	struct amdgpu_cs_ib_info ib_info = {0};
27935324fb0dSmrg	struct amdgpu_cs_fence fence_status = {0};
27945324fb0dSmrg	uint32_t expired;
27955324fb0dSmrg	amdgpu_bo_list_handle bo_list;
27965324fb0dSmrg
27975324fb0dSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
27985324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
27995324fb0dSmrg
28005324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
28015324fb0dSmrg					AMDGPU_GEM_DOMAIN_GTT, 0,
28025324fb0dSmrg					&bo_cmd, (void **)&ptr_cmd,
28035324fb0dSmrg					&mc_address_cmd, &va_cmd);
28045324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
28055324fb0dSmrg	memset(ptr_cmd, 0, bo_cmd_size);
28065324fb0dSmrg
28075324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
28085324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
28095324fb0dSmrg					&bo_dst, (void **)&ptr_dst,
28105324fb0dSmrg					&mc_address_dst, &va_dst);
28115324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
28125324fb0dSmrg
28135324fb0dSmrg	i = 0;
28145324fb0dSmrg	i += amdgpu_draw_init(ptr_cmd + i);
28155324fb0dSmrg
28165324fb0dSmrg	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst);
28175324fb0dSmrg
28185324fb0dSmrg	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i);
28195324fb0dSmrg
28205324fb0dSmrg	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs);
28215324fb0dSmrg
28225324fb0dSmrg	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps);
28235324fb0dSmrg
28245324fb0dSmrg	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
28255324fb0dSmrg	ptr_cmd[i++] = 0xc;
28265324fb0dSmrg	ptr_cmd[i++] = 0x33333333;
28275324fb0dSmrg	ptr_cmd[i++] = 0x33333333;
28285324fb0dSmrg	ptr_cmd[i++] = 0x33333333;
28295324fb0dSmrg	ptr_cmd[i++] = 0x33333333;
28305324fb0dSmrg
28315324fb0dSmrg	i += amdgpu_draw_draw(ptr_cmd + i);
28325324fb0dSmrg
28335324fb0dSmrg	while (i & 7)
28345324fb0dSmrg		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
28355324fb0dSmrg
28365324fb0dSmrg	resources[0] = bo_dst;
28375324fb0dSmrg	resources[1] = bo_shader_ps;
28385324fb0dSmrg	resources[2] = bo_shader_vs;
28395324fb0dSmrg	resources[3] = bo_cmd;
28405324fb0dSmrg	r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list);
28415324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
28425324fb0dSmrg
28435324fb0dSmrg	ib_info.ib_mc_address = mc_address_cmd;
28445324fb0dSmrg	ib_info.size = i;
28455324fb0dSmrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
28465324fb0dSmrg	ibs_request.ring = ring_id;
28475324fb0dSmrg	ibs_request.resources = bo_list;
28485324fb0dSmrg	ibs_request.number_of_ibs = 1;
28495324fb0dSmrg	ibs_request.ibs = &ib_info;
28505324fb0dSmrg	ibs_request.fence_info.handle = NULL;
28515324fb0dSmrg
28525324fb0dSmrg	/* submit CS */
28535324fb0dSmrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
28545324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
28555324fb0dSmrg
28565324fb0dSmrg	r = amdgpu_bo_list_destroy(bo_list);
28575324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
28585324fb0dSmrg
28595324fb0dSmrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
28605324fb0dSmrg	fence_status.ip_instance = 0;
28615324fb0dSmrg	fence_status.ring = ring_id;
28625324fb0dSmrg	fence_status.context = context_handle;
28635324fb0dSmrg	fence_status.fence = ibs_request.seq_no;
28645324fb0dSmrg
28655324fb0dSmrg	/* wait for IB accomplished */
28665324fb0dSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
28675324fb0dSmrg					 AMDGPU_TIMEOUT_INFINITE,
28685324fb0dSmrg					 0, &expired);
28695324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
28705324fb0dSmrg	CU_ASSERT_EQUAL(expired, true);
28715324fb0dSmrg
28725324fb0dSmrg	/* verify if memset test result meets with expected */
28735324fb0dSmrg	i = 0;
28745324fb0dSmrg	while(i < bo_dst_size) {
28755324fb0dSmrg		CU_ASSERT_EQUAL(ptr_dst[i++], 0x33);
28765324fb0dSmrg	}
28775324fb0dSmrg
28785324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
28795324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
28805324fb0dSmrg
28815324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
28825324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
28835324fb0dSmrg
28845324fb0dSmrg	r = amdgpu_cs_ctx_free(context_handle);
28855324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
28865324fb0dSmrg}
28875324fb0dSmrg
28885324fb0dSmrgstatic void amdgpu_memset_draw_test(amdgpu_device_handle device_handle,
28895324fb0dSmrg				    uint32_t ring)
28905324fb0dSmrg{
28915324fb0dSmrg	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
28925324fb0dSmrg	void *ptr_shader_ps;
28935324fb0dSmrg	void *ptr_shader_vs;
28945324fb0dSmrg	uint64_t mc_address_shader_ps, mc_address_shader_vs;
28955324fb0dSmrg	amdgpu_va_handle va_shader_ps, va_shader_vs;
28965324fb0dSmrg	int r;
28975324fb0dSmrg	int bo_shader_size = 4096;
28985324fb0dSmrg
28995324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
29005324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
29015324fb0dSmrg					&bo_shader_ps, &ptr_shader_ps,
29025324fb0dSmrg					&mc_address_shader_ps, &va_shader_ps);
29035324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
29045324fb0dSmrg
29055324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
29065324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
29075324fb0dSmrg					&bo_shader_vs, &ptr_shader_vs,
29085324fb0dSmrg					&mc_address_shader_vs, &va_shader_vs);
29095324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
29105324fb0dSmrg
29115324fb0dSmrg	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_CONST);
29125324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
29135324fb0dSmrg
29145324fb0dSmrg	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
29155324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
29165324fb0dSmrg
29175324fb0dSmrg	amdgpu_memset_draw(device_handle, bo_shader_ps, bo_shader_vs,
29185324fb0dSmrg			mc_address_shader_ps, mc_address_shader_vs, ring);
29195324fb0dSmrg
29205324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
29215324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
29225324fb0dSmrg
29235324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
29245324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
29255324fb0dSmrg}
29265324fb0dSmrg
29275324fb0dSmrgstatic void amdgpu_memcpy_draw(amdgpu_device_handle device_handle,
29285324fb0dSmrg			       amdgpu_bo_handle bo_shader_ps,
29295324fb0dSmrg			       amdgpu_bo_handle bo_shader_vs,
29305324fb0dSmrg			       uint64_t mc_address_shader_ps,
29315324fb0dSmrg			       uint64_t mc_address_shader_vs,
29325324fb0dSmrg			       uint32_t ring)
29335324fb0dSmrg{
29345324fb0dSmrg	amdgpu_context_handle context_handle;
29355324fb0dSmrg	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
29365324fb0dSmrg	volatile unsigned char *ptr_dst;
29375324fb0dSmrg	unsigned char *ptr_src;
29385324fb0dSmrg	uint32_t *ptr_cmd;
29395324fb0dSmrg	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
29405324fb0dSmrg	amdgpu_va_handle va_dst, va_src, va_cmd;
29415324fb0dSmrg	int i, r;
29425324fb0dSmrg	int bo_size = 16384;
29435324fb0dSmrg	int bo_cmd_size = 4096;
29445324fb0dSmrg	struct amdgpu_cs_request ibs_request = {0};
29455324fb0dSmrg	struct amdgpu_cs_ib_info ib_info= {0};
29465324fb0dSmrg	uint32_t hang_state, hangs, expired;
29475324fb0dSmrg	amdgpu_bo_list_handle bo_list;
29485324fb0dSmrg	struct amdgpu_cs_fence fence_status = {0};
29495324fb0dSmrg
29505324fb0dSmrg	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
29515324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
29525324fb0dSmrg
29535324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
29545324fb0dSmrg				    AMDGPU_GEM_DOMAIN_GTT, 0,
29555324fb0dSmrg				    &bo_cmd, (void **)&ptr_cmd,
29565324fb0dSmrg				    &mc_address_cmd, &va_cmd);
29575324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
29585324fb0dSmrg	memset(ptr_cmd, 0, bo_cmd_size);
29595324fb0dSmrg
29605324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
29615324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
29625324fb0dSmrg					&bo_src, (void **)&ptr_src,
29635324fb0dSmrg					&mc_address_src, &va_src);
29645324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
29655324fb0dSmrg
29665324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
29675324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
29685324fb0dSmrg					&bo_dst, (void **)&ptr_dst,
29695324fb0dSmrg					&mc_address_dst, &va_dst);
29705324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
29715324fb0dSmrg
29725324fb0dSmrg	memset(ptr_src, 0x55, bo_size);
29735324fb0dSmrg
29745324fb0dSmrg	i = 0;
29755324fb0dSmrg	i += amdgpu_draw_init(ptr_cmd + i);
29765324fb0dSmrg
29775324fb0dSmrg	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst);
29785324fb0dSmrg
29795324fb0dSmrg	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i);
29805324fb0dSmrg
29815324fb0dSmrg	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs);
29825324fb0dSmrg
29835324fb0dSmrg	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);
29845324fb0dSmrg
29855324fb0dSmrg	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
29865324fb0dSmrg	ptr_cmd[i++] = 0xc;
29875324fb0dSmrg	ptr_cmd[i++] = mc_address_src >> 8;
29885324fb0dSmrg	ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
29895324fb0dSmrg	ptr_cmd[i++] = 0x7c01f;
29905324fb0dSmrg	ptr_cmd[i++] = 0x90500fac;
29915324fb0dSmrg	ptr_cmd[i++] = 0x3e000;
29925324fb0dSmrg	i += 3;
29935324fb0dSmrg
29945324fb0dSmrg	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
29955324fb0dSmrg	ptr_cmd[i++] = 0x14;
29965324fb0dSmrg	ptr_cmd[i++] = 0x92;
29975324fb0dSmrg	i += 3;
29985324fb0dSmrg
29995324fb0dSmrg	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 1);
30005324fb0dSmrg	ptr_cmd[i++] = 0x191;
30015324fb0dSmrg	ptr_cmd[i++] = 0;
30025324fb0dSmrg
30035324fb0dSmrg	i += amdgpu_draw_draw(ptr_cmd + i);
30045324fb0dSmrg
30055324fb0dSmrg	while (i & 7)
30065324fb0dSmrg		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
30075324fb0dSmrg
30085324fb0dSmrg	resources[0] = bo_dst;
30095324fb0dSmrg	resources[1] = bo_src;
30105324fb0dSmrg	resources[2] = bo_shader_ps;
30115324fb0dSmrg	resources[3] = bo_shader_vs;
30125324fb0dSmrg	resources[4] = bo_cmd;
30135324fb0dSmrg	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
30145324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
30155324fb0dSmrg
30165324fb0dSmrg	ib_info.ib_mc_address = mc_address_cmd;
30175324fb0dSmrg	ib_info.size = i;
30185324fb0dSmrg	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
30195324fb0dSmrg	ibs_request.ring = ring;
30205324fb0dSmrg	ibs_request.resources = bo_list;
30215324fb0dSmrg	ibs_request.number_of_ibs = 1;
30225324fb0dSmrg	ibs_request.ibs = &ib_info;
30235324fb0dSmrg	ibs_request.fence_info.handle = NULL;
30245324fb0dSmrg	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
30255324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
30265324fb0dSmrg
30275324fb0dSmrg	fence_status.ip_type = AMDGPU_HW_IP_GFX;
30285324fb0dSmrg	fence_status.ip_instance = 0;
30295324fb0dSmrg	fence_status.ring = ring;
30305324fb0dSmrg	fence_status.context = context_handle;
30315324fb0dSmrg	fence_status.fence = ibs_request.seq_no;
30325324fb0dSmrg
30335324fb0dSmrg	/* wait for IB accomplished */
30345324fb0dSmrg	r = amdgpu_cs_query_fence_status(&fence_status,
30355324fb0dSmrg					 AMDGPU_TIMEOUT_INFINITE,
30365324fb0dSmrg					 0, &expired);
30375324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
30385324fb0dSmrg	CU_ASSERT_EQUAL(expired, true);
30395324fb0dSmrg
30405324fb0dSmrg	/* verify if memcpy test result meets with expected */
30415324fb0dSmrg	i = 0;
30425324fb0dSmrg	while(i < bo_size) {
30435324fb0dSmrg		CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
30445324fb0dSmrg		i++;
30455324fb0dSmrg	}
30465324fb0dSmrg
30475324fb0dSmrg	r = amdgpu_bo_list_destroy(bo_list);
30485324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
30495324fb0dSmrg
30505324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
30515324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
30525324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
30535324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
30545324fb0dSmrg
30555324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
30565324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
30575324fb0dSmrg
30585324fb0dSmrg	r = amdgpu_cs_ctx_free(context_handle);
30595324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
30605324fb0dSmrg}
30615324fb0dSmrg
30625324fb0dSmrgstatic void amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring)
30635324fb0dSmrg{
30645324fb0dSmrg	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
30655324fb0dSmrg	void *ptr_shader_ps;
30665324fb0dSmrg	void *ptr_shader_vs;
30675324fb0dSmrg	uint64_t mc_address_shader_ps, mc_address_shader_vs;
30685324fb0dSmrg	amdgpu_va_handle va_shader_ps, va_shader_vs;
30695324fb0dSmrg	int bo_shader_size = 4096;
30705324fb0dSmrg	int r;
30715324fb0dSmrg
30725324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
30735324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
30745324fb0dSmrg					&bo_shader_ps, &ptr_shader_ps,
30755324fb0dSmrg					&mc_address_shader_ps, &va_shader_ps);
30765324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
30775324fb0dSmrg
30785324fb0dSmrg	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
30795324fb0dSmrg					AMDGPU_GEM_DOMAIN_VRAM, 0,
30805324fb0dSmrg					&bo_shader_vs, &ptr_shader_vs,
30815324fb0dSmrg					&mc_address_shader_vs, &va_shader_vs);
30825324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
30835324fb0dSmrg
30845324fb0dSmrg	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_TEX);
30855324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
30865324fb0dSmrg
30875324fb0dSmrg	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
30885324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
30895324fb0dSmrg
30905324fb0dSmrg	amdgpu_memcpy_draw(device_handle, bo_shader_ps, bo_shader_vs,
30915324fb0dSmrg			mc_address_shader_ps, mc_address_shader_vs, ring);
30925324fb0dSmrg
30935324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
30945324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
30955324fb0dSmrg
30965324fb0dSmrg	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
30975324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
30985324fb0dSmrg}
30995324fb0dSmrg
31005324fb0dSmrgstatic void amdgpu_draw_test(void)
31015324fb0dSmrg{
31025324fb0dSmrg	int r;
31035324fb0dSmrg	struct drm_amdgpu_info_hw_ip info;
31045324fb0dSmrg	uint32_t ring_id;
31055324fb0dSmrg
31065324fb0dSmrg	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
31075324fb0dSmrg	CU_ASSERT_EQUAL(r, 0);
31085324fb0dSmrg
31095324fb0dSmrg	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
31105324fb0dSmrg		amdgpu_memset_draw_test(device_handle, ring_id);
31115324fb0dSmrg		amdgpu_memcpy_draw_test(device_handle, ring_id);
31125324fb0dSmrg	}
31135324fb0dSmrg}
3114