1#include <stdio.h>
2#include <stdlib.h>
3#include <unistd.h>
4#include <sys/types.h>
5#include <sys/stat.h>
6#include <string.h>
7#include <assert.h>
8
9#include "CUnit/Basic.h"
10#include "amdgpu_test.h"
11#include "shader_code.h"
12
/* PM4 type-3 packet opcodes used by the dispatch/draw tests */
#define	PACKET3_DISPATCH_DIRECT				0x15
#define PACKET3_CONTEXT_CONTROL                   0x28
#define PACKET3_DRAW_INDEX_AUTO				0x2D
#define PACKET3_SET_CONTEXT_REG				0x69
#define PACKET3_SET_SH_REG                        0x76
#define PACKET3_SET_SH_REG_OFFSET                       0x77
#define PACKET3_SET_UCONFIG_REG				0x79
#define PACKET3_SET_SH_REG_INDEX			0x9B

#define	PACKET_TYPE3	3
/* Build a type-3 packet header: type in [31:30], dword count in [29:16],
 * opcode in [15:8].  Every argument and the shift term are parenthesized
 * so the macro expands safely inside larger expressions. */
#define PACKET3(op, n)	((PACKET_TYPE3 << 30) |				\
			 (((op) & 0xFF) << 8) |				\
			 (((n) & 0x3FFF) << 16))
/* Type-3 header with the shader-type bit (bit 1) set for compute queues.
 * Fully parenthesized so `PACKET3_COMPUTE(...) op x` groups as intended
 * (the previous form leaked the trailing `| (1 << 1)` into the caller's
 * expression). */
#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | (1 << 1))
27
28
/* A GPU buffer object with a CPU mapping and a GPU virtual-address
 * mapping.  `size` and `heap` (an AMDGPU_GEM_DOMAIN_* value) must be set
 * by the caller before shader_test_bo_alloc(); the rest is filled in by
 * the allocation. */
struct shader_test_bo {
	amdgpu_bo_handle bo;	/* buffer-object handle */
	unsigned size;		/* allocation size in bytes */
	unsigned heap;		/* memory domain (GTT or VRAM) */
	void *ptr;		/* CPU-visible mapping */
	uint64_t mc_address;	/* GPU virtual address */
	amdgpu_va_handle va;	/* VA-range handle for the mapping */
};
37
/* State for a draw test: pixel- and vertex-shader code buffers plus the
 * shader variant loaded into each. */
struct shader_test_draw {
	struct shader_test_bo ps_bo;	/* pixel-shader code buffer */
	enum ps_type ps_type;		/* which pixel shader to load */
	struct shader_test_bo vs_bo;	/* vertex-shader code buffer */
	enum vs_type vs_type;		/* which vertex shader to load */
};
/* State for a compute-dispatch test: the compute-shader code buffer and
 * which shader variant it holds. */
struct shader_test_dispatch {
	struct shader_test_bo cs_bo;	/* compute-shader code buffer */
	enum cs_type cs_type;		/* which compute shader to load */
};
48
/* Per-run parameters handed to each shader_test_for_each() callback. */
struct shader_test_info {
	amdgpu_device_handle device_handle;	/* device under test */
	enum amdgpu_test_gfx_version version;	/* gfx9 / gfx10 / gfx11 */
	unsigned ip;		/* HW IP block (GFX or COMPUTE) */
	unsigned ring;		/* ring index within that IP */
	int hang;		/* nonzero: submit a hanging shader */
	int hang_slow;		/* nonzero: use the slow (timeout) hang variant */
};
57
/* Working state accumulated while one test emits its command stream. */
struct shader_test_priv {
	const struct shader_test_info *info;	/* immutable run parameters */
	unsigned cmd_curr;	/* current write index (in dwords) into cmd */

	union {			/* a test is either a draw or a dispatch */
		struct shader_test_draw shader_draw;
		struct shader_test_dispatch shader_dispatch;
	};
	struct shader_test_bo vtx_attributes_mem;	/* gfx11 attribute ring memory */
	struct shader_test_bo cmd;	/* command (IB) buffer */
	struct shader_test_bo src;	/* copy-source data buffer */
	struct shader_test_bo dst;	/* destination / result buffer */
};
71
72static int shader_test_bo_alloc(amdgpu_device_handle device_handle,
73					    struct shader_test_bo *shader_test_bo)
74{
75	return amdgpu_bo_alloc_and_map(device_handle, shader_test_bo->size, 4096,
76				    shader_test_bo->heap, 0,
77				    &(shader_test_bo->bo), (void **)&(shader_test_bo->ptr),
78				    &(shader_test_bo->mc_address), &(shader_test_bo->va));
79}
80
81static int shader_test_bo_free(struct shader_test_bo *shader_test_bo)
82{
83	return amdgpu_bo_unmap_and_free(shader_test_bo->bo, shader_test_bo->va,
84					shader_test_bo->mc_address,
85					shader_test_bo->size);
86}
87
88void shader_test_for_each(amdgpu_device_handle device_handle, unsigned ip,
89				       void (*fn)(struct shader_test_info *test_info))
90{
91	int r;
92	uint32_t ring_id;
93	struct shader_test_info test_info = {0};
94	struct drm_amdgpu_info_hw_ip info = {0};
95
96	r = amdgpu_query_hw_ip_info(device_handle, ip, 0, &info);
97	CU_ASSERT_EQUAL(r, 0);
98	if (!info.available_rings) {
99		printf("SKIP ... as there's no %s ring\n",
100				(ip == AMDGPU_HW_IP_GFX) ? "graphics": "compute");
101		return;
102	}
103
104	switch (info.hw_ip_version_major) {
105	case 9:
106		test_info.version = AMDGPU_TEST_GFX_V9;
107		break;
108	case 10:
109		test_info.version = AMDGPU_TEST_GFX_V10;
110		break;
111	case 11:
112		test_info.version = AMDGPU_TEST_GFX_V11;
113		break;
114	default:
115		printf("SKIP ... unsupported gfx version %d\n", info.hw_ip_version_major);
116		return;
117	}
118
119	test_info.device_handle = device_handle;
120	test_info.ip = ip;
121
122	printf("\n");
123	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
124		printf("%s ring %d\n", (ip == AMDGPU_HW_IP_GFX) ? "graphics": "compute",
125					ring_id);
126		test_info.ring = ring_id;
127		fn(&test_info);
128	}
129}
130
131static void write_context_control(struct shader_test_priv *test_priv)
132{
133	int i = test_priv->cmd_curr;
134	uint32_t *ptr = test_priv->cmd.ptr;
135
136	if (test_priv->info->ip == AMDGPU_HW_IP_GFX) {
137		ptr[i++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1);
138		ptr[i++] = 0x80000000;
139		ptr[i++] = 0x80000000;
140	}
141
142	test_priv->cmd_curr = i;
143}
144
145static void shader_test_load_shader_hang_slow(struct shader_test_bo *shader_bo,
146								   struct shader_test_shader_bin *shader_bin)
147{
148	int i, j, loop;
149
150	loop = (shader_bo->size / sizeof(uint32_t) - shader_bin->header_length
151		- shader_bin->foot_length) / shader_bin->body_length;
152
153	memcpy(shader_bo->ptr, shader_bin->shader, shader_bin->header_length * sizeof(uint32_t));
154
155	j = shader_bin->header_length;
156	for (i = 0; i < loop; i++) {
157		memcpy(shader_bo->ptr + j,
158			shader_bin->shader + shader_bin->header_length,
159			shader_bin->body_length * sizeof(uint32_t));
160		j += shader_bin->body_length;
161	}
162
163	memcpy(shader_bo->ptr + j,
164		shader_bin->shader + shader_bin->header_length + shader_bin->body_length,
165		shader_bin->foot_length * sizeof(uint32_t));
166}
167
168static void amdgpu_dispatch_load_cs_shader_hang_slow(struct shader_test_priv *test_priv)
169{
170	struct amdgpu_gpu_info gpu_info = {0};
171	struct shader_test_shader_bin *cs_shader_bin;
172	int r;
173
174	r = amdgpu_query_gpu_info(test_priv->info->device_handle, &gpu_info);
175	CU_ASSERT_EQUAL(r, 0);
176
177	switch (gpu_info.family_id) {
178	case AMDGPU_FAMILY_AI:
179		cs_shader_bin = &memcpy_cs_hang_slow_ai;
180		break;
181	case AMDGPU_FAMILY_RV:
182		cs_shader_bin = &memcpy_cs_hang_slow_rv;
183		break;
184	default:
185		cs_shader_bin = &memcpy_cs_hang_slow_nv;
186		break;
187	}
188
189	shader_test_load_shader_hang_slow(&test_priv->shader_dispatch.cs_bo, cs_shader_bin);
190}
191
192static void amdgpu_dispatch_load_cs_shader(struct shader_test_priv *test_priv)
193{
194	if (test_priv->info->hang) {
195		if (test_priv->info->hang_slow)
196			amdgpu_dispatch_load_cs_shader_hang_slow(test_priv);
197		else
198			memcpy(test_priv->shader_dispatch.cs_bo.ptr, memcpy_shader_hang,
199				sizeof(memcpy_shader_hang));
200	} else {
201		memcpy(test_priv->shader_dispatch.cs_bo.ptr,
202			shader_test_cs[test_priv->info->version][test_priv->shader_dispatch.cs_type].shader,
203			shader_test_cs[test_priv->info->version][test_priv->shader_dispatch.cs_type].shader_size);
204	}
205}
206
/* Emit the gfx9 default compute state into the IB: context control (GFX
 * ring only), then clear the dispatch start coordinates and the scratch
 * ring size. */
static void amdgpu_dispatch_init_gfx9(struct shader_test_priv *test_priv)
{
	int i;
	uint32_t *ptr = test_priv->cmd.ptr;

	/* Write context control and load shadowing register if necessary */
	write_context_control(test_priv);

	i = test_priv->cmd_curr;

	/* Issue commands to set default compute state. */
	/* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */
	ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 3);
	ptr[i++] = 0x204;
	/* skip 3 data dwords: the IB was zero-filled by the caller */
	i += 3;

	/* clear mmCOMPUTE_TMPRING_SIZE */
	ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 1);
	ptr[i++] = 0x218;
	ptr[i++] = 0;

	test_priv->cmd_curr = i;
}
230
/* gfx10 default compute state: the shared gfx9 setup plus registers added
 * on gfx10 (shader checksum, request control, coherency start delay). */
static void amdgpu_dispatch_init_gfx10(struct shader_test_priv *test_priv)
{
	int i;
	uint32_t *ptr = test_priv->cmd.ptr;

	/* gfx10 reuses the gfx9 base state (context control, start XYZ,
	 * scratch ring size) */
	amdgpu_dispatch_init_gfx9(test_priv);

	i = test_priv->cmd_curr;

	/* mmCOMPUTE_SHADER_CHKSUM */
	ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 1);
	ptr[i++] = 0x22a;
	ptr[i++] = 0;
	/* mmCOMPUTE_REQ_CTRL */
	ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 6);
	ptr[i++] = 0x222;
	/* skip 6 data dwords: the IB was zero-filled by the caller */
	i += 6;
	/* mmCP_COHER_START_DELAY */
	ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ptr[i++] = 0x7b;
	ptr[i++] = 0x20;

	test_priv->cmd_curr = i;
}
255
/* gfx11 default compute state: like gfx9 plus request control, the user
 * accumulators, and the shader checksum (a UCONFIG register on gfx11). */
static void amdgpu_dispatch_init_gfx11(struct shader_test_priv *test_priv)
{
	int i;
	uint32_t *ptr = test_priv->cmd.ptr;

	/* Write context control and load shadowing register if necessary */
	write_context_control(test_priv);

	i = test_priv->cmd_curr;

	/* Issue commands to set default compute state. */
	/* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */
	ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 3);
	ptr[i++] = 0x204;
	/* skip 3 data dwords: the IB was zero-filled by the caller */
	i += 3;

	/* clear mmCOMPUTE_TMPRING_SIZE */
	ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 1);
	ptr[i++] = 0x218;
	ptr[i++] = 0;

	/* mmCOMPUTE_REQ_CTRL */
	ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 1);
	ptr[i++] = 0x222;
	ptr[i++] = 0;

	/* mmCOMPUTE_USER_ACCUM_0 .. 3*/
	ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 4);
	ptr[i++] = 0x224;
	/* skip 4 data dwords: the IB was zero-filled by the caller */
	i += 4;

	/* mmCOMPUTE_SHADER_CHKSUM */
	ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ptr[i++] = 0x22a;
	ptr[i++] = 0;

	test_priv->cmd_curr = i;
}
294
295static void amdgpu_dispatch_init(struct shader_test_priv *test_priv)
296{
297	switch (test_priv->info->version) {
298	case AMDGPU_TEST_GFX_V9:
299		amdgpu_dispatch_init_gfx9(test_priv);
300		break;
301	case AMDGPU_TEST_GFX_V10:
302		amdgpu_dispatch_init_gfx10(test_priv);
303		break;
304	case AMDGPU_TEST_GFX_V11:
305		amdgpu_dispatch_init_gfx11(test_priv);
306		break;
307	case AMDGPU_TEST_GFX_MAX:
308		assert(1 && "Not Support gfx, never go here");
309		break;
310	}
311}
312
313static void amdgpu_dispatch_write_cumask(struct shader_test_priv *test_priv)
314{
315	int i = test_priv->cmd_curr;
316	uint32_t *ptr = test_priv->cmd.ptr;
317
318	/*  Issue commands to set cu mask used in current dispatch */
319	switch (test_priv->info->version) {
320	case AMDGPU_TEST_GFX_V9:
321		/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */
322		ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 2);
323		ptr[i++] = 0x216;
324		ptr[i++] = 0xffffffff;
325		ptr[i++] = 0xffffffff;
326		/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */
327		ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 2);
328		ptr[i++] = 0x219;
329		ptr[i++] = 0xffffffff;
330		ptr[i++] = 0xffffffff;
331		break;
332	case AMDGPU_TEST_GFX_V10:
333	case AMDGPU_TEST_GFX_V11:
334		/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */
335		ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG_INDEX, 2);
336		ptr[i++] = 0x30000216;
337		ptr[i++] = 0xffffffff;
338		ptr[i++] = 0xffffffff;
339		/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */
340		ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG_INDEX, 2);
341		ptr[i++] = 0x30000219;
342		ptr[i++] = 0xffffffff;
343		ptr[i++] = 0xffffffff;
344		break;
345	case AMDGPU_TEST_GFX_MAX:
346		assert(1 && "Not Support gfx, never go here");
347		break;
348	}
349
350	test_priv->cmd_curr = i;
351}
352
/* gfx9: program the compute-shader address, the shader's SH registers, and
 * the src/dst buffer descriptors consumed by the shader through SGPRs. */
static void amdgpu_dispatch_write2hw_gfx9(struct shader_test_priv *test_priv)
{
	const struct shader_test_cs_shader *cs_shader = &shader_test_cs[test_priv->info->version][test_priv->shader_dispatch.cs_type];
	int j, i = test_priv->cmd_curr;
	uint32_t *ptr = test_priv->cmd.ptr;
	uint64_t shader_addr = test_priv->shader_dispatch.cs_bo.mc_address;

	/* Writes shader state to HW */
	/* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */
	ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 2);
	ptr[i++] = 0x20c;
	ptr[i++] = (shader_addr >> 8);	/* PGM_LO: address in 256-byte units */
	ptr[i++] = (shader_addr >> 40);	/* PGM_HI: upper address bits */
	/* write sh regs*/
	for (j = 0; j < cs_shader->num_sh_reg; j++) {
		ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 1);
		/* - Gfx9ShRegBase */
		ptr[i++] = cs_shader->sh_reg[j].reg_offset - shader_test_gfx_info[test_priv->info->version].sh_reg_base;
		ptr[i++] = cs_shader->sh_reg[j].reg_value;
	}

	/* Write constant data */
	if (CS_BUFFERCLEAR == test_priv->shader_dispatch.cs_type) {
		/* destination buffer descriptor at SGPR offset 0x240 */
		ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 4);
		ptr[i++] = 0x240;
		ptr[i++] = test_priv->dst.mc_address;
		ptr[i++] = (test_priv->dst.mc_address >> 32) | 0x100000;
		ptr[i++] = test_priv->dst.size / 16;	/* size in 16-byte elements */
		ptr[i++] = 0x74fac;	/* descriptor word 3 (gfx9 value) */

		/* Sets a range of pixel shader constants */
		ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 4);
		ptr[i++] = 0x244;
		ptr[i++] = 0x22222222;	/* clear pattern the memset test verifies */
		ptr[i++] = 0x22222222;
		ptr[i++] = 0x22222222;
		ptr[i++] = 0x22222222;
	} else {
		/* source buffer descriptor at SGPR offset 0x240 */
		ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 4);
		ptr[i++] = 0x240;
		ptr[i++] = test_priv->src.mc_address;
		ptr[i++] = (test_priv->src.mc_address >> 32) | 0x100000;
		ptr[i++] = test_priv->src.size / 16;
		ptr[i++] = 0x74fac;

		/* Writes the UAV constant data to the SGPRs. */
		ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 4);
		ptr[i++] = 0x244;
		ptr[i++] = test_priv->dst.mc_address;
		ptr[i++] = (test_priv->dst.mc_address >> 32) | 0x100000;
		ptr[i++] = test_priv->dst.size / 16;
		ptr[i++] = 0x74fac;
	}

	test_priv->cmd_curr = i;
}
409
/* gfx10: like the gfx9 version, but also clears mmCOMPUTE_PGM_RSRC3 and
 * uses the gfx10 buffer-descriptor word-3 value (0x1104bfac). */
static void amdgpu_dispatch_write2hw_gfx10(struct shader_test_priv *test_priv)
{
	int i = test_priv->cmd_curr;
	uint32_t *ptr = test_priv->cmd.ptr;
	const struct shader_test_cs_shader *cs_shader = &shader_test_cs[test_priv->info->version][test_priv->shader_dispatch.cs_type];
	int j;
	uint64_t shader_addr = test_priv->shader_dispatch.cs_bo.mc_address;

	/* Writes shader state to HW */
	/* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */
	ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 2);
	ptr[i++] = 0x20c;
	ptr[i++] = (shader_addr >> 8);	/* PGM_LO: address in 256-byte units */
	ptr[i++] = (shader_addr >> 40);	/* PGM_HI: upper address bits */
	/* write sh regs*/
	for (j = 0; j < cs_shader->num_sh_reg; j++) {
		ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 1);
		/* - Gfx9ShRegBase */
		ptr[i++] = cs_shader->sh_reg[j].reg_offset - shader_test_gfx_info[test_priv->info->version].sh_reg_base;
		ptr[i++] = cs_shader->sh_reg[j].reg_value;
	}

	/* mmCOMPUTE_PGM_RSRC3 */
	ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 1);
	ptr[i++] = 0x228;
	ptr[i++] = 0;

	if (CS_BUFFERCLEAR == test_priv->shader_dispatch.cs_type) {
		/* destination buffer descriptor at SGPR offset 0x240 */
		ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 4);
		ptr[i++] = 0x240;
		ptr[i++] = test_priv->dst.mc_address;
		ptr[i++] = (test_priv->dst.mc_address >> 32) | 0x100000;
		ptr[i++] = test_priv->dst.size / 16;	/* size in 16-byte elements */
		ptr[i++] = 0x1104bfac;	/* descriptor word 3 (gfx10 value) */

		/* Sets a range of pixel shader constants */
		ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 4);
		ptr[i++] = 0x244;
		ptr[i++] = 0x22222222;	/* clear pattern the memset test verifies */
		ptr[i++] = 0x22222222;
		ptr[i++] = 0x22222222;
		ptr[i++] = 0x22222222;
	} else {
		/* source buffer descriptor at SGPR offset 0x240 */
		ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 4);
		ptr[i++] = 0x240;
		ptr[i++] = test_priv->src.mc_address;
		ptr[i++] = (test_priv->src.mc_address >> 32) | 0x100000;
		ptr[i++] = test_priv->src.size / 16;
		ptr[i++] = 0x1104bfac;

		/* Writes the UAV constant data to the SGPRs. */
		ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 4);
		ptr[i++] = 0x244;
		ptr[i++] = test_priv->dst.mc_address;
		ptr[i++] = (test_priv->dst.mc_address>> 32) | 0x100000;
		ptr[i++] = test_priv->dst.size / 16;
		ptr[i++] = 0x1104bfac;
	}

	test_priv->cmd_curr = i;
}
471
/* gfx11: program the shader address, SH registers (with a gfx11-specific
 * tweak for register 0x2E12), mmCOMPUTE_PGM_RSRC3, and the src/dst buffer
 * descriptors using the gfx11 word-3 value (0x1003dfac). */
static void amdgpu_dispatch_write2hw_gfx11(struct shader_test_priv *test_priv)
{
	enum amdgpu_test_gfx_version version = test_priv->info->version;
	const struct shader_test_cs_shader *cs_shader = &shader_test_cs[version][test_priv->shader_dispatch.cs_type];
	int j, i = test_priv->cmd_curr;
	uint32_t *ptr = test_priv->cmd.ptr;
	uint64_t shader_addr = test_priv->shader_dispatch.cs_bo.mc_address;

	/* Writes shader state to HW */
	/* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */
	ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 2);
	ptr[i++] = 0x20c;
	ptr[i++] = (shader_addr >> 8);	/* PGM_LO: address in 256-byte units */
	ptr[i++] = (shader_addr >> 40);	/* PGM_HI: upper address bits */

	/* write sh regs*/
	for (j = 0; j < cs_shader->num_sh_reg; j++) {
		ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 1);
		/* - Gfx9ShRegBase */
		ptr[i++] = cs_shader->sh_reg[j].reg_offset - shader_test_gfx_info[version].sh_reg_base;
		ptr[i++] = cs_shader->sh_reg[j].reg_value;
		/* NOTE(review): for reg 0x2E12 bit 29 of the value is cleared
		 * on gfx11 — purpose not evident from this file; confirm
		 * against the gfx11 register spec */
		if (cs_shader->sh_reg[j].reg_offset == 0x2E12)
			ptr[i-1] &= ~(1<<29);
	}

	/* mmCOMPUTE_PGM_RSRC3 */
	ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 1);
	ptr[i++] = 0x228;
	ptr[i++] = 0x3f0;

	/* Write constant data */
	/* Writes the texture resource constants data to the SGPRs */
	if (CS_BUFFERCLEAR == test_priv->shader_dispatch.cs_type) {
		/* destination buffer descriptor at SGPR offset 0x240 */
		ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 4);
		ptr[i++] = 0x240;
		ptr[i++] = test_priv->dst.mc_address;
		ptr[i++] = (test_priv->dst.mc_address >> 32) | 0x100000;
		ptr[i++] = test_priv->dst.size / 16;	/* size in 16-byte elements */
		ptr[i++] = 0x1003dfac;	/* descriptor word 3 (gfx11 value) */

		/* Sets a range of pixel shader constants */
		ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 4);
		ptr[i++] = 0x244;
		ptr[i++] = 0x22222222;	/* clear pattern the memset test verifies */
		ptr[i++] = 0x22222222;
		ptr[i++] = 0x22222222;
		ptr[i++] = 0x22222222;
	} else {
		/* source buffer descriptor at SGPR offset 0x240 */
		ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 4);
		ptr[i++] = 0x240;
		ptr[i++] = test_priv->src.mc_address;
		ptr[i++] = (test_priv->src.mc_address >> 32) | 0x100000;
		ptr[i++] = test_priv->src.size / 16;
		ptr[i++] = 0x1003dfac;

		/* Writes the UAV constant data to the SGPRs. */
		ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 4);
		ptr[i++] = 0x244;
		ptr[i++] = test_priv->dst.mc_address;
		ptr[i++] = (test_priv->dst.mc_address>> 32) | 0x100000;
		ptr[i++] = test_priv->dst.size / 16;
		ptr[i++] = 0x1003dfac;
	}

	test_priv->cmd_curr = i;
}
538
539static void amdgpu_dispatch_write2hw(struct shader_test_priv *test_priv)
540{
541	switch (test_priv->info->version) {
542	case AMDGPU_TEST_GFX_V9:
543		amdgpu_dispatch_write2hw_gfx9(test_priv);
544		break;
545	case AMDGPU_TEST_GFX_V10:
546		amdgpu_dispatch_write2hw_gfx10(test_priv);
547		break;
548	case AMDGPU_TEST_GFX_V11:
549		amdgpu_dispatch_write2hw_gfx11(test_priv);
550		break;
551	case AMDGPU_TEST_GFX_MAX:
552		assert(1 && "Not Support gfx, never go here");
553		break;
554	}
555}
556
/* Append the dispatch itself: clear the CU resource limits, then launch a
 * 1-D grid sized to cover the whole destination buffer. */
static void amdgpu_dispatch_write_dispatch_cmd(struct shader_test_priv *test_priv)
{
	int i = test_priv->cmd_curr;
	uint32_t *ptr = test_priv->cmd.ptr;

	/* clear mmCOMPUTE_RESOURCE_LIMITS */
	ptr[i++] = PACKET3_COMPUTE(PACKET3_SET_SH_REG, 1);
	ptr[i++] = 0x215;
	ptr[i++] = 0;

	/* dispatch direct command */
	ptr[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
	/* dim x: one group per 0x40 elements of 16 bytes, rounded up */
	ptr[i++] = (test_priv->dst.size / 16 + 0x40 - 1 ) / 0x40;
	ptr[i++] = 1;	/* dim y */
	ptr[i++] = 1;	/* dim z */
	ptr[i++] = 1;

	test_priv->cmd_curr = i;
}
/* Dispatch memset test: run the CS_BUFFERCLEAR compute shader, which fills
 * a 0x4000-byte VRAM buffer with 0x22 bytes (the constant programmed in
 * amdgpu_dispatch_write2hw), then spot-check the start, middle and end of
 * the destination from the CPU. */
static void amdgpu_test_dispatch_memset(struct shader_test_info *test_info)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle resources[3];
	struct shader_test_priv test_priv;
	struct shader_test_bo *cmd = &(test_priv.cmd);
	struct shader_test_bo *dst = &(test_priv.dst);
	struct shader_test_bo *shader = &(test_priv.shader_dispatch.cs_bo);
	uint32_t *ptr_cmd;
	uint8_t *ptr_dst;
	int i, r;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info= {0};
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t expired;
	uint8_t cptr[16];

	memset(&test_priv, 0, sizeof(test_priv));
	test_priv.info = test_info;
	test_priv.shader_dispatch.cs_type = CS_BUFFERCLEAR;
	r = amdgpu_cs_ctx_create(test_info->device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* command (IB) buffer in GTT, zeroed so skipped dwords read as 0 */
	cmd->size = 4096;
	cmd->heap = AMDGPU_GEM_DOMAIN_GTT;
	r = shader_test_bo_alloc(test_info->device_handle, cmd);
	CU_ASSERT_EQUAL(r, 0);
	ptr_cmd = cmd->ptr;
	memset(ptr_cmd, 0, cmd->size);

	/* compute-shader code buffer in VRAM */
	shader->size = 4096;
	shader->heap = AMDGPU_GEM_DOMAIN_VRAM;
	r = shader_test_bo_alloc(test_info->device_handle, shader);
	CU_ASSERT_EQUAL(r, 0);
	memset(shader->ptr, 0, shader->size);
	amdgpu_dispatch_load_cs_shader(&test_priv);

	/* destination buffer the shader will fill */
	dst->size = 0x4000;
	dst->heap = AMDGPU_GEM_DOMAIN_VRAM;
	r = shader_test_bo_alloc(test_info->device_handle, dst);
	CU_ASSERT_EQUAL(r, 0);

	amdgpu_dispatch_init(&test_priv);

	/*  Issue commands to set cu mask used in current dispatch */
	amdgpu_dispatch_write_cumask(&test_priv);

	/* Writes shader state to HW */
	amdgpu_dispatch_write2hw(&test_priv);

	amdgpu_dispatch_write_dispatch_cmd(&test_priv);

	/* pad the IB to an 8-dword boundary with NOPs */
	i = test_priv.cmd_curr;
	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
	test_priv.cmd_curr = i;

	resources[0] = dst->bo;
	resources[1] = shader->bo;
	resources[2] = cmd->bo;
	r = amdgpu_bo_list_create(test_info->device_handle, 3, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = cmd->mc_address;
	ib_info.size = test_priv.cmd_curr;	/* IB size in dwords */
	ibs_request.ip_type = test_info->ip;
	ibs_request.ring = test_info->ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = test_info->ip;
	fence_status.ip_instance = 0;
	fence_status.ring = test_info->ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for IB accomplished */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* verify if memset test result meets with expected */
	/* spot-check 16 bytes at the start, end and middle against 0x22 */
	i = 0;
	ptr_dst = (uint8_t *)(dst->ptr);
	memset(cptr, 0x22, 16);
	CU_ASSERT_EQUAL(memcmp(ptr_dst + i, cptr, 16), 0);
	i = dst->size - 16;
	CU_ASSERT_EQUAL(memcmp(ptr_dst + i, cptr, 16), 0);
	i = dst->size / 2;
	CU_ASSERT_EQUAL(memcmp(ptr_dst + i, cptr, 16), 0);

	r = shader_test_bo_free(dst);
	CU_ASSERT_EQUAL(r, 0);

	r = shader_test_bo_free(shader);
	CU_ASSERT_EQUAL(r, 0);

	r = shader_test_bo_free(cmd);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
691
692static
693void amdgpu_test_dispatch_memcpy(struct shader_test_info *test_info)
694{
695	struct shader_test_priv test_priv;
696	amdgpu_context_handle context_handle;
697	amdgpu_bo_handle resources[4];
698	struct shader_test_bo *cmd = &(test_priv.cmd);
699	struct shader_test_bo *src = &(test_priv.src);
700	struct shader_test_bo *dst = &(test_priv.dst);
701	struct shader_test_bo *shader = &(test_priv.shader_dispatch.cs_bo);
702	uint32_t *ptr_cmd;
703	uint8_t *ptr_src;
704	uint8_t *ptr_dst;
705	int i, r;
706	struct amdgpu_cs_request ibs_request = {0};
707	struct amdgpu_cs_ib_info ib_info= {0};
708	uint32_t expired, hang_state, hangs;
709	amdgpu_bo_list_handle bo_list;
710	struct amdgpu_cs_fence fence_status = {0};
711
712	memset(&test_priv, 0, sizeof(test_priv));
713	test_priv.info = test_info;
714	test_priv.cmd.size = 4096;
715	test_priv.cmd.heap = AMDGPU_GEM_DOMAIN_GTT;
716
717	test_priv.shader_dispatch.cs_bo.heap = AMDGPU_GEM_DOMAIN_VRAM;
718	test_priv.shader_dispatch.cs_type = CS_BUFFERCOPY;
719	test_priv.src.heap = AMDGPU_GEM_DOMAIN_VRAM;
720	test_priv.dst.heap = AMDGPU_GEM_DOMAIN_VRAM;
721	if (test_info->hang_slow) {
722		test_priv.shader_dispatch.cs_bo.size = 0x4000000;
723		test_priv.src.size = 0x4000000;
724		test_priv.dst.size = 0x4000000;
725	} else {
726		test_priv.shader_dispatch.cs_bo.size = 4096;
727		test_priv.src.size = 0x4000;
728		test_priv.dst.size = 0x4000;
729	}
730
731	r = amdgpu_cs_ctx_create(test_info->device_handle, &context_handle);
732	CU_ASSERT_EQUAL(r, 0);
733
734	r = shader_test_bo_alloc(test_info->device_handle, cmd);
735	CU_ASSERT_EQUAL(r, 0);
736	ptr_cmd = cmd->ptr;
737	memset(ptr_cmd, 0, cmd->size);
738
739	r = shader_test_bo_alloc(test_info->device_handle, shader);
740	CU_ASSERT_EQUAL(r, 0);
741	memset(shader->ptr, 0, shader->size);
742	amdgpu_dispatch_load_cs_shader(&test_priv);
743
744	r = shader_test_bo_alloc(test_info->device_handle, src);
745	CU_ASSERT_EQUAL(r, 0);
746	ptr_src = (uint8_t *)(src->ptr);
747	memset(ptr_src, 0x55, src->size);
748
749	r = shader_test_bo_alloc(test_info->device_handle, dst);
750	CU_ASSERT_EQUAL(r, 0);
751
752	amdgpu_dispatch_init(&test_priv);
753
754	/*  Issue commands to set cu mask used in current dispatch */
755	amdgpu_dispatch_write_cumask(&test_priv);
756
757	/* Writes shader state to HW */
758	amdgpu_dispatch_write2hw(&test_priv);
759
760	amdgpu_dispatch_write_dispatch_cmd(&test_priv);
761
762	i = test_priv.cmd_curr;
763	while (i & 7)
764		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
765	test_priv.cmd_curr = i;
766
767	resources[0] = shader->bo;
768	resources[1] = src->bo;
769	resources[2] = dst->bo;
770	resources[3] = cmd->bo;
771	r = amdgpu_bo_list_create(test_info->device_handle, 4, resources, NULL, &bo_list);
772	CU_ASSERT_EQUAL(r, 0);
773
774	ib_info.ib_mc_address = cmd->mc_address;
775	ib_info.size = test_priv.cmd_curr;
776	ibs_request.ip_type = test_info->ip;
777	ibs_request.ring = test_info->ring;
778	ibs_request.resources = bo_list;
779	ibs_request.number_of_ibs = 1;
780	ibs_request.ibs = &ib_info;
781	ibs_request.fence_info.handle = NULL;
782	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
783	CU_ASSERT_EQUAL(r, 0);
784
785	fence_status.ip_type = test_info->ip;
786	fence_status.ip_instance = 0;
787	fence_status.ring = test_info->ring;
788	fence_status.context = context_handle;
789	fence_status.fence = ibs_request.seq_no;
790
791	/* wait for IB accomplished */
792	r = amdgpu_cs_query_fence_status(&fence_status,
793					 AMDGPU_TIMEOUT_INFINITE,
794					 0, &expired);
795
796	if (!test_info->hang) {
797		CU_ASSERT_EQUAL(r, 0);
798		CU_ASSERT_EQUAL(expired, true);
799
800		/* verify if memcpy test result meets with expected */
801		i = 0;
802		ptr_dst = (uint8_t *)dst->ptr;
803		CU_ASSERT_EQUAL(memcmp(ptr_dst + i, ptr_src + i, 16), 0);
804		i = dst->size - 16;
805		CU_ASSERT_EQUAL(memcmp(ptr_dst + i, ptr_src + i, 16), 0);
806		i = dst->size / 2;
807		CU_ASSERT_EQUAL(memcmp(ptr_dst + i, ptr_src + i, 16), 0);
808	} else {
809		r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
810		CU_ASSERT_EQUAL(r, 0);
811		CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
812	}
813
814	r = amdgpu_bo_list_destroy(bo_list);
815	CU_ASSERT_EQUAL(r, 0);
816
817	r = shader_test_bo_free(src);
818	CU_ASSERT_EQUAL(r, 0);
819	r = shader_test_bo_free(dst);
820	CU_ASSERT_EQUAL(r, 0);
821
822	r = shader_test_bo_free(shader);
823	CU_ASSERT_EQUAL(r, 0);
824
825	r = shader_test_bo_free(cmd);
826
827	r = amdgpu_cs_ctx_free(context_handle);
828	CU_ASSERT_EQUAL(r, 0);
829}
830
/* Per-ring callback for the plain dispatch test: memset then memcpy. */
static void shader_test_dispatch_cb(struct shader_test_info *test_info)
{
	amdgpu_test_dispatch_memset(test_info);
	amdgpu_test_dispatch_memcpy(test_info);
}
836static void shader_test_dispatch_hang_cb(struct shader_test_info *test_info)
837{
838	test_info->hang = 0;
839	amdgpu_test_dispatch_memcpy(test_info);
840
841	test_info->hang = 1;
842	amdgpu_test_dispatch_memcpy(test_info);
843
844	test_info->hang = 0;
845	amdgpu_test_dispatch_memcpy(test_info);
846}
847
848static void shader_test_dispatch_hang_slow_cb(struct shader_test_info *test_info)
849{
850	test_info->hang = 0;
851	test_info->hang_slow = 0;
852	amdgpu_test_dispatch_memcpy(test_info);
853
854	test_info->hang = 1;
855	test_info->hang_slow = 1;
856	amdgpu_test_dispatch_memcpy(test_info);
857
858	test_info->hang = 0;
859	test_info->hang_slow = 0;
860	amdgpu_test_dispatch_memcpy(test_info);
861}
862
/* Public entry: run the dispatch memset+memcpy tests on every ring of @ip. */
void amdgpu_test_dispatch_helper(amdgpu_device_handle device_handle, unsigned ip)
{
	shader_test_for_each(device_handle, ip, shader_test_dispatch_cb);
}
867
/* Public entry: run the dispatch hang/recovery test on every ring of @ip. */
void amdgpu_test_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip)
{
	shader_test_for_each(device_handle, ip, shader_test_dispatch_hang_cb);
}
872
/* Public entry: run the dispatch slow-hang test on every ring of @ip. */
void amdgpu_test_dispatch_hang_slow_helper(amdgpu_device_handle device_handle, uint32_t ip)
{
	shader_test_for_each(device_handle, ip, shader_test_dispatch_hang_slow_cb);
}
877
878static void amdgpu_draw_load_ps_shader_hang_slow(struct shader_test_priv *test_priv)
879{
880	struct amdgpu_gpu_info gpu_info = {0};
881	struct shader_test_shader_bin *ps_shader_bin = &memcpy_ps_hang_slow_navi21;
882	int r;
883
884	r = amdgpu_query_gpu_info(test_priv->info->device_handle, &gpu_info);
885	CU_ASSERT_EQUAL(r, 0);
886
887	switch (gpu_info.family_id) {
888		case AMDGPU_FAMILY_AI:
889		case AMDGPU_FAMILY_RV:
890			ps_shader_bin = &memcpy_ps_hang_slow_ai;
891			break;
892		case AMDGPU_FAMILY_NV:
893			if (gpu_info.chip_external_rev < 40)
894				ps_shader_bin = &memcpy_ps_hang_slow_navi10;
895			break;
896	}
897
898	shader_test_load_shader_hang_slow(&test_priv->shader_draw.ps_bo, ps_shader_bin);
899}
900
/* Round @size up to the next multiple of 256 bytes. */
static uint32_t round_up_size(uint32_t size)
{
	const uint32_t align = 256;
	uint32_t rem = size % align;

	return rem ? size + (align - rem) : size;
}
/* Load the pixel shader.  For hang tests a hanging variant is loaded;
 * otherwise the selected shader is replicated once per export format
 * (10 copies, each 256-byte aligned), then each copy's export code is
 * overwritten with its per-format patch instructions. */
static void amdgpu_draw_load_ps_shader(struct shader_test_priv *test_priv)
{
	uint8_t *ptr_shader = test_priv->shader_draw.ps_bo.ptr;
	const struct shader_test_ps_shader *shader;
	uint32_t shader_offset, num_export_fmt;
	uint32_t mem_offset, patch_code_offset;
	int i;

	if (test_priv->info->hang) {
		if (test_priv->info->hang_slow)
			amdgpu_draw_load_ps_shader_hang_slow(test_priv);
		else
			memcpy(ptr_shader, memcpy_shader_hang, sizeof(memcpy_shader_hang));

		return;
	}

	shader = &shader_test_ps[test_priv->info->version][test_priv->shader_draw.ps_type];
	num_export_fmt = 10;
	shader_offset = round_up_size(shader->shader_size);
	/* write main shader program */
	for (i = 0 ; i < num_export_fmt; i++) {
		mem_offset = i * shader_offset;
		memcpy(ptr_shader + mem_offset, shader->shader, shader->shader_size);
	}

	/* overwrite patch codes */
	/* NOTE(review): only patchinfo_code_offset[0] is used for every copy;
	 * the per-copy variation comes from patchinfo_code itself — confirm
	 * against shader_code.h */
	for (i = 0 ; i < num_export_fmt; i++) {
		mem_offset = i * shader_offset + shader->patchinfo_code_offset[0] * sizeof(uint32_t);
		patch_code_offset = i * shader->patchinfo_code_size;
		memcpy(ptr_shader + mem_offset,
			shader->patchinfo_code + patch_code_offset,
			shader->patchinfo_code_size * sizeof(uint32_t));
	}
}
940
941/* load RectPosTexFast_VS */
942static void amdgpu_draw_load_vs_shader(struct shader_test_priv *test_priv)
943{
944	uint8_t *ptr_shader = test_priv->shader_draw.vs_bo.ptr;
945	const struct shader_test_vs_shader *shader = &shader_test_vs[test_priv->info->version][test_priv->shader_draw.vs_type];
946
947	memcpy(ptr_shader, shader->shader, shader->shader_size);
948}
949
/* Common draw-test preamble: context control, gfx11 attribute-ring setup,
 * then the canned per-generation preamble register writes. */
static void amdgpu_draw_init(struct shader_test_priv *test_priv)
{
	int i;
	uint32_t *ptr = test_priv->cmd.ptr;
	const struct shader_test_gfx_info *gfx_info = &shader_test_gfx_info[test_priv->info->version];

	/* Write context control and load shadowing register if necessary */
	write_context_control(test_priv);
	i = test_priv->cmd_curr;

	if (test_priv->info->version == AMDGPU_TEST_GFX_V11) {
		/* reg 0x446: presumably mmSPI_ATTRIBUTE_RING_BASE (0x447
		 * below is the ring size); base address in 64KB units.
		 * NOTE(review): assumes vtx_attributes_mem was allocated by
		 * the caller before this runs — confirm in the draw tests */
		ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
		ptr[i++] = 0x446;
		ptr[i++] = (test_priv->vtx_attributes_mem.mc_address >> 16);
		// mmSPI_ATTRIBUTE_RING_SIZE
		ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
		ptr[i++] = 0x447;
		ptr[i++] = 0x20001;
	}
	/* append the per-generation preamble register cache verbatim */
	memcpy(ptr + i, gfx_info->preamble_cache, gfx_info->size_preamble_cache);

	test_priv->cmd_curr = i + gfx_info->size_preamble_cache/sizeof(uint32_t);
}
973
/* gfx9: program the render target (CB_COLOR0_*) to point at dst, set the
 * MRT pitch, disable the other color targets, set the PS export format,
 * and clear the depth-buffer state. */
static void amdgpu_draw_setup_and_write_drawblt_surf_info_gfx9(struct shader_test_priv *test_priv)
{
	int i = test_priv->cmd_curr;
	uint32_t *ptr = test_priv->cmd.ptr;

	/* setup color buffer */
	/* offset   reg
	   0xA318   CB_COLOR0_BASE
	   0xA319   CB_COLOR0_BASE_EXT
	   0xA31A   CB_COLOR0_ATTRIB2
	   0xA31B   CB_COLOR0_VIEW
	   0xA31C   CB_COLOR0_INFO
	   0xA31D   CB_COLOR0_ATTRIB
	   0xA31E   CB_COLOR0_DCC_CONTROL
	   0xA31F   CB_COLOR0_CMASK
	   0xA320   CB_COLOR0_CMASK_BASE_EXT
	   0xA321   CB_COLOR0_FMASK
	   0xA322   CB_COLOR0_FMASK_BASE_EXT
	   0xA323   CB_COLOR0_CLEAR_WORD0
	   0xA324   CB_COLOR0_CLEAR_WORD1
	   0xA325   CB_COLOR0_DCC_BASE
	   0xA326   CB_COLOR0_DCC_BASE_EXT */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15);
	ptr[i++] = 0x318;
	ptr[i++] = test_priv->dst.mc_address >> 8;	/* BASE in 256B units */
	ptr[i++] = test_priv->dst.mc_address >> 40;	/* BASE_EXT */
	/* ATTRIB2: larger surface dimensions for the slow-hang buffer */
	ptr[i++] = test_priv->info->hang_slow ? 0x3ffc7ff : 0x7c01f;
	ptr[i++] = 0;
	ptr[i++] = 0x50438;
	ptr[i++] = 0x10140000;
	/* skip 9 data dwords: remaining CB_COLOR0_* regs stay zero
	 * (assumes the IB was zero-filled by the caller, as in the
	 * dispatch tests) */
	i += 9;

	/* mmCB_MRT0_EPITCH */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x1e8;
	ptr[i++] = test_priv->info->hang_slow ? 0xfff : 0x1f;

	/* 0xA32B   CB_COLOR1_BASE */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x32b;
	ptr[i++] = 0;

	/* 0xA33A   CB_COLOR1_BASE */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x33a;
	ptr[i++] = 0;

	/* SPI_SHADER_COL_FORMAT */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x1c5;
	ptr[i++] = 9;

	/* Setup depth buffer */
	/* mmDB_Z_INFO */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
	ptr[i++] = 0xe;
	/* skip 2 data dwords: Z_INFO/STENCIL_INFO left zero */
	i += 2;

	test_priv->cmd_curr = i;
}
static void amdgpu_draw_setup_and_write_drawblt_surf_info_gfx10(struct shader_test_priv *test_priv)
{
	/* GFX10: program color-buffer 0 to the destination BO. Unlike GFX9,
	 * the base-address extension and attrib2/3 live in separate register
	 * ranges and are written individually. Advances test_priv->cmd_curr. */
	int i = test_priv->cmd_curr;
	uint32_t *ptr = test_priv->cmd.ptr;

	/* setup color buffer */
	/* 0xA318   CB_COLOR0_BASE
	   0xA319   CB_COLOR0_PITCH
	   0xA31A   CB_COLOR0_SLICE
	   0xA31B   CB_COLOR0_VIEW
	   0xA31C   CB_COLOR0_INFO
	   0xA31D   CB_COLOR0_ATTRIB
	   0xA31E   CB_COLOR0_DCC_CONTROL
	   0xA31F   CB_COLOR0_CMASK
	   0xA320   CB_COLOR0_CMASK_SLICE
	   0xA321   CB_COLOR0_FMASK
	   0xA322   CB_COLOR0_FMASK_SLICE
	   0xA323   CB_COLOR0_CLEAR_WORD0
	   0xA324   CB_COLOR0_CLEAR_WORD1
	   0xA325   CB_COLOR0_DCC_BASE */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 14);
	ptr[i++] = 0x318;
	ptr[i++] = test_priv->dst.mc_address >> 8;
	/* PITCH/SLICE/VIEW left zero */
	i += 3;
	ptr[i++] = 0x50438;
	/* ATTRIB..DCC_BASE left zero */
	i += 9;

	/* 0xA390   CB_COLOR0_BASE_EXT */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x390;
	ptr[i++] = test_priv->dst.mc_address >> 40;

	/* 0xA398   CB_COLOR0_CMASK_BASE_EXT */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x398;
	ptr[i++] = 0;

	/* 0xA3A0   CB_COLOR0_FMASK_BASE_EXT */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x3a0;
	ptr[i++] = 0;

	/* 0xA3A8   CB_COLOR0_DCC_BASE_EXT */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x3a8;
	ptr[i++] = 0;

	/* 0xA3B0   CB_COLOR0_ATTRIB2 */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x3b0;
	/* larger surface extent for the slow-hang test variant */
	ptr[i++] = test_priv->info->hang_slow ? 0x3ffc7ff : 0x7c01f;

	/* 0xA3B8   CB_COLOR0_ATTRIB3 */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x3b8;
	ptr[i++] = 0x9014000;

	/* 0xA32B   CB_COLOR1_BASE */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x32b;
	ptr[i++] = 0;

	/* 0xA33A   CB_COLOR1_BASE */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x33a;
	ptr[i++] = 0;

	/* SPI_SHADER_COL_FORMAT */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x1c5;
	ptr[i++] = 9;

	/* Setup depth buffer */
	/* mmDB_Z_INFO */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
	ptr[i++] = 0x10;
	/* both values left zero (no depth buffer) */
	i += 2;

	test_priv->cmd_curr = i;
}
1114
static void amdgpu_draw_setup_and_write_drawblt_surf_info_gfx11(struct shader_test_priv *test_priv)
{
	/* GFX11: program color-buffer 0 to the destination BO, clear DCC and
	 * CB_COLOR1 state, set the shader color format and zero the depth
	 * buffer info. Advances test_priv->cmd_curr. */
	int i = test_priv->cmd_curr;
	uint32_t *ptr = test_priv->cmd.ptr;

	/* mmCB_COLOR0_BASE */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x318;
	ptr[i++] = test_priv->dst.mc_address >> 8;
	/* mmCB_COLOR0_VIEW .. mmCB_COLOR0_DCC_CONTROL */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 4);
	ptr[i++] = 0x31b;
	/* CB_COLOR0_VIEW left zero */
	i++;
	ptr[i++] = 0x5040e;
	/* ATTRIB/DCC_CONTROL left zero */
	i += 2;
	/* mmCB_COLOR0_DCC_BASE */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x325;
	ptr[i++] = 0;
	/* mmCB_COLOR0_BASE_EXT */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x390;
	ptr[i++] = (test_priv->dst.mc_address >> 40) & 0xFF;
	/* mmCB_COLOR0_DCC_BASE_EXT */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x3a8;
	ptr[i++] = 0;
	/* mmCB_COLOR0_ATTRIB2 */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x3b0;
	/* larger surface extent for the slow-hang test variant */
	ptr[i++] = test_priv->info->hang_slow ? 0x1ffc7ff : 0x7c01f;
	/* mmCB_COLOR0_ATTRIB3 */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x3b8;
	ptr[i++] = test_priv->info->hang_slow ? 0x1028000 : 0x1018000;
	/* mmCB_COLOR0_INFO */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x32b;
	ptr[i++] = 0;
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x33a;
	ptr[i++] = 0;
	/* mmSPI_SHADER_COL_FORMAT */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x1c5;
	ptr[i++] = 0x9;
	/* mmDB_Z_INFO */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
	ptr[i++] = 0x10;
	/* both values left zero (no depth buffer) */
	i += 2;

	test_priv->cmd_curr = i;
}
1168
1169static void amdgpu_draw_setup_and_write_drawblt_surf_info(struct shader_test_priv *test_priv)
1170{
1171	switch (test_priv->info->version) {
1172	case AMDGPU_TEST_GFX_V9:
1173		amdgpu_draw_setup_and_write_drawblt_surf_info_gfx9(test_priv);
1174		break;
1175	case AMDGPU_TEST_GFX_V10:
1176		amdgpu_draw_setup_and_write_drawblt_surf_info_gfx10(test_priv);
1177		break;
1178	case AMDGPU_TEST_GFX_V11:
1179		amdgpu_draw_setup_and_write_drawblt_surf_info_gfx11(test_priv);
1180		break;
1181	case AMDGPU_TEST_GFX_MAX:
1182		assert(1 && "Not Support gfx, never go here");
1183		break;
1184	}
1185}
1186
static void amdgpu_draw_setup_and_write_drawblt_state_gfx9(struct shader_test_priv *test_priv)
{
	/* GFX9: emit scan-converter state (tile steering, AA sample
	 * locations, centroid priority) followed by the cached draw state.
	 * Advances test_priv->cmd_curr. */
	int i = test_priv->cmd_curr;
	uint32_t *ptr = test_priv->cmd.ptr;
	const struct shader_test_gfx_info *gfx_info = &shader_test_gfx_info[test_priv->info->version];

	/* mmPA_SC_TILE_STEERING_OVERRIDE */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0xd7;
	ptr[i++] = 0;

	/* raw type-3 packets: 0xffff1000 is a NOP (see use below as filler);
	 * 0xc0021000 looks like opcode 0x10 — TODO confirm intent */
	ptr[i++] = 0xffff1000;
	ptr[i++] = 0xc0021000;

	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0xd7;
	ptr[i++] = 1;

	/* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16);
	ptr[i++] = 0x2fe;
	/* all 16 sample locations left zero */
	i += 16;

	/* mmPA_SC_CENTROID_PRIORITY_0 */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
	ptr[i++] = 0x2f5;
	/* both priority values left zero */
	i += 2;

	memcpy(ptr + i, gfx_info->cached_cmd, gfx_info->size_cached_cmd);
	/* patch the cached command's 13th dword for the slow-hang variant */
	if (test_priv->info->hang_slow)
		*(ptr + i + 12) = 0x8000800;

	test_priv->cmd_curr = i + gfx_info->size_cached_cmd/sizeof(uint32_t);
}
1221
static void amdgpu_draw_setup_and_write_drawblt_state_gfx10(struct shader_test_priv *test_priv)
{
	/* GFX10: like the GFX9 path, plus the RMI GL2/L2 cache-control
	 * registers at the end. Advances test_priv->cmd_curr. */
	int i = test_priv->cmd_curr;
	uint32_t *ptr = test_priv->cmd.ptr;
	const struct shader_test_gfx_info *gfx_info = &shader_test_gfx_info[test_priv->info->version];

	/* mmPA_SC_TILE_STEERING_OVERRIDE */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0xd7;
	ptr[i++] = 0;

	/* raw type-3 packets: 0xffff1000 is a NOP (used elsewhere as filler);
	 * 0xc0021000 looks like opcode 0x10 — TODO confirm intent */
	ptr[i++] = 0xffff1000;
	ptr[i++] = 0xc0021000;

	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0xd7;
	ptr[i++] = 0;

	/* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16);
	ptr[i++] = 0x2fe;
	/* all 16 sample locations left zero */
	i += 16;

	/* mmPA_SC_CENTROID_PRIORITY_0 */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
	ptr[i++] = 0x2f5;
	/* both priority values left zero */
	i += 2;

	memcpy(ptr + i, gfx_info->cached_cmd, gfx_info->size_cached_cmd);
	/* patch the cached command's 13th dword for the slow-hang variant */
	if (test_priv->info->hang_slow)
		*(ptr + i + 12) = 0x8000800;
	i += gfx_info->size_cached_cmd/sizeof(uint32_t);

	/* mmCB_RMI_GL2_CACHE_CONTROL */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x104;
	ptr[i++] = 0x40aa0055;
	/* mmDB_RMI_L2_CACHE_CONTROL */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x1f;
	ptr[i++] = 0x2a0055;

	test_priv->cmd_curr = i;
}
1266
static void amdgpu_draw_setup_and_write_drawblt_state_gfx11(struct shader_test_priv *test_priv)
{
	/* GFX11: emit scan-converter state and cached draw state; the second
	 * PA_SC_TILE_STEERING_OVERRIDE write leaves its value dword
	 * uninitialized-by-us (skipped with i++). Advances cmd_curr. */
	int i = test_priv->cmd_curr;
	uint32_t *ptr = test_priv->cmd.ptr;
	const struct shader_test_gfx_info *gfx_info = &shader_test_gfx_info[test_priv->info->version];

	/* mmPA_SC_TILE_STEERING_OVERRIDE */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0xd7;
	ptr[i++] = 0;

	/* raw type-3 packets: 0xffff1000 is a NOP (used elsewhere as filler);
	 * 0xc0021000 looks like opcode 0x10 — TODO confirm intent */
	ptr[i++] = 0xffff1000;
	ptr[i++] = 0xc0021000;

	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0xd7;
	/* value dword left as-is (cmd buffer was pre-zeroed by the caller) */
	i++;

	/* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16);
	ptr[i++] = 0x2fe;
	/* all 16 sample locations left zero */
	i += 16;

	/* mmPA_SC_CENTROID_PRIORITY_0 */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
	ptr[i++] = 0x2f5;
	/* both priority values left zero */
	i += 2;

	memcpy(ptr + i, gfx_info->cached_cmd, gfx_info->size_cached_cmd);
	/* patch the cached command's 13th dword for the slow-hang variant */
	if (test_priv->info->hang_slow)
		*(ptr + i + 12) = 0x8000800;

	test_priv->cmd_curr = i + gfx_info->size_cached_cmd/sizeof(uint32_t);
}
1301
1302static void amdgpu_draw_setup_and_write_drawblt_state(struct shader_test_priv *test_priv)
1303{
1304	switch (test_priv->info->version) {
1305	case AMDGPU_TEST_GFX_V9:
1306		amdgpu_draw_setup_and_write_drawblt_state_gfx9(test_priv);
1307		break;
1308	case AMDGPU_TEST_GFX_V10:
1309		amdgpu_draw_setup_and_write_drawblt_state_gfx10(test_priv);
1310		break;
1311	case AMDGPU_TEST_GFX_V11:
1312		amdgpu_draw_setup_and_write_drawblt_state_gfx11(test_priv);
1313		break;
1314	case AMDGPU_TEST_GFX_MAX:
1315		assert(1 && "Not Support gfx, never go here");
1316		break;
1317	}
1318}
1319
static void amdgpu_draw_vs_RectPosTexFast_write2hw_gfx9(struct shader_test_priv *test_priv)
{
	/* GFX9: program the VS stage registers (program address, resources,
	 * output config) and the shader user-data constants describing the
	 * destination and source rectangles. Advances cmd_curr. */
	int i = test_priv->cmd_curr;
	uint32_t *ptr = test_priv->cmd.ptr;
	uint64_t shader_addr = test_priv->shader_draw.vs_bo.mc_address;
	enum ps_type ps = test_priv->shader_draw.ps_type;

	/* mmPA_CL_VS_OUT_CNTL */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x207;
	ptr[i++] = 0;

	/* mmSPI_SHADER_PGM_RSRC3_VS */
	ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 1);
	ptr[i++] = 0x46;
	ptr[i++] = 0xffff;

	/* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */
	ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ptr[i++] = 0x48;
	ptr[i++] = shader_addr >> 8;
	ptr[i++] = shader_addr >> 40;

	/* mmSPI_SHADER_PGM_RSRC1_VS */
	ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 1);
	ptr[i++] = 0x4a;
	ptr[i++] = 0xc0081;

	/* mmSPI_SHADER_PGM_RSRC2_VS */
	ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 1);
	ptr[i++] = 0x4b;
	ptr[i++] = 0x18;

	/* mmSPI_VS_OUT_CONFIG */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x1b1;
	ptr[i++] = 2;

	/* mmSPI_SHADER_POS_FORMAT */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x1c3;
	ptr[i++] = 4;

	/* dst rect constants: larger extent floats for the slow-hang test */
	ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 4);
	ptr[i++] = 0x4c;
	i += 2;
	ptr[i++] = test_priv->info->hang_slow ? 0x45000000 : 0x42000000;
	ptr[i++] = test_priv->info->hang_slow ? 0x45000000 : 0x42000000;

	/* src rect constants: 1.0f,1.0f only for the texture PS */
	ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 4);
	ptr[i++] = 0x50;
	i += 2;
	if (ps == PS_CONST) {
		i += 2;
	} else if (ps == PS_TEX) {
		ptr[i++] = 0x3f800000;
		ptr[i++] = 0x3f800000;
	}

	/* four more user-data dwords left zero */
	ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 4);
	ptr[i++] = 0x54;
	i += 4;

	test_priv->cmd_curr = i;
}
1385
static void amdgpu_draw_vs_RectPosTexFast_write2hw_gfx10(struct shader_test_priv *test_priv)
{
	/* GFX10: same as the GFX9 path, except RSRC3/RSRC4 use the indexed
	 * SH-reg packet and RSRC1 has a different value. Advances cmd_curr. */
	int i = test_priv->cmd_curr;
	uint32_t *ptr = test_priv->cmd.ptr;
	uint64_t shader_addr = test_priv->shader_draw.vs_bo.mc_address;
	enum ps_type ps = test_priv->shader_draw.ps_type;

	/* mmPA_CL_VS_OUT_CNTL */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x207;
	ptr[i++] = 0;

	/* mmSPI_SHADER_PGM_RSRC3_VS */
	ptr[i++] = PACKET3(PACKET3_SET_SH_REG_INDEX, 1);
	ptr[i++] = 0x30000046;
	ptr[i++] = 0xffff;
	/* mmSPI_SHADER_PGM_RSRC4_VS */
	ptr[i++] = PACKET3(PACKET3_SET_SH_REG_INDEX, 1);
	ptr[i++] = 0x30000041;
	ptr[i++] = 0xffff;

	/* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */
	ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ptr[i++] = 0x48;
	ptr[i++] = shader_addr >> 8;
	ptr[i++] = shader_addr >> 40;

	/* mmSPI_SHADER_PGM_RSRC1_VS */
	ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 1);
	ptr[i++] = 0x4a;
	ptr[i++] = 0xc0041;
	/* mmSPI_SHADER_PGM_RSRC2_VS */
	ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 1);
	ptr[i++] = 0x4b;
	ptr[i++] = 0x18;

	/* mmSPI_VS_OUT_CONFIG */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x1b1;
	ptr[i++] = 2;

	/* mmSPI_SHADER_POS_FORMAT */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x1c3;
	ptr[i++] = 4;

	/* dst rect constants: larger extent floats for the slow-hang test */
	ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 4);
	ptr[i++] = 0x4c;
	i += 2;
	ptr[i++] = test_priv->info->hang_slow ? 0x45000000 : 0x42000000;
	ptr[i++] = test_priv->info->hang_slow ? 0x45000000 : 0x42000000;

	/* src rect constants: 1.0f,1.0f only for the texture PS */
	ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 4);
	ptr[i++] = 0x50;
	i += 2;
	if (ps == PS_CONST) {
		i += 2;
	} else if (ps == PS_TEX) {
		ptr[i++] = 0x3f800000;
		ptr[i++] = 0x3f800000;
	}

	/* four more user-data dwords left zero */
	ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 4);
	ptr[i++] = 0x54;
	i += 4;

	test_priv->cmd_curr = i;
}
1454
1455
static void amdgpu_draw_vs_RectPosTexFast_write2hw_gfx11(struct shader_test_priv *test_priv)
{
	/* GFX11: the VS runs on the GS/ES stage registers; shader-supplied
	 * SH/context registers are replayed from the shader table, then the
	 * rect constants and the attribute-ring buffer descriptor are
	 * written. Advances cmd_curr. */
	int i = test_priv->cmd_curr;
	uint32_t *ptr = test_priv->cmd.ptr;
	const struct shader_test_gfx_info *gfx_info = &shader_test_gfx_info[test_priv->info->version];
	uint64_t shader_addr = test_priv->shader_draw.vs_bo.mc_address;
	const struct shader_test_vs_shader *shader = &shader_test_vs[test_priv->info->version][test_priv->shader_draw.vs_type];
	enum ps_type ps = test_priv->shader_draw.ps_type;
	int j, offset;

	/* mmPA_CL_VS_OUT_CNTL */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x207;
	ptr[i++] = 0;

	/* mmSPI_SHADER_PGM_RSRC3_GS */
	ptr[i++] = PACKET3(PACKET3_SET_SH_REG_INDEX, 1);
	ptr[i++] = 0x30000087;
	ptr[i++] = 0xffff;
	/* mmSPI_SHADER_PGM_RSRC4_GS */
	ptr[i++] = PACKET3(PACKET3_SET_SH_REG_INDEX, 1);
	ptr[i++] = 0x30000081;
	ptr[i++] = 0x1fff0001;

	/* mmSPI_SHADER_PGM_LO_ES */
	ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ptr[i++] = 0xc8;
	ptr[i++] = shader_addr >> 8;
	ptr[i++] = shader_addr >> 40;

	/* write sh reg */
	for (j = 0; j < shader->num_sh_reg; j++) {
		ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ptr[i++] = shader->sh_reg[j].reg_offset - gfx_info->sh_reg_base;
		ptr[i++] = shader->sh_reg[j].reg_value;
	}
	/* write context reg */
	for (j = 0; j < shader->num_context_reg; j++) {
		switch (shader->context_reg[j].reg_offset) {
		case 0xA1B1: //mmSPI_VS_OUT_CONFIG
			/* override the table value with 2 */
			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
			ptr[i++] = shader->context_reg[j].reg_offset - gfx_info->context_reg_base;
			ptr[i++] = 2;
			break;
		case 0xA1C3: //mmSPI_SHADER_POS_FORMAT
			/* override the table value with 4 */
			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
			ptr[i++] = shader->context_reg[j].reg_offset - gfx_info->context_reg_base;
			ptr[i++] = 4;
			break;
		case 0xA2E4: //mmVGT_GS_INSTANCE_CNT
		case 0xA2CE: //mmVGT_GS_MAX_VERT_OUT
			/* skipped: not programmed for this draw */
			break;
		default:
			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
			ptr[i++] = shader->context_reg[j].reg_offset - gfx_info->context_reg_base;
			ptr[i++] = shader->context_reg[j].reg_value;
			break;
		}
	}

	// write constant
	// dst rect
	ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 4);
	ptr[i++] = 0x8c;
	i += 2;
	/* larger extent floats for the slow-hang test */
	ptr[i++] = test_priv->info->hang_slow ? 0x45000000 : 0x42000000;
	ptr[i++] = test_priv->info->hang_slow ? 0x45000000 : 0x42000000;
	// src rect
	ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 4);
	ptr[i++] = 0x90;
	i += 2;
	if (ps == PS_CONST) {
		i += 2;
	} else if (ps == PS_TEX) {
		ptr[i++] = 0x3f800000;
		ptr[i++] = 0x3f800000;
	}

	ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 4);
	ptr[i++] = 0x94;
	i += 4;
	// vtx_attributes_mem
	/* 0xc02f1000: type-3 NOP whose payload holds a 4-dword buffer
	 * descriptor for the attribute ring; `offset` records the IB byte
	 * offset consumed by SET_SH_REG_OFFSET below — TODO confirm the
	 * descriptor is expected at offset + 44 dwords. */
	ptr[i++] = 0xc02f1000;
	offset = i * sizeof(uint32_t);
	i += 44;
	ptr[i++] = test_priv->vtx_attributes_mem.mc_address & 0xffffffff;
	ptr[i++] = 0xc0100000 | ((test_priv->vtx_attributes_mem.mc_address >> 32) & 0xffff);
	ptr[i++] = test_priv->vtx_attributes_mem.size / 16;
	ptr[i++] = 0x2043ffac;
	ptr[i++] = PACKET3(PACKET3_SET_SH_REG_OFFSET, 2);
	ptr[i++] = 0x98;
	ptr[i++] = offset;
	i++;

	test_priv->cmd_curr = i;
}
1552
1553static void amdgpu_draw_vs_RectPosTexFast_write2hw(struct shader_test_priv *test_priv)
1554{
1555	switch (test_priv->info->version) {
1556	case AMDGPU_TEST_GFX_V9:
1557		amdgpu_draw_vs_RectPosTexFast_write2hw_gfx9(test_priv);
1558		break;
1559	case AMDGPU_TEST_GFX_V10:
1560		amdgpu_draw_vs_RectPosTexFast_write2hw_gfx10(test_priv);
1561		break;
1562	case AMDGPU_TEST_GFX_V11:
1563		amdgpu_draw_vs_RectPosTexFast_write2hw_gfx11(test_priv);
1564		break;
1565	case AMDGPU_TEST_GFX_MAX:
1566		assert(1 && "Not Support gfx, never go here");
1567		break;
1568	}
1569}
1570
static void amdgpu_draw_ps_write2hw_gfx9_10(struct shader_test_priv *test_priv)
{
	/* GFX9/GFX10: program the pixel-shader program address and resource
	 * registers, then replay the shader's SH/context register tables.
	 * Advances test_priv->cmd_curr. */
	int i, j;
	uint64_t shader_addr = test_priv->shader_draw.ps_bo.mc_address;
	const struct shader_test_ps_shader *ps = &shader_test_ps[test_priv->info->version][test_priv->shader_draw.ps_type];
	uint32_t *ptr = test_priv->cmd.ptr;

	i = test_priv->cmd_curr;

	if (test_priv->info->version == AMDGPU_TEST_GFX_V9) {
		/* 0x2c07   SPI_SHADER_PGM_RSRC3_PS
		   0x2c08   SPI_SHADER_PGM_LO_PS
		   0x2c09   SPI_SHADER_PGM_HI_PS */
		/* multiplicator 9 is from  SPI_SHADER_COL_FORMAT */
		if (!test_priv->info->hang)
			shader_addr += 256 * 9;
		ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 3);
		ptr[i++] = 0x7;
		ptr[i++] = 0xffff;
		ptr[i++] = shader_addr >> 8;
		ptr[i++] = shader_addr >> 40;
	} else {
		/* NOTE(review): unlike the GFX9 branch, the hang check is
		 * commented out, so the offset is applied even for hang
		 * tests — confirm this is intentional. */
		//if (!test_priv->info->hang)
			shader_addr += 256 * 9;
		/* 0x2c08	 SPI_SHADER_PGM_LO_PS
		     0x2c09	 SPI_SHADER_PGM_HI_PS */
		ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 2);
		ptr[i++] = 0x8;
		ptr[i++] = shader_addr >> 8;
		ptr[i++] = shader_addr >> 40;

		/* mmSPI_SHADER_PGM_RSRC3_PS */
		ptr[i++] = PACKET3(PACKET3_SET_SH_REG_INDEX, 1);
		ptr[i++] = 0x30000007;
		ptr[i++] = 0xffff;
		/* mmSPI_SHADER_PGM_RSRC4_PS */
		ptr[i++] = PACKET3(PACKET3_SET_SH_REG_INDEX, 1);
		ptr[i++] = 0x30000001;
		ptr[i++] = 0xffff;
	}

	/* replay SH registers; 0x2c00 is the SH register base */
	for (j = 0; j < ps->num_sh_reg; j++) {
		ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ptr[i++] = ps->sh_reg[j].reg_offset - 0x2c00;
		ptr[i++] = ps->sh_reg[j].reg_value;
	}

	/* replay context registers; 0xa000 is the context register base.
	 * 0xA1C5 (SPI_SHADER_COL_FORMAT) is skipped — already programmed. */
	for (j = 0; j < ps->num_context_reg; j++) {
		if (ps->context_reg[j].reg_offset != 0xA1C5) {
			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
			ptr[i++] = ps->context_reg[j].reg_offset - 0xa000;
			ptr[i++] = ps->context_reg[j].reg_value;
		}

		/* after SPI_PS_INPUT_ADDR (0xA1B4), also set reg 0x1b3 to 2 */
		if (ps->context_reg[j].reg_offset == 0xA1B4) {
			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
			ptr[i++] = 0x1b3;
			ptr[i++] = 2;
		}
	}

	test_priv->cmd_curr = i;
}
1634
static void amdgpu_draw_ps_write2hw_gfx11(struct shader_test_priv *test_priv)
{
	/* GFX11: program the pixel-shader program address (skipping past the
	 * export-shader copies unless this is a hang test) and resource
	 * registers, then replay the shader's register tables. */
	int i, j;
	uint64_t shader_addr = test_priv->shader_draw.ps_bo.mc_address;
	enum amdgpu_test_gfx_version version = test_priv->info->version;
	const struct shader_test_ps_shader *ps = &shader_test_ps[version][test_priv->shader_draw.ps_type];
	uint32_t *ptr = test_priv->cmd.ptr;
	uint32_t export_shader_offset;

	i = test_priv->cmd_curr;

	/* SPI_SHADER_PGM_LO_PS
	   SPI_SHADER_PGM_HI_PS */
	shader_addr >>= 8;
	if (!test_priv->info->hang) {
		/* skip the 9 export-shader copies that precede the PS body */
		export_shader_offset = (round_up_size(ps->shader_size) * 9) >> 8;
		shader_addr += export_shader_offset;
	}
	ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ptr[i++] = 0x8;
	ptr[i++] = shader_addr & 0xffffffff;
	ptr[i++] = (shader_addr >> 32) & 0xffffffff;
	/* mmSPI_SHADER_PGM_RSRC3_PS */
	ptr[i++] = PACKET3(PACKET3_SET_SH_REG_INDEX, 1);
	ptr[i++] = 0x30000007;
	ptr[i++] = 0xffff;
	/* mmSPI_SHADER_PGM_RSRC4_PS */
	ptr[i++] = PACKET3(PACKET3_SET_SH_REG_INDEX, 1);
	ptr[i++] = 0x30000001;
	ptr[i++] = 0x3fffff;

	/* replay the shader's SH registers relative to the SH reg base */
	for (j = 0; j < ps->num_sh_reg; j++) {
		ptr[i++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ptr[i++] = ps->sh_reg[j].reg_offset - shader_test_gfx_info[version].sh_reg_base;
		ptr[i++] = ps->sh_reg[j].reg_value;
	}

	for (j = 0; j < ps->num_context_reg; j++) {
		/* !mmSPI_SHADER_COL_FORMAT */
		if (ps->context_reg[j].reg_offset != 0xA1C5) {
			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
			ptr[i++] = ps->context_reg[j].reg_offset - shader_test_gfx_info[version].context_reg_base;
			ptr[i++] = ps->context_reg[j].reg_value;
		}

		/* mmSPI_PS_INPUT_ADDR */
		if (ps->context_reg[j].reg_offset == 0xA1B4) {
			/* additionally set reg 0x1b3 to 2 */
			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
			ptr[i++] = 0x1b3;
			ptr[i++] = 2;
		}
	}

	test_priv->cmd_curr = i;
}
1690
1691static void amdgpu_draw_ps_write2hw(struct shader_test_priv *test_priv)
1692{
1693	switch (test_priv->info->version) {
1694	case AMDGPU_TEST_GFX_V9:
1695	case AMDGPU_TEST_GFX_V10:
1696		amdgpu_draw_ps_write2hw_gfx9_10(test_priv);
1697		break;
1698	case AMDGPU_TEST_GFX_V11:
1699		amdgpu_draw_ps_write2hw_gfx11(test_priv);
1700		break;
1701	case AMDGPU_TEST_GFX_MAX:
1702		assert(1 && "Not Support gfx, never go here");
1703		break;
1704	}
1705}
1706
1707static void amdgpu_draw_draw(struct shader_test_priv *test_priv)
1708{
1709	int i = test_priv->cmd_curr;
1710	uint32_t *ptr = test_priv->cmd.ptr;
1711
1712	switch (test_priv->info->version) {
1713	case AMDGPU_TEST_GFX_V9:
1714		/* mmIA_MULTI_VGT_PARAM */
1715		ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
1716		ptr[i++] = 0x40000258;
1717		ptr[i++] = 0xd00ff;
1718		/* mmVGT_PRIMITIVE_TYPE */
1719		ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
1720		ptr[i++] = 0x10000242;
1721		ptr[i++] = 0x11;
1722		break;
1723	case AMDGPU_TEST_GFX_V10:
1724		/* mmGE_CNTL */
1725		ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
1726		ptr[i++] = 0x25b;
1727		ptr[i++] = 0xff;
1728		/* mmVGT_PRIMITIVE_TYPE */
1729		ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
1730		ptr[i++] = 0x242;
1731		ptr[i++] = 0x11;
1732		break;
1733	case AMDGPU_TEST_GFX_V11:
1734		/* mmGE_CNTL */
1735		ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
1736		ptr[i++] = 0x25b;
1737		ptr[i++] = 0x80fc80;
1738		/* mmVGT_PRIMITIVE_TYPE */
1739		ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
1740		ptr[i++] = 0x242;
1741		ptr[i++] = 0x11;
1742		break;
1743	case AMDGPU_TEST_GFX_MAX:
1744		assert(1 && "Not Support gfx, never go here");
1745		break;
1746	}
1747
1748	ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1);
1749	ptr[i++] = 3;
1750	ptr[i++] = 2;
1751
1752	test_priv->cmd_curr = i;
1753}
1754
static void amdgpu_memset_draw_test(struct shader_test_info *test_info)
{
	/* Full-screen draw with the constant pixel shader: fills the dst BO
	 * with 0x33 bytes via the GPU, then verifies the pattern at the
	 * start, middle and end of the buffer. */
	struct shader_test_priv test_priv;
	amdgpu_context_handle context_handle;
	struct shader_test_bo *ps_bo = &(test_priv.shader_draw.ps_bo);
	struct shader_test_bo *vs_bo = &(test_priv.shader_draw.vs_bo);
	struct shader_test_bo *dst = &(test_priv.dst);
	struct shader_test_bo *cmd = &(test_priv.cmd);
	struct shader_test_bo *vtx_attributes_mem = &(test_priv.vtx_attributes_mem);
	amdgpu_bo_handle resources[5];
	uint8_t *ptr_dst;
	uint32_t *ptr_cmd;
	int i, r;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	uint8_t cptr[16];	/* expected 16-byte pattern for memcmp */

	memset(&test_priv, 0, sizeof(test_priv));
	test_priv.info = test_info;

	r = amdgpu_cs_ctx_create(test_info->device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* allocate and zero the pixel-shader BO, then load the shader */
	ps_bo->size = 0x2000;
	ps_bo->heap = AMDGPU_GEM_DOMAIN_VRAM;
	r = shader_test_bo_alloc(test_info->device_handle, ps_bo);
	CU_ASSERT_EQUAL(r, 0);
	memset(ps_bo->ptr, 0, ps_bo->size);

	/* allocate and zero the vertex-shader BO, then load the shader */
	vs_bo->size = 4096;
	vs_bo->heap = AMDGPU_GEM_DOMAIN_VRAM;
	r = shader_test_bo_alloc(test_info->device_handle, vs_bo);
	CU_ASSERT_EQUAL(r, 0);
	memset(vs_bo->ptr, 0, vs_bo->size);

	test_priv.shader_draw.ps_type = PS_CONST;
	amdgpu_draw_load_ps_shader(&test_priv);

	test_priv.shader_draw.vs_type = VS_RECTPOSTEXFAST;
	amdgpu_draw_load_vs_shader(&test_priv);

	/* command buffer in GTT, pre-zeroed */
	cmd->size = 4096;
	cmd->heap = AMDGPU_GEM_DOMAIN_GTT;
	r = shader_test_bo_alloc(test_info->device_handle, cmd);
	CU_ASSERT_EQUAL(r, 0);
	ptr_cmd = cmd->ptr;
	memset(ptr_cmd, 0, cmd->size);

	/* render target the draw fills */
	dst->size = 0x4000;
	dst->heap = AMDGPU_GEM_DOMAIN_VRAM;
	r = shader_test_bo_alloc(test_info->device_handle, dst);
	CU_ASSERT_EQUAL(r, 0);

	/* GFX11 needs an attribute-ring buffer (see amdgpu_draw_init) */
	if (test_info->version == AMDGPU_TEST_GFX_V11) {
		vtx_attributes_mem->size = 0x4040000;
		vtx_attributes_mem->heap = AMDGPU_GEM_DOMAIN_VRAM;

		r = shader_test_bo_alloc(test_info->device_handle, vtx_attributes_mem);
		CU_ASSERT_EQUAL(r, 0);
	}

	/* build the command stream */
	amdgpu_draw_init(&test_priv);

	amdgpu_draw_setup_and_write_drawblt_surf_info(&test_priv);

	amdgpu_draw_setup_and_write_drawblt_state(&test_priv);

	amdgpu_draw_vs_RectPosTexFast_write2hw(&test_priv);

	amdgpu_draw_ps_write2hw(&test_priv);

	i = test_priv.cmd_curr;
	/* ps constant data */
	ptr_cmd[i++] = PACKET3(PACKET3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0xc;
	ptr_cmd[i++] = 0x33333333;
	ptr_cmd[i++] = 0x33333333;
	ptr_cmd[i++] = 0x33333333;
	ptr_cmd[i++] = 0x33333333;
	test_priv.cmd_curr = i;

	amdgpu_draw_draw(&test_priv);

	/* pad the IB to a multiple of 8 dwords */
	i = test_priv.cmd_curr;
	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
	test_priv.cmd_curr = i;

	i = 0;
	resources[i++] = dst->bo;
	resources[i++] = ps_bo->bo;
	resources[i++] = vs_bo->bo;
	resources[i++] = cmd->bo;
	if (vtx_attributes_mem->size)
		resources[i++] = vtx_attributes_mem->bo;
	r = amdgpu_bo_list_create(test_info->device_handle, i, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = cmd->mc_address;
	ib_info.size = test_priv.cmd_curr;
	ibs_request.ip_type = test_info->ip;
	ibs_request.ring = test_info->ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = test_info->ip;
	fence_status.ip_instance = 0;
	fence_status.ring = test_info->ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for IB accomplished */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* verify if memset test result meets with expected */
	i = 0;
	ptr_dst = dst->ptr;
	memset(cptr, 0x33, 16);
	CU_ASSERT_EQUAL(memcmp(ptr_dst + i, cptr, 16), 0);
	i = dst->size - 16;
	CU_ASSERT_EQUAL(memcmp(ptr_dst + i, cptr, 16), 0);
	i = dst->size / 2;
	CU_ASSERT_EQUAL(memcmp(ptr_dst + i, cptr, 16), 0);

	/* tear down all BOs and the context */
	if (vtx_attributes_mem->size) {
		r = shader_test_bo_free(vtx_attributes_mem);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = shader_test_bo_free(dst);
	CU_ASSERT_EQUAL(r, 0);

	r = shader_test_bo_free(cmd);
	CU_ASSERT_EQUAL(r, 0);

	r = shader_test_bo_free(ps_bo);
	CU_ASSERT_EQUAL(r, 0);

	r = shader_test_bo_free(vs_bo);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
1915
1916static void amdgpu_memcpy_draw_test(struct shader_test_info *test_info)
1917{
1918	struct shader_test_priv test_priv;
1919	amdgpu_context_handle context_handle;
1920	struct shader_test_bo *ps_bo = &(test_priv.shader_draw.ps_bo);
1921	struct shader_test_bo *vs_bo = &(test_priv.shader_draw.vs_bo);
1922	struct shader_test_bo *src = &(test_priv.src);
1923	struct shader_test_bo *dst = &(test_priv.dst);
1924	struct shader_test_bo *cmd = &(test_priv.cmd);
1925	struct shader_test_bo *vtx_attributes_mem = &(test_priv.vtx_attributes_mem);
1926	amdgpu_bo_handle resources[6];
1927	uint8_t *ptr_dst;
1928	uint8_t *ptr_src;
1929	uint32_t *ptr_cmd;
1930	int i, r;
1931	struct amdgpu_cs_request ibs_request = {0};
1932	struct amdgpu_cs_ib_info ib_info = {0};
1933	uint32_t hang_state, hangs;
1934	uint32_t expired;
1935	amdgpu_bo_list_handle bo_list;
1936	struct amdgpu_cs_fence fence_status = {0};
1937
1938	memset(&test_priv, 0, sizeof(test_priv));
1939	test_priv.info = test_info;
1940	test_priv.cmd.size = 4096;
1941	test_priv.cmd.heap = AMDGPU_GEM_DOMAIN_GTT;
1942
1943	ps_bo->heap = AMDGPU_GEM_DOMAIN_VRAM;
1944	test_priv.shader_draw.ps_type = PS_TEX;
1945	vs_bo->size = 4096;
1946	vs_bo->heap = AMDGPU_GEM_DOMAIN_VRAM;
1947	test_priv.shader_draw.vs_type = VS_RECTPOSTEXFAST;
1948	test_priv.src.heap = AMDGPU_GEM_DOMAIN_VRAM;
1949	test_priv.dst.heap = AMDGPU_GEM_DOMAIN_VRAM;
1950	if (test_info->hang_slow) {
1951		test_priv.shader_draw.ps_bo.size = 16*1024*1024;
1952		test_priv.src.size = 0x4000000;
1953		test_priv.dst.size = 0x4000000;
1954	} else {
1955		test_priv.shader_draw.ps_bo.size = 0x2000;
1956		test_priv.src.size = 0x4000;
1957		test_priv.dst.size = 0x4000;
1958	}
1959
1960	r = amdgpu_cs_ctx_create(test_info->device_handle, &context_handle);
1961	CU_ASSERT_EQUAL(r, 0);
1962
1963	r = shader_test_bo_alloc(test_info->device_handle, ps_bo);
1964	CU_ASSERT_EQUAL(r, 0);
1965	memset(ps_bo->ptr, 0, ps_bo->size);
1966
1967	r = shader_test_bo_alloc(test_info->device_handle, vs_bo);
1968	CU_ASSERT_EQUAL(r, 0);
1969	memset(vs_bo->ptr, 0, vs_bo->size);
1970
1971	amdgpu_draw_load_ps_shader(&test_priv);
1972	amdgpu_draw_load_vs_shader(&test_priv);
1973
1974	r = shader_test_bo_alloc(test_info->device_handle, cmd);
1975	CU_ASSERT_EQUAL(r, 0);
1976	ptr_cmd = cmd->ptr;
1977	memset(ptr_cmd, 0, cmd->size);
1978
1979	r = shader_test_bo_alloc(test_info->device_handle, src);
1980	CU_ASSERT_EQUAL(r, 0);
1981	ptr_src = src->ptr;
1982	memset(ptr_src, 0x55, src->size);
1983
1984	r = shader_test_bo_alloc(test_info->device_handle, dst);
1985	CU_ASSERT_EQUAL(r, 0);
1986
1987	if (test_info->version == AMDGPU_TEST_GFX_V11) {
1988		vtx_attributes_mem->size = 0x4040000;
1989		vtx_attributes_mem->heap = AMDGPU_GEM_DOMAIN_VRAM;
1990
1991		r = shader_test_bo_alloc(test_info->device_handle, vtx_attributes_mem);
1992		CU_ASSERT_EQUAL(r, 0);
1993	}
1994
1995	amdgpu_draw_init(&test_priv);
1996
1997	amdgpu_draw_setup_and_write_drawblt_surf_info(&test_priv);
1998
1999	amdgpu_draw_setup_and_write_drawblt_state(&test_priv);
2000
2001	amdgpu_draw_vs_RectPosTexFast_write2hw(&test_priv);
2002
2003	amdgpu_draw_ps_write2hw(&test_priv);
2004
2005	// write ps user constant data
2006	i = test_priv.cmd_curr;
2007	ptr_cmd[i++] = PACKET3(PACKET3_SET_SH_REG, 8);
2008	switch (test_info->version) {
2009	case AMDGPU_TEST_GFX_V9:
2010		ptr_cmd[i++] = 0xc;
2011		ptr_cmd[i++] = src->mc_address >> 8;
2012		ptr_cmd[i++] = src->mc_address >> 40 | 0x10e00000;
2013		ptr_cmd[i++] = test_info->hang_slow ? 0x1ffcfff : 0x7c01f;
2014		ptr_cmd[i++] = 0x90500fac;
2015		ptr_cmd[i++] = test_info->hang_slow ? 0x1ffe000 : 0x3e000;
2016		i += 3;
2017		break;
2018	case AMDGPU_TEST_GFX_V10:
2019		ptr_cmd[i++] = 0xc;
2020		ptr_cmd[i++] = src->mc_address >> 8;
2021		ptr_cmd[i++] = src->mc_address >> 40 | 0xc4b00000;
2022		ptr_cmd[i++] = test_info->hang_slow ? 0x81ffc1ff : 0x8007c007;
2023		ptr_cmd[i++] = 0x90500fac;
2024		i += 2;
2025		ptr_cmd[i++] = test_info->hang_slow ? 0 : 0x400;
2026		i++;
2027		break;
2028	case AMDGPU_TEST_GFX_V11:
2029		ptr_cmd[i++] = 0xc;
2030		ptr_cmd[i++] = src->mc_address >> 8;
2031		ptr_cmd[i++] = src->mc_address >> 40 | 0xc4b00000;
2032		ptr_cmd[i++] = test_info->hang_slow ? 0x1ffc1ff : 0x7c007;
2033		ptr_cmd[i++] = test_info->hang_slow ? 0x90a00fac : 0x90600fac;
2034		i += 2;
2035		ptr_cmd[i++] = 0x400;
2036		i++;
2037		break;
2038	case AMDGPU_TEST_GFX_MAX:
2039		assert(1 && "Not Support gfx, never go here");
2040		break;
2041	}
2042
2043	ptr_cmd[i++] = PACKET3(PACKET3_SET_SH_REG, 4);
2044	ptr_cmd[i++] = 0x14;
2045	ptr_cmd[i++] = 0x92;
2046	i += 3;
2047
2048	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
2049	ptr_cmd[i++] = 0x191;
2050	ptr_cmd[i++] = 0;
2051	test_priv.cmd_curr = i;
2052
2053	amdgpu_draw_draw(&test_priv);
2054
2055	i = test_priv.cmd_curr;
2056	while (i & 7)
2057		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
2058	test_priv.cmd_curr = i;
2059
2060	i = 0;
2061	resources[i++] = dst->bo;
2062	resources[i++] = src->bo;
2063	resources[i++] = ps_bo->bo;
2064	resources[i++] = vs_bo->bo;
2065	resources[i++] = cmd->bo;
2066	if (vtx_attributes_mem->size)
2067		resources[i++] = vtx_attributes_mem->bo;
2068	r = amdgpu_bo_list_create(test_info->device_handle, i, resources, NULL, &bo_list);
2069	CU_ASSERT_EQUAL(r, 0);
2070
2071	ib_info.ib_mc_address = cmd->mc_address;
2072	ib_info.size = test_priv.cmd_curr;
2073	ibs_request.ip_type = test_info->ip;
2074	ibs_request.ring = test_info->ring;
2075	ibs_request.resources = bo_list;
2076	ibs_request.number_of_ibs = 1;
2077	ibs_request.ibs = &ib_info;
2078	ibs_request.fence_info.handle = NULL;
2079	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
2080	CU_ASSERT_EQUAL(r, 0);
2081
2082	fence_status.ip_type = test_info->ip;
2083	fence_status.ip_instance = 0;
2084	fence_status.ring = test_info->ring;
2085	fence_status.context = context_handle;
2086	fence_status.fence = ibs_request.seq_no;
2087
2088	/* wait for IB accomplished */
2089	r = amdgpu_cs_query_fence_status(&fence_status,
2090					 AMDGPU_TIMEOUT_INFINITE,
2091					 0, &expired);
2092	if (!test_info->hang) {
2093		CU_ASSERT_EQUAL(r, 0);
2094		CU_ASSERT_EQUAL(expired, true);
2095
2096		/* verify if memcpy test result meets with expected */
2097		i = 0;
2098		ptr_dst = dst->ptr;
2099		CU_ASSERT_EQUAL(memcmp(ptr_dst + i, ptr_src + i, 16), 0);
2100		i = dst->size - 16;
2101		CU_ASSERT_EQUAL(memcmp(ptr_dst + i, ptr_src + i, 16), 0);
2102		i = dst->size / 2;
2103		CU_ASSERT_EQUAL(memcmp(ptr_dst + i, ptr_src + i, 16), 0);
2104	} else {
2105		r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
2106		CU_ASSERT_EQUAL(r, 0);
2107		CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
2108	}
2109
2110	r = amdgpu_bo_list_destroy(bo_list);
2111	CU_ASSERT_EQUAL(r, 0);
2112
2113	if (vtx_attributes_mem->size) {
2114		r = shader_test_bo_free(vtx_attributes_mem);
2115		CU_ASSERT_EQUAL(r, 0);
2116	}
2117
2118	r = shader_test_bo_free(src);
2119	CU_ASSERT_EQUAL(r, 0);
2120
2121	r = shader_test_bo_free(dst);
2122	CU_ASSERT_EQUAL(r, 0);
2123
2124	r = shader_test_bo_free(cmd);
2125	CU_ASSERT_EQUAL(r, 0);
2126
2127	r = shader_test_bo_free(ps_bo);
2128	CU_ASSERT_EQUAL(r, 0);
2129
2130	r = shader_test_bo_free(vs_bo);
2131	CU_ASSERT_EQUAL(r, 0);
2132
2133	r = amdgpu_cs_ctx_free(context_handle);
2134	CU_ASSERT_EQUAL(r, 0);
2135}
2136
/* Per-ring callback for shader_test_for_each(): run both draw-path tests
 * (memset first, then memcpy) against the IP/ring described by test_info. */
static void shader_test_draw_cb(struct shader_test_info *test_info)
{
	amdgpu_memset_draw_test(test_info);
	amdgpu_memcpy_draw_test(test_info);
}
2142
2143static void shader_test_draw_hang_cb(struct shader_test_info *test_info)
2144{
2145	test_info->hang = 0;
2146	amdgpu_memcpy_draw_test(test_info);
2147
2148	test_info->hang = 1;
2149	amdgpu_memcpy_draw_test(test_info);
2150
2151	test_info->hang = 0;
2152	amdgpu_memcpy_draw_test(test_info);
2153}
2154
2155static void shader_test_draw_hang_slow_cb(struct shader_test_info *test_info)
2156{
2157	test_info->hang = 0;
2158	test_info->hang_slow = 0;
2159	amdgpu_memcpy_draw_test(test_info);
2160
2161	test_info->hang = 1;
2162	test_info->hang_slow = 1;
2163	amdgpu_memcpy_draw_test(test_info);
2164
2165	test_info->hang = 0;
2166	test_info->hang_slow = 0;
2167	amdgpu_memcpy_draw_test(test_info);
2168}
2169
2170
/* Public entry point: run the non-hang draw tests on every GFX ring of the
 * device via shader_test_for_each(). */
void amdgpu_test_draw_helper(amdgpu_device_handle device_handle)
{
	shader_test_for_each(device_handle, AMDGPU_HW_IP_GFX, shader_test_draw_cb);
}
2175
/* Public entry point: run the draw hang/recovery test on every GFX ring of
 * the device via shader_test_for_each(). */
void amdgpu_test_draw_hang_helper(amdgpu_device_handle device_handle)
{
	shader_test_for_each(device_handle, AMDGPU_HW_IP_GFX, shader_test_draw_hang_cb);
}
2180
/* Public entry point: run the slow-hang draw recovery test on every GFX ring
 * of the device via shader_test_for_each(). */
void amdgpu_test_draw_hang_slow_helper(amdgpu_device_handle device_handle)
{
	shader_test_for_each(device_handle, AMDGPU_HW_IP_GFX, shader_test_draw_hang_slow_cb);
}
2185