1/*
2 * Copyright 2010 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Author: Alex Deucher <alexander.deucher@amd.com>
24 *
25 */
26
27#ifdef HAVE_CONFIG_H
28#include "config.h"
29#endif
30
31#ifdef XF86DRM_MODE
32
33#include "xf86.h"
34
35#include "evergreen_shader.h"
36#include "evergreen_reg.h"
37
38/* solid vs --------------------------------------- */
39int evergreen_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader)
40{
41    int i = 0;
42
43    /* 0 */
44    shader[i++] = CF_DWORD0(ADDR(4),
45			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
46    shader[i++] = CF_DWORD1(POP_COUNT(0),
47			    CF_CONST(0),
48			    COND(SQ_CF_COND_ACTIVE),
49			    I_COUNT(1),
50			    VALID_PIXEL_MODE(0),
51			    END_OF_PROGRAM(0),
52			    CF_INST(SQ_CF_INST_VC),
53			    WHOLE_QUAD_MODE(0),
54			    BARRIER(1));
55    /* 1 */
56    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
57					  TYPE(SQ_EXPORT_POS),
58					  RW_GPR(1),
59					  RW_REL(ABSOLUTE),
60					  INDEX_GPR(0),
61					  ELEM_SIZE(0));
62    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
63					       SRC_SEL_Y(SQ_SEL_Y),
64					       SRC_SEL_Z(SQ_SEL_Z),
65					       SRC_SEL_W(SQ_SEL_W),
66					       BURST_COUNT(1),
67					       VALID_PIXEL_MODE(0),
68					       END_OF_PROGRAM(0),
69					       CF_INST(SQ_CF_INST_EXPORT_DONE),
70					       MARK(0),
71					       BARRIER(1));
72    /* 2 - always export a param whether it's used or not */
73    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
74					  TYPE(SQ_EXPORT_PARAM),
75					  RW_GPR(0),
76					  RW_REL(ABSOLUTE),
77					  INDEX_GPR(0),
78					  ELEM_SIZE(0));
79    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
80					       SRC_SEL_Y(SQ_SEL_Y),
81					       SRC_SEL_Z(SQ_SEL_Z),
82					       SRC_SEL_W(SQ_SEL_W),
83					       BURST_COUNT(0),
84					       VALID_PIXEL_MODE(0),
85					       END_OF_PROGRAM(1),
86					       CF_INST(SQ_CF_INST_EXPORT_DONE),
87					       MARK(0),
88					       BARRIER(0));
89    /* 3 - padding */
90    shader[i++] = 0x00000000;
91    shader[i++] = 0x00000000;
92    /* 4/5 */
93    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
94			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
95			     FETCH_WHOLE_QUAD(0),
96			     BUFFER_ID(0),
97			     SRC_GPR(0),
98			     SRC_REL(ABSOLUTE),
99			     SRC_SEL_X(SQ_SEL_X),
100			     MEGA_FETCH_COUNT(8));
101    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
102				 DST_REL(0),
103				 DST_SEL_X(SQ_SEL_X),
104				 DST_SEL_Y(SQ_SEL_Y),
105				 DST_SEL_Z(SQ_SEL_0),
106				 DST_SEL_W(SQ_SEL_1),
107				 USE_CONST_FIELDS(0),
108				 DATA_FORMAT(FMT_32_32_FLOAT),
109				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
110				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
111				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
112    shader[i++] = VTX_DWORD2(OFFSET(0),
113#if X_BYTE_ORDER == X_BIG_ENDIAN
114			     ENDIAN_SWAP(SQ_ENDIAN_8IN32),
115#else
116			     ENDIAN_SWAP(SQ_ENDIAN_NONE),
117#endif
118			     CONST_BUF_NO_STRIDE(0),
119			     MEGA_FETCH(1),
120			     ALT_CONST(0),
121			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
122    shader[i++] = VTX_DWORD_PAD;
123
124    return i;
125}
126
127/* solid ps --------------------------------------- */
128int evergreen_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader)
129{
130    int i = 0;
131
132    /* 0 */
133    shader[i++] = CF_ALU_DWORD0(ADDR(2),
134				KCACHE_BANK0(0),
135				KCACHE_BANK1(0),
136				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
137    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
138				KCACHE_ADDR0(0),
139				KCACHE_ADDR1(0),
140				I_COUNT(4),
141				ALT_CONST(0),
142				CF_INST(SQ_CF_INST_ALU),
143				WHOLE_QUAD_MODE(0),
144				BARRIER(1));
145    /* 1 */
146    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
147					  TYPE(SQ_EXPORT_PIXEL),
148					  RW_GPR(0),
149					  RW_REL(ABSOLUTE),
150					  INDEX_GPR(0),
151					  ELEM_SIZE(1));
152    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
153					       SRC_SEL_Y(SQ_SEL_Y),
154					       SRC_SEL_Z(SQ_SEL_Z),
155					       SRC_SEL_W(SQ_SEL_W),
156					       BURST_COUNT(1),
157					       VALID_PIXEL_MODE(0),
158					       END_OF_PROGRAM(1),
159					       CF_INST(SQ_CF_INST_EXPORT_DONE),
160					       MARK(0),
161					       BARRIER(1));
162
163    /* 2 */
164    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
165			     SRC0_REL(ABSOLUTE),
166			     SRC0_ELEM(ELEM_X),
167			     SRC0_NEG(0),
168			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
169			     SRC1_REL(ABSOLUTE),
170			     SRC1_ELEM(ELEM_X),
171			     SRC1_NEG(0),
172			     INDEX_MODE(SQ_INDEX_AR_X),
173			     PRED_SEL(SQ_PRED_SEL_OFF),
174			     LAST(0));
175    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
176				 SRC1_ABS(0),
177				 UPDATE_EXECUTE_MASK(0),
178				 UPDATE_PRED(0),
179				 WRITE_MASK(1),
180				 OMOD(SQ_ALU_OMOD_OFF),
181				 ALU_INST(SQ_OP2_INST_MOV),
182				 BANK_SWIZZLE(SQ_ALU_VEC_012),
183				 DST_GPR(0),
184				 DST_REL(ABSOLUTE),
185				 DST_ELEM(ELEM_X),
186				 CLAMP(1));
187    /* 3 */
188    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
189			     SRC0_REL(ABSOLUTE),
190			     SRC0_ELEM(ELEM_Y),
191			     SRC0_NEG(0),
192			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
193			     SRC1_REL(ABSOLUTE),
194			     SRC1_ELEM(ELEM_Y),
195			     SRC1_NEG(0),
196			     INDEX_MODE(SQ_INDEX_AR_X),
197			     PRED_SEL(SQ_PRED_SEL_OFF),
198			     LAST(0));
199    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
200				 SRC1_ABS(0),
201				 UPDATE_EXECUTE_MASK(0),
202				 UPDATE_PRED(0),
203				 WRITE_MASK(1),
204				 OMOD(SQ_ALU_OMOD_OFF),
205				 ALU_INST(SQ_OP2_INST_MOV),
206				 BANK_SWIZZLE(SQ_ALU_VEC_012),
207				 DST_GPR(0),
208				 DST_REL(ABSOLUTE),
209				 DST_ELEM(ELEM_Y),
210				 CLAMP(1));
211    /* 4 */
212    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
213			     SRC0_REL(ABSOLUTE),
214			     SRC0_ELEM(ELEM_Z),
215			     SRC0_NEG(0),
216			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
217			     SRC1_REL(ABSOLUTE),
218			     SRC1_ELEM(ELEM_Z),
219			     SRC1_NEG(0),
220			     INDEX_MODE(SQ_INDEX_AR_X),
221			     PRED_SEL(SQ_PRED_SEL_OFF),
222			     LAST(0));
223    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
224				 SRC1_ABS(0),
225				 UPDATE_EXECUTE_MASK(0),
226				 UPDATE_PRED(0),
227				 WRITE_MASK(1),
228				 OMOD(SQ_ALU_OMOD_OFF),
229				 ALU_INST(SQ_OP2_INST_MOV),
230				 BANK_SWIZZLE(SQ_ALU_VEC_012),
231				 DST_GPR(0),
232				 DST_REL(ABSOLUTE),
233				 DST_ELEM(ELEM_Z),
234				 CLAMP(1));
235    /* 5 */
236    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
237			     SRC0_REL(ABSOLUTE),
238			     SRC0_ELEM(ELEM_W),
239			     SRC0_NEG(0),
240			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
241			     SRC1_REL(ABSOLUTE),
242			     SRC1_ELEM(ELEM_W),
243			     SRC1_NEG(0),
244			     INDEX_MODE(SQ_INDEX_AR_X),
245			     PRED_SEL(SQ_PRED_SEL_OFF),
246			     LAST(1));
247    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
248				 SRC1_ABS(0),
249				 UPDATE_EXECUTE_MASK(0),
250				 UPDATE_PRED(0),
251				 WRITE_MASK(1),
252				 OMOD(SQ_ALU_OMOD_OFF),
253				 ALU_INST(SQ_OP2_INST_MOV),
254				 BANK_SWIZZLE(SQ_ALU_VEC_012),
255				 DST_GPR(0),
256				 DST_REL(ABSOLUTE),
257				 DST_ELEM(ELEM_W),
258				 CLAMP(1));
259
260    return i;
261}
262
263/* copy vs --------------------------------------- */
264int evergreen_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader)
265{
266    int i = 0;
267
268    /* 0 */
269    shader[i++] = CF_DWORD0(ADDR(4),
270			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
271    shader[i++] = CF_DWORD1(POP_COUNT(0),
272			    CF_CONST(0),
273			    COND(SQ_CF_COND_ACTIVE),
274			    I_COUNT(2),
275			    VALID_PIXEL_MODE(0),
276			    END_OF_PROGRAM(0),
277			    CF_INST(SQ_CF_INST_VC),
278			    WHOLE_QUAD_MODE(0),
279			    BARRIER(1));
280    /* 1 */
281    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
282					  TYPE(SQ_EXPORT_POS),
283					  RW_GPR(1),
284					  RW_REL(ABSOLUTE),
285					  INDEX_GPR(0),
286					  ELEM_SIZE(0));
287    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
288					       SRC_SEL_Y(SQ_SEL_Y),
289					       SRC_SEL_Z(SQ_SEL_Z),
290					       SRC_SEL_W(SQ_SEL_W),
291					       BURST_COUNT(0),
292					       VALID_PIXEL_MODE(0),
293					       END_OF_PROGRAM(0),
294					       CF_INST(SQ_CF_INST_EXPORT_DONE),
295					       MARK(0),
296					       BARRIER(1));
297    /* 2 */
298    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
299					  TYPE(SQ_EXPORT_PARAM),
300					  RW_GPR(0),
301					  RW_REL(ABSOLUTE),
302					  INDEX_GPR(0),
303					  ELEM_SIZE(0));
304    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
305					       SRC_SEL_Y(SQ_SEL_Y),
306					       SRC_SEL_Z(SQ_SEL_Z),
307					       SRC_SEL_W(SQ_SEL_W),
308					       BURST_COUNT(0),
309					       VALID_PIXEL_MODE(0),
310					       END_OF_PROGRAM(1),
311					       CF_INST(SQ_CF_INST_EXPORT_DONE),
312					       MARK(0),
313					       BARRIER(0));
314    /* 3 */
315    shader[i++] = 0x00000000;
316    shader[i++] = 0x00000000;
317    /* 4/5 */
318    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
319			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
320			     FETCH_WHOLE_QUAD(0),
321			     BUFFER_ID(0),
322			     SRC_GPR(0),
323			     SRC_REL(ABSOLUTE),
324			     SRC_SEL_X(SQ_SEL_X),
325			     MEGA_FETCH_COUNT(16));
326    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
327				 DST_REL(0),
328				 DST_SEL_X(SQ_SEL_X),
329				 DST_SEL_Y(SQ_SEL_Y),
330				 DST_SEL_Z(SQ_SEL_0),
331				 DST_SEL_W(SQ_SEL_1),
332				 USE_CONST_FIELDS(0),
333				 DATA_FORMAT(FMT_32_32_FLOAT),
334				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
335				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
336				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
337    shader[i++] = VTX_DWORD2(OFFSET(0),
338#if X_BYTE_ORDER == X_BIG_ENDIAN
339                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
340#else
341                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
342#endif
343			     CONST_BUF_NO_STRIDE(0),
344			     MEGA_FETCH(1),
345			     ALT_CONST(0),
346			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
347    shader[i++] = VTX_DWORD_PAD;
348    /* 6/7 */
349    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
350			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
351			     FETCH_WHOLE_QUAD(0),
352			     BUFFER_ID(0),
353			     SRC_GPR(0),
354			     SRC_REL(ABSOLUTE),
355			     SRC_SEL_X(SQ_SEL_X),
356			     MEGA_FETCH_COUNT(8));
357    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
358				 DST_REL(0),
359				 DST_SEL_X(SQ_SEL_X),
360				 DST_SEL_Y(SQ_SEL_Y),
361				 DST_SEL_Z(SQ_SEL_0),
362				 DST_SEL_W(SQ_SEL_1),
363				 USE_CONST_FIELDS(0),
364				 DATA_FORMAT(FMT_32_32_FLOAT),
365				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
366				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
367				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
368    shader[i++] = VTX_DWORD2(OFFSET(8),
369#if X_BYTE_ORDER == X_BIG_ENDIAN
370                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
371#else
372                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
373#endif
374			     CONST_BUF_NO_STRIDE(0),
375			     MEGA_FETCH(0),
376			     ALT_CONST(0),
377			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
378    shader[i++] = VTX_DWORD_PAD;
379
380    return i;
381}
382
383/* copy ps --------------------------------------- */
384int evergreen_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader)
385{
386    int i = 0;
387
388    /* CF INST 0 */
389    shader[i++] = CF_ALU_DWORD0(ADDR(3),
390				KCACHE_BANK0(0),
391				KCACHE_BANK1(0),
392				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
393    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
394				KCACHE_ADDR0(0),
395				KCACHE_ADDR1(0),
396				I_COUNT(4),
397				ALT_CONST(0),
398				CF_INST(SQ_CF_INST_ALU),
399				WHOLE_QUAD_MODE(0),
400				BARRIER(1));
401    /* CF INST 1 */
402    shader[i++] = CF_DWORD0(ADDR(8),
403			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
404    shader[i++] = CF_DWORD1(POP_COUNT(0),
405			    CF_CONST(0),
406			    COND(SQ_CF_COND_ACTIVE),
407			    I_COUNT(1),
408			    VALID_PIXEL_MODE(0),
409			    END_OF_PROGRAM(0),
410			    CF_INST(SQ_CF_INST_TC),
411			    WHOLE_QUAD_MODE(0),
412			    BARRIER(1));
413    /* CF INST 2 */
414    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
415					  TYPE(SQ_EXPORT_PIXEL),
416					  RW_GPR(0),
417					  RW_REL(ABSOLUTE),
418					  INDEX_GPR(0),
419					  ELEM_SIZE(1));
420    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
421					       SRC_SEL_Y(SQ_SEL_Y),
422					       SRC_SEL_Z(SQ_SEL_Z),
423					       SRC_SEL_W(SQ_SEL_W),
424					       BURST_COUNT(1),
425					       VALID_PIXEL_MODE(0),
426					       END_OF_PROGRAM(1),
427					       CF_INST(SQ_CF_INST_EXPORT_DONE),
428					       MARK(0),
429					       BARRIER(1));
430
431    /* 3 interpolate tex coords */
432    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
433			     SRC0_REL(ABSOLUTE),
434			     SRC0_ELEM(ELEM_Y),
435			     SRC0_NEG(0),
436			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
437			     SRC1_REL(ABSOLUTE),
438			     SRC1_ELEM(ELEM_X),
439			     SRC1_NEG(0),
440			     INDEX_MODE(SQ_INDEX_AR_X),
441			     PRED_SEL(SQ_PRED_SEL_OFF),
442			     LAST(0));
443    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
444				 SRC1_ABS(0),
445				 UPDATE_EXECUTE_MASK(0),
446				 UPDATE_PRED(0),
447				 WRITE_MASK(1),
448				 OMOD(SQ_ALU_OMOD_OFF),
449				 ALU_INST(SQ_OP2_INST_INTERP_XY),
450				 BANK_SWIZZLE(SQ_ALU_VEC_210),
451				 DST_GPR(0),
452				 DST_REL(ABSOLUTE),
453				 DST_ELEM(ELEM_X),
454				 CLAMP(0));
455    /* 4 */
456    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
457			     SRC0_REL(ABSOLUTE),
458			     SRC0_ELEM(ELEM_X),
459			     SRC0_NEG(0),
460			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
461			     SRC1_REL(ABSOLUTE),
462			     SRC1_ELEM(ELEM_X),
463			     SRC1_NEG(0),
464			     INDEX_MODE(SQ_INDEX_AR_X),
465			     PRED_SEL(SQ_PRED_SEL_OFF),
466			     LAST(0));
467    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
468				 SRC1_ABS(0),
469				 UPDATE_EXECUTE_MASK(0),
470				 UPDATE_PRED(0),
471				 WRITE_MASK(1),
472				 OMOD(SQ_ALU_OMOD_OFF),
473				 ALU_INST(SQ_OP2_INST_INTERP_XY),
474				 BANK_SWIZZLE(SQ_ALU_VEC_210),
475				 DST_GPR(0),
476				 DST_REL(ABSOLUTE),
477				 DST_ELEM(ELEM_Y),
478				 CLAMP(0));
479    /* 5 */
480    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
481			     SRC0_REL(ABSOLUTE),
482			     SRC0_ELEM(ELEM_Y),
483			     SRC0_NEG(0),
484			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
485			     SRC1_REL(ABSOLUTE),
486			     SRC1_ELEM(ELEM_X),
487			     SRC1_NEG(0),
488			     INDEX_MODE(SQ_INDEX_AR_X),
489			     PRED_SEL(SQ_PRED_SEL_OFF),
490			     LAST(0));
491    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
492				 SRC1_ABS(0),
493				 UPDATE_EXECUTE_MASK(0),
494				 UPDATE_PRED(0),
495				 WRITE_MASK(0),
496				 OMOD(SQ_ALU_OMOD_OFF),
497				 ALU_INST(SQ_OP2_INST_INTERP_XY),
498				 BANK_SWIZZLE(SQ_ALU_VEC_210),
499				 DST_GPR(0),
500				 DST_REL(ABSOLUTE),
501				 DST_ELEM(ELEM_Z),
502				 CLAMP(0));
503    /* 6 */
504    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
505			     SRC0_REL(ABSOLUTE),
506			     SRC0_ELEM(ELEM_X),
507			     SRC0_NEG(0),
508			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
509			     SRC1_REL(ABSOLUTE),
510			     SRC1_ELEM(ELEM_X),
511			     SRC1_NEG(0),
512			     INDEX_MODE(SQ_INDEX_AR_X),
513			     PRED_SEL(SQ_PRED_SEL_OFF),
514			     LAST(1));
515    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
516				 SRC1_ABS(0),
517				 UPDATE_EXECUTE_MASK(0),
518				 UPDATE_PRED(0),
519				 WRITE_MASK(0),
520				 OMOD(SQ_ALU_OMOD_OFF),
521				 ALU_INST(SQ_OP2_INST_INTERP_XY),
522				 BANK_SWIZZLE(SQ_ALU_VEC_210),
523				 DST_GPR(0),
524				 DST_REL(ABSOLUTE),
525				 DST_ELEM(ELEM_W),
526				 CLAMP(0));
527
528    /* 7 */
529    shader[i++] = 0x00000000;
530    shader[i++] = 0x00000000;
531
532    /* 8/9 TEX INST 0 */
533    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
534			     INST_MOD(0),
535			     FETCH_WHOLE_QUAD(0),
536			     RESOURCE_ID(0),
537			     SRC_GPR(0),
538			     SRC_REL(ABSOLUTE),
539			     ALT_CONST(0),
540			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
541			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
542    shader[i++] = TEX_DWORD1(DST_GPR(0),
543			     DST_REL(ABSOLUTE),
544			     DST_SEL_X(SQ_SEL_X), /* R */
545			     DST_SEL_Y(SQ_SEL_Y), /* G */
546			     DST_SEL_Z(SQ_SEL_Z), /* B */
547			     DST_SEL_W(SQ_SEL_W), /* A */
548			     LOD_BIAS(0),
549			     COORD_TYPE_X(TEX_UNNORMALIZED),
550			     COORD_TYPE_Y(TEX_UNNORMALIZED),
551			     COORD_TYPE_Z(TEX_UNNORMALIZED),
552			     COORD_TYPE_W(TEX_UNNORMALIZED));
553    shader[i++] = TEX_DWORD2(OFFSET_X(0),
554			     OFFSET_Y(0),
555			     OFFSET_Z(0),
556			     SAMPLER_ID(0),
557			     SRC_SEL_X(SQ_SEL_X),
558			     SRC_SEL_Y(SQ_SEL_Y),
559			     SRC_SEL_Z(SQ_SEL_0),
560			     SRC_SEL_W(SQ_SEL_1));
561    shader[i++] = TEX_DWORD_PAD;
562
563    return i;
564}
565
566int evergreen_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
567{
568    int i = 0;
569
570    /* 0 */
571    shader[i++] = CF_DWORD0(ADDR(6),
572			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
573    shader[i++] = CF_DWORD1(POP_COUNT(0),
574                            CF_CONST(0),
575                            COND(SQ_CF_COND_ACTIVE),
576                            I_COUNT(2),
577                            VALID_PIXEL_MODE(0),
578                            END_OF_PROGRAM(0),
579                            CF_INST(SQ_CF_INST_VC),
580                            WHOLE_QUAD_MODE(0),
581                            BARRIER(1));
582
583    /* 1 - ALU */
584    shader[i++] = CF_ALU_DWORD0(ADDR(4),
585				KCACHE_BANK0(0),
586				KCACHE_BANK1(0),
587				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
588    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
589				KCACHE_ADDR0(0),
590				KCACHE_ADDR1(0),
591				I_COUNT(2),
592				ALT_CONST(0),
593				CF_INST(SQ_CF_INST_ALU),
594				WHOLE_QUAD_MODE(0),
595				BARRIER(1));
596
597    /* 2 */
598    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
599                                          TYPE(SQ_EXPORT_POS),
600                                          RW_GPR(1),
601                                          RW_REL(ABSOLUTE),
602                                          INDEX_GPR(0),
603                                          ELEM_SIZE(3));
604    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
605                                               SRC_SEL_Y(SQ_SEL_Y),
606                                               SRC_SEL_Z(SQ_SEL_Z),
607                                               SRC_SEL_W(SQ_SEL_W),
608                                               BURST_COUNT(1),
609                                               VALID_PIXEL_MODE(0),
610                                               END_OF_PROGRAM(0),
611                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
612                                               MARK(0),
613                                               BARRIER(1));
614    /* 3 */
615    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
616                                          TYPE(SQ_EXPORT_PARAM),
617                                          RW_GPR(0),
618                                          RW_REL(ABSOLUTE),
619                                          INDEX_GPR(0),
620                                          ELEM_SIZE(3));
621    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
622                                               SRC_SEL_Y(SQ_SEL_Y),
623                                               SRC_SEL_Z(SQ_SEL_Z),
624                                               SRC_SEL_W(SQ_SEL_W),
625                                               BURST_COUNT(1),
626                                               VALID_PIXEL_MODE(0),
627                                               END_OF_PROGRAM(1),
628                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
629                                               MARK(0),
630                                               BARRIER(0));
631
632
633    /* 4 texX / w */
634    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
635                             SRC0_REL(ABSOLUTE),
636                             SRC0_ELEM(ELEM_X),
637                             SRC0_NEG(0),
638                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
639                             SRC1_REL(ABSOLUTE),
640                             SRC1_ELEM(ELEM_X),
641                             SRC1_NEG(0),
642                             INDEX_MODE(SQ_INDEX_AR_X),
643                             PRED_SEL(SQ_PRED_SEL_OFF),
644                             LAST(0));
645    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
646                                 SRC1_ABS(0),
647                                 UPDATE_EXECUTE_MASK(0),
648                                 UPDATE_PRED(0),
649                                 WRITE_MASK(1),
650                                 OMOD(SQ_ALU_OMOD_OFF),
651                                 ALU_INST(SQ_OP2_INST_MUL),
652                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
653                                 DST_GPR(0),
654                                 DST_REL(ABSOLUTE),
655                                 DST_ELEM(ELEM_X),
656                                 CLAMP(0));
657
658    /* 5 texY / h */
659    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
660                             SRC0_REL(ABSOLUTE),
661                             SRC0_ELEM(ELEM_Y),
662                             SRC0_NEG(0),
663                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
664                             SRC1_REL(ABSOLUTE),
665                             SRC1_ELEM(ELEM_Y),
666                             SRC1_NEG(0),
667                             INDEX_MODE(SQ_INDEX_AR_X),
668                             PRED_SEL(SQ_PRED_SEL_OFF),
669                             LAST(1));
670    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
671                                 SRC1_ABS(0),
672                                 UPDATE_EXECUTE_MASK(0),
673                                 UPDATE_PRED(0),
674                                 WRITE_MASK(1),
675                                 OMOD(SQ_ALU_OMOD_OFF),
676                                 ALU_INST(SQ_OP2_INST_MUL),
677                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
678                                 DST_GPR(0),
679                                 DST_REL(ABSOLUTE),
680                                 DST_ELEM(ELEM_Y),
681                                 CLAMP(0));
682
683    /* 6/7 */
684    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
685                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
686                             FETCH_WHOLE_QUAD(0),
687                             BUFFER_ID(0),
688                             SRC_GPR(0),
689                             SRC_REL(ABSOLUTE),
690                             SRC_SEL_X(SQ_SEL_X),
691                             MEGA_FETCH_COUNT(16));
692    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
693                                 DST_REL(ABSOLUTE),
694                                 DST_SEL_X(SQ_SEL_X),
695                                 DST_SEL_Y(SQ_SEL_Y),
696                                 DST_SEL_Z(SQ_SEL_0),
697                                 DST_SEL_W(SQ_SEL_1),
698                                 USE_CONST_FIELDS(0),
699                                 DATA_FORMAT(FMT_32_32_FLOAT),
700                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
701                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
702                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
703    shader[i++] = VTX_DWORD2(OFFSET(0),
704#if X_BYTE_ORDER == X_BIG_ENDIAN
705                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
706#else
707                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
708#endif
709                             CONST_BUF_NO_STRIDE(0),
710                             MEGA_FETCH(1),
711			     ALT_CONST(0),
712			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
713    shader[i++] = VTX_DWORD_PAD;
714    /* 8/9 */
715    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
716                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
717                             FETCH_WHOLE_QUAD(0),
718                             BUFFER_ID(0),
719                             SRC_GPR(0),
720                             SRC_REL(ABSOLUTE),
721                             SRC_SEL_X(SQ_SEL_X),
722                             MEGA_FETCH_COUNT(8));
723    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
724                                 DST_REL(ABSOLUTE),
725                                 DST_SEL_X(SQ_SEL_X),
726                                 DST_SEL_Y(SQ_SEL_Y),
727                                 DST_SEL_Z(SQ_SEL_0),
728                                 DST_SEL_W(SQ_SEL_1),
729                                 USE_CONST_FIELDS(0),
730                                 DATA_FORMAT(FMT_32_32_FLOAT),
731                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
732                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
733                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
734    shader[i++] = VTX_DWORD2(OFFSET(8),
735#if X_BYTE_ORDER == X_BIG_ENDIAN
736                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
737#else
738                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
739#endif
740                             CONST_BUF_NO_STRIDE(0),
741                             MEGA_FETCH(0),
742			     ALT_CONST(0),
743			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
744    shader[i++] = VTX_DWORD_PAD;
745
746    return i;
747}
748
749int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
750{
751    int i = 0;
752
753    /* 0 */
754    shader[i++] = CF_ALU_DWORD0(ADDR(5),
755				KCACHE_BANK0(0),
756				KCACHE_BANK1(0),
757				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
758    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
759				KCACHE_ADDR0(0),
760				KCACHE_ADDR1(0),
761				I_COUNT(4),
762				ALT_CONST(0),
763				CF_INST(SQ_CF_INST_ALU),
764				WHOLE_QUAD_MODE(0),
765				BARRIER(1));
766    /* 1 */
767    shader[i++] = CF_DWORD0(ADDR(21),
768			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
769    shader[i++] = CF_DWORD1(POP_COUNT(0),
770                            CF_CONST(0),
771                            COND(SQ_CF_COND_BOOL),
772                            I_COUNT(0),
773                            VALID_PIXEL_MODE(0),
774                            END_OF_PROGRAM(0),
775                            CF_INST(SQ_CF_INST_CALL),
776                            WHOLE_QUAD_MODE(0),
777                            BARRIER(0));
778    /* 2 */
779    shader[i++] = CF_DWORD0(ADDR(30),
780			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
781    shader[i++] = CF_DWORD1(POP_COUNT(0),
782                            CF_CONST(0),
783                            COND(SQ_CF_COND_NOT_BOOL),
784                            I_COUNT(0),
785                            VALID_PIXEL_MODE(0),
786                            END_OF_PROGRAM(0),
787                            CF_INST(SQ_CF_INST_CALL),
788                            WHOLE_QUAD_MODE(0),
789                            BARRIER(0));
790    /* 3 */
791    shader[i++] = CF_ALU_DWORD0(ADDR(9),
792                                KCACHE_BANK0(0),
793                                KCACHE_BANK1(0),
794                                KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
795    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
796                                KCACHE_ADDR0(0),
797                                KCACHE_ADDR1(0),
798                                I_COUNT(12),
799                                ALT_CONST(0),
800                                CF_INST(SQ_CF_INST_ALU),
801                                WHOLE_QUAD_MODE(0),
802                                BARRIER(1));
803    /* 4 */
804    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
805                                          TYPE(SQ_EXPORT_PIXEL),
806                                          RW_GPR(2),
807                                          RW_REL(ABSOLUTE),
808                                          INDEX_GPR(0),
809                                          ELEM_SIZE(3));
810    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
811                                               SRC_SEL_Y(SQ_SEL_Y),
812                                               SRC_SEL_Z(SQ_SEL_Z),
813                                               SRC_SEL_W(SQ_SEL_W),
814                                               BURST_COUNT(1),
815                                               VALID_PIXEL_MODE(0),
816                                               END_OF_PROGRAM(1),
817                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
818                                               MARK(0),
819                                               BARRIER(1));
820    /* 5 interpolate tex coords */
821    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
822			     SRC0_REL(ABSOLUTE),
823			     SRC0_ELEM(ELEM_Y),
824			     SRC0_NEG(0),
825			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
826			     SRC1_REL(ABSOLUTE),
827			     SRC1_ELEM(ELEM_X),
828			     SRC1_NEG(0),
829			     INDEX_MODE(SQ_INDEX_AR_X),
830			     PRED_SEL(SQ_PRED_SEL_OFF),
831			     LAST(0));
832    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
833				 SRC1_ABS(0),
834				 UPDATE_EXECUTE_MASK(0),
835				 UPDATE_PRED(0),
836				 WRITE_MASK(1),
837				 OMOD(SQ_ALU_OMOD_OFF),
838				 ALU_INST(SQ_OP2_INST_INTERP_XY),
839				 BANK_SWIZZLE(SQ_ALU_VEC_210),
840				 DST_GPR(0),
841				 DST_REL(ABSOLUTE),
842				 DST_ELEM(ELEM_X),
843				 CLAMP(0));
844    /* 6 */
845    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
846			     SRC0_REL(ABSOLUTE),
847			     SRC0_ELEM(ELEM_X),
848			     SRC0_NEG(0),
849			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
850			     SRC1_REL(ABSOLUTE),
851			     SRC1_ELEM(ELEM_X),
852			     SRC1_NEG(0),
853			     INDEX_MODE(SQ_INDEX_AR_X),
854			     PRED_SEL(SQ_PRED_SEL_OFF),
855			     LAST(0));
856    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
857				 SRC1_ABS(0),
858				 UPDATE_EXECUTE_MASK(0),
859				 UPDATE_PRED(0),
860				 WRITE_MASK(1),
861				 OMOD(SQ_ALU_OMOD_OFF),
862				 ALU_INST(SQ_OP2_INST_INTERP_XY),
863				 BANK_SWIZZLE(SQ_ALU_VEC_210),
864				 DST_GPR(0),
865				 DST_REL(ABSOLUTE),
866				 DST_ELEM(ELEM_Y),
867				 CLAMP(0));
868    /* 7 */
869    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
870			     SRC0_REL(ABSOLUTE),
871			     SRC0_ELEM(ELEM_Y),
872			     SRC0_NEG(0),
873			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
874			     SRC1_REL(ABSOLUTE),
875			     SRC1_ELEM(ELEM_X),
876			     SRC1_NEG(0),
877			     INDEX_MODE(SQ_INDEX_AR_X),
878			     PRED_SEL(SQ_PRED_SEL_OFF),
879			     LAST(0));
880    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
881				 SRC1_ABS(0),
882				 UPDATE_EXECUTE_MASK(0),
883				 UPDATE_PRED(0),
884				 WRITE_MASK(0),
885				 OMOD(SQ_ALU_OMOD_OFF),
886				 ALU_INST(SQ_OP2_INST_INTERP_XY),
887				 BANK_SWIZZLE(SQ_ALU_VEC_210),
888				 DST_GPR(0),
889				 DST_REL(ABSOLUTE),
890				 DST_ELEM(ELEM_Z),
891				 CLAMP(0));
892    /* 8 */
893    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
894			     SRC0_REL(ABSOLUTE),
895			     SRC0_ELEM(ELEM_X),
896			     SRC0_NEG(0),
897			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
898			     SRC1_REL(ABSOLUTE),
899			     SRC1_ELEM(ELEM_X),
900			     SRC1_NEG(0),
901			     INDEX_MODE(SQ_INDEX_AR_X),
902			     PRED_SEL(SQ_PRED_SEL_OFF),
903			     LAST(1));
904    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
905				 SRC1_ABS(0),
906				 UPDATE_EXECUTE_MASK(0),
907				 UPDATE_PRED(0),
908				 WRITE_MASK(0),
909				 OMOD(SQ_ALU_OMOD_OFF),
910				 ALU_INST(SQ_OP2_INST_INTERP_XY),
911				 BANK_SWIZZLE(SQ_ALU_VEC_210),
912				 DST_GPR(0),
913				 DST_REL(ABSOLUTE),
914				 DST_ELEM(ELEM_W),
915				 CLAMP(0));
916
917    /* 9,10,11,12 */
918    /* r2.x = MAD(c0.w, r1.x, c0.x) */
919    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
920                             SRC0_REL(ABSOLUTE),
921                             SRC0_ELEM(ELEM_W),
922                             SRC0_NEG(0),
923                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
924                             SRC1_REL(ABSOLUTE),
925                             SRC1_ELEM(ELEM_X),
926                             SRC1_NEG(0),
927                             INDEX_MODE(SQ_INDEX_LOOP),
928                             PRED_SEL(SQ_PRED_SEL_OFF),
929                             LAST(0));
930    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
931                                 SRC2_REL(ABSOLUTE),
932                                 SRC2_ELEM(ELEM_X),
933                                 SRC2_NEG(0),
934                                 ALU_INST(SQ_OP3_INST_MULADD),
935                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
936                                 DST_GPR(2),
937                                 DST_REL(ABSOLUTE),
938                                 DST_ELEM(ELEM_X),
939                                 CLAMP(0));
940    /* r2.y = MAD(c0.w, r1.x, c0.y) */
941    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
942                             SRC0_REL(ABSOLUTE),
943                             SRC0_ELEM(ELEM_W),
944                             SRC0_NEG(0),
945                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
946                             SRC1_REL(ABSOLUTE),
947                             SRC1_ELEM(ELEM_X),
948                             SRC1_NEG(0),
949                             INDEX_MODE(SQ_INDEX_LOOP),
950                             PRED_SEL(SQ_PRED_SEL_OFF),
951                             LAST(0));
952    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
953                                 SRC2_REL(ABSOLUTE),
954                                 SRC2_ELEM(ELEM_Y),
955                                 SRC2_NEG(0),
956                                 ALU_INST(SQ_OP3_INST_MULADD),
957                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
958                                 DST_GPR(2),
959                                 DST_REL(ABSOLUTE),
960                                 DST_ELEM(ELEM_Y),
961                                 CLAMP(0));
962    /* r2.z = MAD(c0.w, r1.x, c0.z) */
963    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
964                             SRC0_REL(ABSOLUTE),
965                             SRC0_ELEM(ELEM_W),
966                             SRC0_NEG(0),
967                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
968                             SRC1_REL(ABSOLUTE),
969                             SRC1_ELEM(ELEM_X),
970                             SRC1_NEG(0),
971                             INDEX_MODE(SQ_INDEX_LOOP),
972                             PRED_SEL(SQ_PRED_SEL_OFF),
973                             LAST(0));
974    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
975                                 SRC2_REL(ABSOLUTE),
976                                 SRC2_ELEM(ELEM_Z),
977                                 SRC2_NEG(0),
978                                 ALU_INST(SQ_OP3_INST_MULADD),
979                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
980                                 DST_GPR(2),
981                                 DST_REL(ABSOLUTE),
982                                 DST_ELEM(ELEM_Z),
983                                 CLAMP(0));
984    /* r2.w = MAD(0, 0, 1) */
985    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
986                             SRC0_REL(ABSOLUTE),
987                             SRC0_ELEM(ELEM_X),
988                             SRC0_NEG(0),
989                             SRC1_SEL(SQ_ALU_SRC_0),
990                             SRC1_REL(ABSOLUTE),
991                             SRC1_ELEM(ELEM_X),
992                             SRC1_NEG(0),
993                             INDEX_MODE(SQ_INDEX_LOOP),
994                             PRED_SEL(SQ_PRED_SEL_OFF),
995                             LAST(1));
996    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
997                                 SRC2_REL(ABSOLUTE),
998                                 SRC2_ELEM(ELEM_X),
999                                 SRC2_NEG(0),
1000                                 ALU_INST(SQ_OP3_INST_MULADD),
1001                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1002                                 DST_GPR(2),
1003                                 DST_REL(ABSOLUTE),
1004                                 DST_ELEM(ELEM_W),
1005                                 CLAMP(0));
1006
1007    /* 13,14,15,16 */
1008    /* r2.x = MAD(c1.x, r1.y, pv.x) */
1009    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
1010                             SRC0_REL(ABSOLUTE),
1011                             SRC0_ELEM(ELEM_X),
1012                             SRC0_NEG(0),
1013                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1014                             SRC1_REL(ABSOLUTE),
1015                             SRC1_ELEM(ELEM_Y),
1016                             SRC1_NEG(0),
1017                             INDEX_MODE(SQ_INDEX_LOOP),
1018                             PRED_SEL(SQ_PRED_SEL_OFF),
1019                             LAST(0));
1020    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1021                                 SRC2_REL(ABSOLUTE),
1022                                 SRC2_ELEM(ELEM_X),
1023                                 SRC2_NEG(0),
1024                                 ALU_INST(SQ_OP3_INST_MULADD),
1025                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1026                                 DST_GPR(2),
1027                                 DST_REL(ABSOLUTE),
1028                                 DST_ELEM(ELEM_X),
1029                                 CLAMP(0));
1030    /* r2.y = MAD(c1.y, r1.y, pv.y) */
1031    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
1032                             SRC0_REL(ABSOLUTE),
1033                             SRC0_ELEM(ELEM_Y),
1034                             SRC0_NEG(0),
1035                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1036                             SRC1_REL(ABSOLUTE),
1037                             SRC1_ELEM(ELEM_Y),
1038                             SRC1_NEG(0),
1039                             INDEX_MODE(SQ_INDEX_LOOP),
1040                             PRED_SEL(SQ_PRED_SEL_OFF),
1041                             LAST(0));
1042    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1043                                 SRC2_REL(ABSOLUTE),
1044                                 SRC2_ELEM(ELEM_Y),
1045                                 SRC2_NEG(0),
1046                                 ALU_INST(SQ_OP3_INST_MULADD),
1047                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1048                                 DST_GPR(2),
1049                                 DST_REL(ABSOLUTE),
1050                                 DST_ELEM(ELEM_Y),
1051                                 CLAMP(0));
1052    /* r2.z = MAD(c1.z, r1.y, pv.z) */
1053    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
1054                             SRC0_REL(ABSOLUTE),
1055                             SRC0_ELEM(ELEM_Z),
1056                             SRC0_NEG(0),
1057                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1058                             SRC1_REL(ABSOLUTE),
1059                             SRC1_ELEM(ELEM_Y),
1060                             SRC1_NEG(0),
1061                             INDEX_MODE(SQ_INDEX_LOOP),
1062                             PRED_SEL(SQ_PRED_SEL_OFF),
1063                             LAST(0));
1064    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1065                                 SRC2_REL(ABSOLUTE),
1066                                 SRC2_ELEM(ELEM_Z),
1067                                 SRC2_NEG(0),
1068                                 ALU_INST(SQ_OP3_INST_MULADD),
1069                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1070                                 DST_GPR(2),
1071                                 DST_REL(ABSOLUTE),
1072                                 DST_ELEM(ELEM_Z),
1073                                 CLAMP(0));
1074    /* r2.w = MAD(0, 0, 1) */
1075    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
1076                             SRC0_REL(ABSOLUTE),
1077                             SRC0_ELEM(ELEM_X),
1078                             SRC0_NEG(0),
1079                             SRC1_SEL(SQ_ALU_SRC_0),
1080                             SRC1_REL(ABSOLUTE),
1081                             SRC1_ELEM(ELEM_X),
1082                             SRC1_NEG(0),
1083                             INDEX_MODE(SQ_INDEX_LOOP),
1084                             PRED_SEL(SQ_PRED_SEL_OFF),
1085                             LAST(1));
1086    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
1087                                 SRC2_REL(ABSOLUTE),
1088                                 SRC2_ELEM(ELEM_W),
1089                                 SRC2_NEG(0),
1090                                 ALU_INST(SQ_OP3_INST_MULADD),
1091                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1092                                 DST_GPR(2),
1093                                 DST_REL(ABSOLUTE),
1094                                 DST_ELEM(ELEM_W),
1095                                 CLAMP(0));
1096    /* 17,18,19,20 */
1097    /* r2.x = MAD(c2.x, r1.z, pv.x) */
1098    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
1099                             SRC0_REL(ABSOLUTE),
1100                             SRC0_ELEM(ELEM_X),
1101                             SRC0_NEG(0),
1102                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1103                             SRC1_REL(ABSOLUTE),
1104                             SRC1_ELEM(ELEM_Z),
1105                             SRC1_NEG(0),
1106                             INDEX_MODE(SQ_INDEX_LOOP),
1107                             PRED_SEL(SQ_PRED_SEL_OFF),
1108                             LAST(0));
1109    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1110                                 SRC2_REL(ABSOLUTE),
1111                                 SRC2_ELEM(ELEM_X),
1112                                 SRC2_NEG(0),
1113                                 ALU_INST(SQ_OP3_INST_MULADD),
1114                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1115                                 DST_GPR(2),
1116                                 DST_REL(ABSOLUTE),
1117                                 DST_ELEM(ELEM_X),
1118                                 CLAMP(1));
1119    /* r2.y = MAD(c2.y, r1.z, pv.y) */
1120    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
1121                             SRC0_REL(ABSOLUTE),
1122                             SRC0_ELEM(ELEM_Y),
1123                             SRC0_NEG(0),
1124                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1125                             SRC1_REL(ABSOLUTE),
1126                             SRC1_ELEM(ELEM_Z),
1127                             SRC1_NEG(0),
1128                             INDEX_MODE(SQ_INDEX_LOOP),
1129                             PRED_SEL(SQ_PRED_SEL_OFF),
1130                             LAST(0));
1131    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1132                                 SRC2_REL(ABSOLUTE),
1133                                 SRC2_ELEM(ELEM_Y),
1134                                 SRC2_NEG(0),
1135                                 ALU_INST(SQ_OP3_INST_MULADD),
1136                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1137                                 DST_GPR(2),
1138                                 DST_REL(ABSOLUTE),
1139                                 DST_ELEM(ELEM_Y),
1140                                 CLAMP(1));
1141    /* r2.z = MAD(c2.z, r1.z, pv.z) */
1142    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
1143                             SRC0_REL(ABSOLUTE),
1144                             SRC0_ELEM(ELEM_Z),
1145                             SRC0_NEG(0),
1146                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1147                             SRC1_REL(ABSOLUTE),
1148                             SRC1_ELEM(ELEM_Z),
1149                             SRC1_NEG(0),
1150                             INDEX_MODE(SQ_INDEX_LOOP),
1151                             PRED_SEL(SQ_PRED_SEL_OFF),
1152                             LAST(0));
1153    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1154                                 SRC2_REL(ABSOLUTE),
1155                                 SRC2_ELEM(ELEM_Z),
1156                                 SRC2_NEG(0),
1157                                 ALU_INST(SQ_OP3_INST_MULADD),
1158                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1159                                 DST_GPR(2),
1160                                 DST_REL(ABSOLUTE),
1161                                 DST_ELEM(ELEM_Z),
1162                                 CLAMP(1));
1163    /* r2.w = MAD(0, 0, 1) */
1164    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
1165                             SRC0_REL(ABSOLUTE),
1166                             SRC0_ELEM(ELEM_X),
1167                             SRC0_NEG(0),
1168                             SRC1_SEL(SQ_ALU_SRC_0),
1169                             SRC1_REL(ABSOLUTE),
1170                             SRC1_ELEM(ELEM_X),
1171                             SRC1_NEG(0),
1172                             INDEX_MODE(SQ_INDEX_LOOP),
1173                             PRED_SEL(SQ_PRED_SEL_OFF),
1174                             LAST(1));
1175    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
1176                                 SRC2_REL(ABSOLUTE),
1177                                 SRC2_ELEM(ELEM_X),
1178                                 SRC2_NEG(0),
1179                                 ALU_INST(SQ_OP3_INST_MULADD),
1180                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1181                                 DST_GPR(2),
1182                                 DST_REL(ABSOLUTE),
1183                                 DST_ELEM(ELEM_W),
1184                                 CLAMP(1));
1185
1186    /* 21 */
1187    shader[i++] = CF_DWORD0(ADDR(24),
1188			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1189    shader[i++] = CF_DWORD1(POP_COUNT(0),
1190                            CF_CONST(0),
1191                            COND(SQ_CF_COND_ACTIVE),
1192                            I_COUNT(3),
1193                            VALID_PIXEL_MODE(0),
1194                            END_OF_PROGRAM(0),
1195                            CF_INST(SQ_CF_INST_TC),
1196                            WHOLE_QUAD_MODE(0),
1197                            BARRIER(1));
1198    /* 22 */
1199    shader[i++] = CF_DWORD0(ADDR(0),
1200			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1201    shader[i++] = CF_DWORD1(POP_COUNT(0),
1202			    CF_CONST(0),
1203			    COND(SQ_CF_COND_ACTIVE),
1204			    I_COUNT(0),
1205			    VALID_PIXEL_MODE(0),
1206			    END_OF_PROGRAM(0),
1207			    CF_INST(SQ_CF_INST_RETURN),
1208			    WHOLE_QUAD_MODE(0),
1209			    BARRIER(1));
1210    /* 23 */
1211    shader[i++] = 0x00000000;
1212    shader[i++] = 0x00000000;
1213    /* 24/25 */
1214    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1215                             INST_MOD(0),
1216                             FETCH_WHOLE_QUAD(0),
1217                             RESOURCE_ID(0),
1218                             SRC_GPR(0),
1219                             SRC_REL(ABSOLUTE),
1220                             ALT_CONST(0),
1221			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1222			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1223    shader[i++] = TEX_DWORD1(DST_GPR(1),
1224                             DST_REL(ABSOLUTE),
1225                             DST_SEL_X(SQ_SEL_X),
1226                             DST_SEL_Y(SQ_SEL_MASK),
1227                             DST_SEL_Z(SQ_SEL_MASK),
1228                             DST_SEL_W(SQ_SEL_1),
1229                             LOD_BIAS(0),
1230                             COORD_TYPE_X(TEX_NORMALIZED),
1231                             COORD_TYPE_Y(TEX_NORMALIZED),
1232                             COORD_TYPE_Z(TEX_NORMALIZED),
1233                             COORD_TYPE_W(TEX_NORMALIZED));
1234    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1235                             OFFSET_Y(0),
1236                             OFFSET_Z(0),
1237                             SAMPLER_ID(0),
1238                             SRC_SEL_X(SQ_SEL_X),
1239                             SRC_SEL_Y(SQ_SEL_Y),
1240                             SRC_SEL_Z(SQ_SEL_0),
1241                             SRC_SEL_W(SQ_SEL_1));
1242    shader[i++] = TEX_DWORD_PAD;
1243    /* 26/27 */
1244    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1245                             INST_MOD(0),
1246                             FETCH_WHOLE_QUAD(0),
1247                             RESOURCE_ID(1),
1248                             SRC_GPR(0),
1249                             SRC_REL(ABSOLUTE),
1250                             ALT_CONST(0),
1251			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1252			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1253    shader[i++] = TEX_DWORD1(DST_GPR(1),
1254                             DST_REL(ABSOLUTE),
1255                             DST_SEL_X(SQ_SEL_MASK),
1256                             DST_SEL_Y(SQ_SEL_MASK),
1257                             DST_SEL_Z(SQ_SEL_X),
1258                             DST_SEL_W(SQ_SEL_MASK),
1259                             LOD_BIAS(0),
1260                             COORD_TYPE_X(TEX_NORMALIZED),
1261                             COORD_TYPE_Y(TEX_NORMALIZED),
1262                             COORD_TYPE_Z(TEX_NORMALIZED),
1263                             COORD_TYPE_W(TEX_NORMALIZED));
1264    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1265                             OFFSET_Y(0),
1266                             OFFSET_Z(0),
1267                             SAMPLER_ID(1),
1268                             SRC_SEL_X(SQ_SEL_X),
1269                             SRC_SEL_Y(SQ_SEL_Y),
1270                             SRC_SEL_Z(SQ_SEL_0),
1271                             SRC_SEL_W(SQ_SEL_1));
1272    shader[i++] = TEX_DWORD_PAD;
1273    /* 28/29 */
1274    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1275                             INST_MOD(0),
1276                             FETCH_WHOLE_QUAD(0),
1277                             RESOURCE_ID(2),
1278                             SRC_GPR(0),
1279                             SRC_REL(ABSOLUTE),
1280                             ALT_CONST(0),
1281			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1282			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1283    shader[i++] = TEX_DWORD1(DST_GPR(1),
1284                             DST_REL(ABSOLUTE),
1285                             DST_SEL_X(SQ_SEL_MASK),
1286                             DST_SEL_Y(SQ_SEL_X),
1287                             DST_SEL_Z(SQ_SEL_MASK),
1288                             DST_SEL_W(SQ_SEL_MASK),
1289                             LOD_BIAS(0),
1290                             COORD_TYPE_X(TEX_NORMALIZED),
1291                             COORD_TYPE_Y(TEX_NORMALIZED),
1292                             COORD_TYPE_Z(TEX_NORMALIZED),
1293                             COORD_TYPE_W(TEX_NORMALIZED));
1294    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1295                             OFFSET_Y(0),
1296                             OFFSET_Z(0),
1297                             SAMPLER_ID(2),
1298                             SRC_SEL_X(SQ_SEL_X),
1299                             SRC_SEL_Y(SQ_SEL_Y),
1300                             SRC_SEL_Z(SQ_SEL_0),
1301                             SRC_SEL_W(SQ_SEL_1));
1302    shader[i++] = TEX_DWORD_PAD;
1303    /* 30 */
1304    shader[i++] = CF_DWORD0(ADDR(32),
1305			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1306    shader[i++] = CF_DWORD1(POP_COUNT(0),
1307                            CF_CONST(0),
1308                            COND(SQ_CF_COND_ACTIVE),
1309                            I_COUNT(1),
1310                            VALID_PIXEL_MODE(0),
1311                            END_OF_PROGRAM(0),
1312                            CF_INST(SQ_CF_INST_TC),
1313                            WHOLE_QUAD_MODE(0),
1314                            BARRIER(1));
1315    /* 31 */
1316    shader[i++] = CF_DWORD0(ADDR(0),
1317			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1318    shader[i++] = CF_DWORD1(POP_COUNT(0),
1319			    CF_CONST(0),
1320			    COND(SQ_CF_COND_ACTIVE),
1321			    I_COUNT(0),
1322			    VALID_PIXEL_MODE(0),
1323			    END_OF_PROGRAM(0),
1324			    CF_INST(SQ_CF_INST_RETURN),
1325			    WHOLE_QUAD_MODE(0),
1326			    BARRIER(1));
1327    /* 32/33 */
1328    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1329                             INST_MOD(0),
1330                             FETCH_WHOLE_QUAD(0),
1331                             RESOURCE_ID(0),
1332                             SRC_GPR(0),
1333                             SRC_REL(ABSOLUTE),
1334                             ALT_CONST(0),
1335                             RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1336                             SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1337    shader[i++] = TEX_DWORD1(DST_GPR(1),
1338                             DST_REL(ABSOLUTE),
1339                             DST_SEL_X(SQ_SEL_X),
1340                             DST_SEL_Y(SQ_SEL_Y),
1341                             DST_SEL_Z(SQ_SEL_Z),
1342                             DST_SEL_W(SQ_SEL_1),
1343                             LOD_BIAS(0),
1344                             COORD_TYPE_X(TEX_NORMALIZED),
1345                             COORD_TYPE_Y(TEX_NORMALIZED),
1346                             COORD_TYPE_Z(TEX_NORMALIZED),
1347                             COORD_TYPE_W(TEX_NORMALIZED));
1348    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1349                             OFFSET_Y(0),
1350                             OFFSET_Z(0),
1351                             SAMPLER_ID(0),
1352                             SRC_SEL_X(SQ_SEL_X),
1353                             SRC_SEL_Y(SQ_SEL_Y),
1354                             SRC_SEL_Z(SQ_SEL_0),
1355                             SRC_SEL_W(SQ_SEL_1));
1356    shader[i++] = TEX_DWORD_PAD;
1357
1358    return i;
1359}
1360
1361/* comp vs --------------------------------------- */
1362int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
1363{
1364    int i = 0;
1365
1366    /* 0 */
1367    shader[i++] = CF_DWORD0(ADDR(3),
1368			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1369    shader[i++] = CF_DWORD1(POP_COUNT(0),
1370                            CF_CONST(0),
1371                            COND(SQ_CF_COND_BOOL),
1372                            I_COUNT(0),
1373                            VALID_PIXEL_MODE(0),
1374                            END_OF_PROGRAM(0),
1375                            CF_INST(SQ_CF_INST_CALL),
1376                            WHOLE_QUAD_MODE(0),
1377                            BARRIER(0));
1378    /* 1 */
1379    shader[i++] = CF_DWORD0(ADDR(9),
1380			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1381    shader[i++] = CF_DWORD1(POP_COUNT(0),
1382                            CF_CONST(0),
1383                            COND(SQ_CF_COND_NOT_BOOL),
1384                            I_COUNT(0),
1385                            VALID_PIXEL_MODE(0),
1386                            END_OF_PROGRAM(0),
1387                            CF_INST(SQ_CF_INST_CALL),
1388                            WHOLE_QUAD_MODE(0),
1389                            BARRIER(0));
1390    /* 2 */
1391    shader[i++] = CF_DWORD0(ADDR(0),
1392                            JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1393    shader[i++] = CF_DWORD1(POP_COUNT(0),
1394                            CF_CONST(0),
1395                            COND(SQ_CF_COND_ACTIVE),
1396                            I_COUNT(0),
1397                            VALID_PIXEL_MODE(0),
1398                            END_OF_PROGRAM(1),
1399                            CF_INST(SQ_CF_INST_NOP),
1400                            WHOLE_QUAD_MODE(0),
1401                            BARRIER(1));
1402    /* 3 - mask sub */
1403    shader[i++] = CF_DWORD0(ADDR(44),
1404			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1405    shader[i++] = CF_DWORD1(POP_COUNT(0),
1406			    CF_CONST(0),
1407			    COND(SQ_CF_COND_ACTIVE),
1408			    I_COUNT(3),
1409			    VALID_PIXEL_MODE(0),
1410			    END_OF_PROGRAM(0),
1411			    CF_INST(SQ_CF_INST_VC),
1412			    WHOLE_QUAD_MODE(0),
1413			    BARRIER(1));
1414
1415    /* 4 - ALU */
1416    shader[i++] = CF_ALU_DWORD0(ADDR(14),
1417				KCACHE_BANK0(0),
1418				KCACHE_BANK1(0),
1419				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
1420    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1421				KCACHE_ADDR0(0),
1422				KCACHE_ADDR1(0),
1423				I_COUNT(20),
1424				ALT_CONST(0),
1425				CF_INST(SQ_CF_INST_ALU),
1426				WHOLE_QUAD_MODE(0),
1427				BARRIER(1));
1428
1429    /* 5 - dst */
1430    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1431					  TYPE(SQ_EXPORT_POS),
1432					  RW_GPR(2),
1433					  RW_REL(ABSOLUTE),
1434					  INDEX_GPR(0),
1435					  ELEM_SIZE(0));
1436    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1437					       SRC_SEL_Y(SQ_SEL_Y),
1438					       SRC_SEL_Z(SQ_SEL_0),
1439					       SRC_SEL_W(SQ_SEL_1),
1440					       BURST_COUNT(1),
1441					       VALID_PIXEL_MODE(0),
1442					       END_OF_PROGRAM(0),
1443					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1444					       MARK(0),
1445					       BARRIER(1));
1446    /* 6 - src */
1447    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1448					  TYPE(SQ_EXPORT_PARAM),
1449					  RW_GPR(1),
1450					  RW_REL(ABSOLUTE),
1451					  INDEX_GPR(0),
1452					  ELEM_SIZE(0));
1453    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1454					       SRC_SEL_Y(SQ_SEL_Y),
1455					       SRC_SEL_Z(SQ_SEL_0),
1456					       SRC_SEL_W(SQ_SEL_1),
1457					       BURST_COUNT(1),
1458					       VALID_PIXEL_MODE(0),
1459					       END_OF_PROGRAM(0),
1460					       CF_INST(SQ_CF_INST_EXPORT),
1461					       MARK(0),
1462					       BARRIER(0));
1463    /* 7 - mask */
1464    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
1465					  TYPE(SQ_EXPORT_PARAM),
1466					  RW_GPR(0),
1467					  RW_REL(ABSOLUTE),
1468					  INDEX_GPR(0),
1469					  ELEM_SIZE(0));
1470    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1471					       SRC_SEL_Y(SQ_SEL_Y),
1472					       SRC_SEL_Z(SQ_SEL_0),
1473					       SRC_SEL_W(SQ_SEL_1),
1474					       BURST_COUNT(1),
1475					       VALID_PIXEL_MODE(0),
1476					       END_OF_PROGRAM(0),
1477					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1478					       WHOLE_QUAD_MODE(0),
1479					       BARRIER(0));
1480    /* 8 */
1481    shader[i++] = CF_DWORD0(ADDR(0),
1482			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1483    shader[i++] = CF_DWORD1(POP_COUNT(0),
1484			    CF_CONST(0),
1485			    COND(SQ_CF_COND_ACTIVE),
1486			    I_COUNT(0),
1487			    VALID_PIXEL_MODE(0),
1488			    END_OF_PROGRAM(0),
1489			    CF_INST(SQ_CF_INST_RETURN),
1490			    WHOLE_QUAD_MODE(0),
1491			    BARRIER(1));
1492    /* 9 - non-mask sub */
1493    shader[i++] = CF_DWORD0(ADDR(50),
1494			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1495    shader[i++] = CF_DWORD1(POP_COUNT(0),
1496			    CF_CONST(0),
1497			    COND(SQ_CF_COND_ACTIVE),
1498			    I_COUNT(2),
1499			    VALID_PIXEL_MODE(0),
1500			    END_OF_PROGRAM(0),
1501			    CF_INST(SQ_CF_INST_VC),
1502			    WHOLE_QUAD_MODE(0),
1503			    BARRIER(1));
1504
1505    /* 10 - ALU */
1506    shader[i++] = CF_ALU_DWORD0(ADDR(34),
1507				KCACHE_BANK0(0),
1508				KCACHE_BANK1(0),
1509				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
1510    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1511				KCACHE_ADDR0(0),
1512				KCACHE_ADDR1(0),
1513				I_COUNT(10),
1514				ALT_CONST(0),
1515				CF_INST(SQ_CF_INST_ALU),
1516				WHOLE_QUAD_MODE(0),
1517				BARRIER(1));
1518
1519    /* 11 - dst */
1520    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1521					  TYPE(SQ_EXPORT_POS),
1522					  RW_GPR(1),
1523					  RW_REL(ABSOLUTE),
1524					  INDEX_GPR(0),
1525					  ELEM_SIZE(0));
1526    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1527					       SRC_SEL_Y(SQ_SEL_Y),
1528					       SRC_SEL_Z(SQ_SEL_0),
1529					       SRC_SEL_W(SQ_SEL_1),
1530					       BURST_COUNT(0),
1531					       VALID_PIXEL_MODE(0),
1532					       END_OF_PROGRAM(0),
1533					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1534					       MARK(0),
1535					       BARRIER(1));
1536    /* 12 - src */
1537    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1538					  TYPE(SQ_EXPORT_PARAM),
1539					  RW_GPR(0),
1540					  RW_REL(ABSOLUTE),
1541					  INDEX_GPR(0),
1542					  ELEM_SIZE(0));
1543    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1544					       SRC_SEL_Y(SQ_SEL_Y),
1545					       SRC_SEL_Z(SQ_SEL_0),
1546					       SRC_SEL_W(SQ_SEL_1),
1547					       BURST_COUNT(0),
1548					       VALID_PIXEL_MODE(0),
1549					       END_OF_PROGRAM(0),
1550					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1551					       MARK(0),
1552					       BARRIER(0));
1553    /* 13 */
1554    shader[i++] = CF_DWORD0(ADDR(0),
1555			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1556    shader[i++] = CF_DWORD1(POP_COUNT(0),
1557			    CF_CONST(0),
1558			    COND(SQ_CF_COND_ACTIVE),
1559			    I_COUNT(0),
1560			    VALID_PIXEL_MODE(0),
1561			    END_OF_PROGRAM(0),
1562			    CF_INST(SQ_CF_INST_RETURN),
1563			    WHOLE_QUAD_MODE(0),
1564			    BARRIER(1));
1565
1566    /* 14 srcX.x DOT4 - mask */
1567    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1568                             SRC0_REL(ABSOLUTE),
1569                             SRC0_ELEM(ELEM_X),
1570                             SRC0_NEG(0),
1571                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1572                             SRC1_REL(ABSOLUTE),
1573                             SRC1_ELEM(ELEM_X),
1574                             SRC1_NEG(0),
1575                             INDEX_MODE(SQ_INDEX_LOOP),
1576                             PRED_SEL(SQ_PRED_SEL_OFF),
1577                             LAST(0));
1578    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1579                                 SRC1_ABS(0),
1580                                 UPDATE_EXECUTE_MASK(0),
1581                                 UPDATE_PRED(0),
1582                                 WRITE_MASK(1),
1583                                 OMOD(SQ_ALU_OMOD_OFF),
1584                                 ALU_INST(SQ_OP2_INST_DOT4),
1585                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1586                                 DST_GPR(3),
1587                                 DST_REL(ABSOLUTE),
1588                                 DST_ELEM(ELEM_X),
1589                                 CLAMP(0));
1590
1591    /* 15 srcX.y DOT4 - mask */
1592    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1593                             SRC0_REL(ABSOLUTE),
1594                             SRC0_ELEM(ELEM_Y),
1595                             SRC0_NEG(0),
1596                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1597                             SRC1_REL(ABSOLUTE),
1598                             SRC1_ELEM(ELEM_Y),
1599                             SRC1_NEG(0),
1600                             INDEX_MODE(SQ_INDEX_LOOP),
1601                             PRED_SEL(SQ_PRED_SEL_OFF),
1602                             LAST(0));
1603    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1604                                 SRC1_ABS(0),
1605                                 UPDATE_EXECUTE_MASK(0),
1606                                 UPDATE_PRED(0),
1607                                 WRITE_MASK(0),
1608                                 OMOD(SQ_ALU_OMOD_OFF),
1609                                 ALU_INST(SQ_OP2_INST_DOT4),
1610                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1611                                 DST_GPR(3),
1612                                 DST_REL(ABSOLUTE),
1613                                 DST_ELEM(ELEM_Y),
1614                                 CLAMP(0));
1615
1616    /* 16 srcX.z DOT4 - mask */
1617    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1618                             SRC0_REL(ABSOLUTE),
1619                             SRC0_ELEM(ELEM_Z),
1620                             SRC0_NEG(0),
1621                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1622                             SRC1_REL(ABSOLUTE),
1623                             SRC1_ELEM(ELEM_Z),
1624                             SRC1_NEG(0),
1625                             INDEX_MODE(SQ_INDEX_LOOP),
1626                             PRED_SEL(SQ_PRED_SEL_OFF),
1627                             LAST(0));
1628    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1629                                 SRC1_ABS(0),
1630                                 UPDATE_EXECUTE_MASK(0),
1631                                 UPDATE_PRED(0),
1632                                 WRITE_MASK(0),
1633                                 OMOD(SQ_ALU_OMOD_OFF),
1634                                 ALU_INST(SQ_OP2_INST_DOT4),
1635                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1636                                 DST_GPR(3),
1637                                 DST_REL(ABSOLUTE),
1638                                 DST_ELEM(ELEM_Z),
1639                                 CLAMP(0));
1640
1641    /* 17 srcX.w DOT4 - mask */
1642    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1643                             SRC0_REL(ABSOLUTE),
1644                             SRC0_ELEM(ELEM_W),
1645                             SRC0_NEG(0),
1646                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1647                             SRC1_REL(ABSOLUTE),
1648                             SRC1_ELEM(ELEM_W),
1649                             SRC1_NEG(0),
1650                             INDEX_MODE(SQ_INDEX_LOOP),
1651                             PRED_SEL(SQ_PRED_SEL_OFF),
1652                             LAST(1));
1653    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1654                                 SRC1_ABS(0),
1655                                 UPDATE_EXECUTE_MASK(0),
1656                                 UPDATE_PRED(0),
1657                                 WRITE_MASK(0),
1658                                 OMOD(SQ_ALU_OMOD_OFF),
1659                                 ALU_INST(SQ_OP2_INST_DOT4),
1660                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1661                                 DST_GPR(3),
1662                                 DST_REL(ABSOLUTE),
1663                                 DST_ELEM(ELEM_W),
1664                                 CLAMP(0));
1665
1666    /* 18 srcY.x DOT4 - mask */
1667    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1668                             SRC0_REL(ABSOLUTE),
1669                             SRC0_ELEM(ELEM_X),
1670                             SRC0_NEG(0),
1671                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1672                             SRC1_REL(ABSOLUTE),
1673                             SRC1_ELEM(ELEM_X),
1674                             SRC1_NEG(0),
1675                             INDEX_MODE(SQ_INDEX_LOOP),
1676                             PRED_SEL(SQ_PRED_SEL_OFF),
1677                             LAST(0));
1678    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1679                                 SRC1_ABS(0),
1680                                 UPDATE_EXECUTE_MASK(0),
1681                                 UPDATE_PRED(0),
1682                                 WRITE_MASK(0),
1683                                 OMOD(SQ_ALU_OMOD_OFF),
1684                                 ALU_INST(SQ_OP2_INST_DOT4),
1685                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1686                                 DST_GPR(3),
1687                                 DST_REL(ABSOLUTE),
1688                                 DST_ELEM(ELEM_X),
1689                                 CLAMP(0));
1690
1691    /* 19 srcY.y DOT4 - mask */
1692    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1693                             SRC0_REL(ABSOLUTE),
1694                             SRC0_ELEM(ELEM_Y),
1695                             SRC0_NEG(0),
1696                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1697                             SRC1_REL(ABSOLUTE),
1698                             SRC1_ELEM(ELEM_Y),
1699                             SRC1_NEG(0),
1700                             INDEX_MODE(SQ_INDEX_LOOP),
1701                             PRED_SEL(SQ_PRED_SEL_OFF),
1702                             LAST(0));
1703    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1704                                 SRC1_ABS(0),
1705                                 UPDATE_EXECUTE_MASK(0),
1706                                 UPDATE_PRED(0),
1707                                 WRITE_MASK(1),
1708                                 OMOD(SQ_ALU_OMOD_OFF),
1709                                 ALU_INST(SQ_OP2_INST_DOT4),
1710                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1711                                 DST_GPR(3),
1712                                 DST_REL(ABSOLUTE),
1713                                 DST_ELEM(ELEM_Y),
1714                                 CLAMP(0));
1715
1716    /* 20 srcY.z DOT4 - mask */
1717    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1718                             SRC0_REL(ABSOLUTE),
1719                             SRC0_ELEM(ELEM_Z),
1720                             SRC0_NEG(0),
1721                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1722                             SRC1_REL(ABSOLUTE),
1723                             SRC1_ELEM(ELEM_Z),
1724                             SRC1_NEG(0),
1725                             INDEX_MODE(SQ_INDEX_LOOP),
1726                             PRED_SEL(SQ_PRED_SEL_OFF),
1727                             LAST(0));
1728    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1729                                 SRC1_ABS(0),
1730                                 UPDATE_EXECUTE_MASK(0),
1731                                 UPDATE_PRED(0),
1732                                 WRITE_MASK(0),
1733                                 OMOD(SQ_ALU_OMOD_OFF),
1734                                 ALU_INST(SQ_OP2_INST_DOT4),
1735                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1736                                 DST_GPR(3),
1737                                 DST_REL(ABSOLUTE),
1738                                 DST_ELEM(ELEM_Z),
1739                                 CLAMP(0));
1740
1741    /* 21 srcY.w DOT4 - mask */
1742    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1743                             SRC0_REL(ABSOLUTE),
1744                             SRC0_ELEM(ELEM_W),
1745                             SRC0_NEG(0),
1746                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1747                             SRC1_REL(ABSOLUTE),
1748                             SRC1_ELEM(ELEM_W),
1749                             SRC1_NEG(0),
1750                             INDEX_MODE(SQ_INDEX_LOOP),
1751                             PRED_SEL(SQ_PRED_SEL_OFF),
1752                             LAST(1));
1753    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1754                                 SRC1_ABS(0),
1755                                 UPDATE_EXECUTE_MASK(0),
1756                                 UPDATE_PRED(0),
1757                                 WRITE_MASK(0),
1758                                 OMOD(SQ_ALU_OMOD_OFF),
1759                                 ALU_INST(SQ_OP2_INST_DOT4),
1760                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1761                                 DST_GPR(3),
1762                                 DST_REL(ABSOLUTE),
1763                                 DST_ELEM(ELEM_W),
1764                                 CLAMP(0));
1765
1766    /* 22 maskX.x DOT4 - mask */
1767    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1768                             SRC0_REL(ABSOLUTE),
1769                             SRC0_ELEM(ELEM_X),
1770                             SRC0_NEG(0),
1771                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1772                             SRC1_REL(ABSOLUTE),
1773                             SRC1_ELEM(ELEM_X),
1774                             SRC1_NEG(0),
1775                             INDEX_MODE(SQ_INDEX_LOOP),
1776                             PRED_SEL(SQ_PRED_SEL_OFF),
1777                             LAST(0));
1778    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1779                                 SRC1_ABS(0),
1780                                 UPDATE_EXECUTE_MASK(0),
1781                                 UPDATE_PRED(0),
1782                                 WRITE_MASK(1),
1783                                 OMOD(SQ_ALU_OMOD_OFF),
1784                                 ALU_INST(SQ_OP2_INST_DOT4),
1785                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1786                                 DST_GPR(4),
1787                                 DST_REL(ABSOLUTE),
1788                                 DST_ELEM(ELEM_X),
1789                                 CLAMP(0));
1790
1791    /* 23 maskX.y DOT4 - mask */
1792    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1793                             SRC0_REL(ABSOLUTE),
1794                             SRC0_ELEM(ELEM_Y),
1795                             SRC0_NEG(0),
1796                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1797                             SRC1_REL(ABSOLUTE),
1798                             SRC1_ELEM(ELEM_Y),
1799                             SRC1_NEG(0),
1800                             INDEX_MODE(SQ_INDEX_LOOP),
1801                             PRED_SEL(SQ_PRED_SEL_OFF),
1802                             LAST(0));
1803    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1804                                 SRC1_ABS(0),
1805                                 UPDATE_EXECUTE_MASK(0),
1806                                 UPDATE_PRED(0),
1807                                 WRITE_MASK(0),
1808                                 OMOD(SQ_ALU_OMOD_OFF),
1809                                 ALU_INST(SQ_OP2_INST_DOT4),
1810                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1811                                 DST_GPR(4),
1812                                 DST_REL(ABSOLUTE),
1813                                 DST_ELEM(ELEM_Y),
1814                                 CLAMP(0));
1815
1816    /* 24 maskX.z DOT4 - mask */
1817    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1818                             SRC0_REL(ABSOLUTE),
1819                             SRC0_ELEM(ELEM_Z),
1820                             SRC0_NEG(0),
1821                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1822                             SRC1_REL(ABSOLUTE),
1823                             SRC1_ELEM(ELEM_Z),
1824                             SRC1_NEG(0),
1825                             INDEX_MODE(SQ_INDEX_LOOP),
1826                             PRED_SEL(SQ_PRED_SEL_OFF),
1827                             LAST(0));
1828    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1829                                 SRC1_ABS(0),
1830                                 UPDATE_EXECUTE_MASK(0),
1831                                 UPDATE_PRED(0),
1832                                 WRITE_MASK(0),
1833                                 OMOD(SQ_ALU_OMOD_OFF),
1834                                 ALU_INST(SQ_OP2_INST_DOT4),
1835                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1836                                 DST_GPR(4),
1837                                 DST_REL(ABSOLUTE),
1838                                 DST_ELEM(ELEM_Z),
1839                                 CLAMP(0));
1840
1841    /* 25 maskX.w DOT4 - mask */
1842    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1843                             SRC0_REL(ABSOLUTE),
1844                             SRC0_ELEM(ELEM_W),
1845                             SRC0_NEG(0),
1846                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1847                             SRC1_REL(ABSOLUTE),
1848                             SRC1_ELEM(ELEM_W),
1849                             SRC1_NEG(0),
1850                             INDEX_MODE(SQ_INDEX_LOOP),
1851                             PRED_SEL(SQ_PRED_SEL_OFF),
1852                             LAST(1));
1853    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1854                                 SRC1_ABS(0),
1855                                 UPDATE_EXECUTE_MASK(0),
1856                                 UPDATE_PRED(0),
1857                                 WRITE_MASK(0),
1858                                 OMOD(SQ_ALU_OMOD_OFF),
1859                                 ALU_INST(SQ_OP2_INST_DOT4),
1860                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1861                                 DST_GPR(4),
1862                                 DST_REL(ABSOLUTE),
1863                                 DST_ELEM(ELEM_W),
1864                                 CLAMP(0));
1865
1866    /* 26 maskY.x DOT4 - mask */
1867    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1868                             SRC0_REL(ABSOLUTE),
1869                             SRC0_ELEM(ELEM_X),
1870                             SRC0_NEG(0),
1871                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1872                             SRC1_REL(ABSOLUTE),
1873                             SRC1_ELEM(ELEM_X),
1874                             SRC1_NEG(0),
1875                             INDEX_MODE(SQ_INDEX_LOOP),
1876                             PRED_SEL(SQ_PRED_SEL_OFF),
1877                             LAST(0));
1878    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1879                                 SRC1_ABS(0),
1880                                 UPDATE_EXECUTE_MASK(0),
1881                                 UPDATE_PRED(0),
1882                                 WRITE_MASK(0),
1883                                 OMOD(SQ_ALU_OMOD_OFF),
1884                                 ALU_INST(SQ_OP2_INST_DOT4),
1885                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1886                                 DST_GPR(4),
1887                                 DST_REL(ABSOLUTE),
1888                                 DST_ELEM(ELEM_X),
1889                                 CLAMP(0));
1890
1891    /* 27 maskY.y DOT4 - mask */
1892    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1893                             SRC0_REL(ABSOLUTE),
1894                             SRC0_ELEM(ELEM_Y),
1895                             SRC0_NEG(0),
1896                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1897                             SRC1_REL(ABSOLUTE),
1898                             SRC1_ELEM(ELEM_Y),
1899                             SRC1_NEG(0),
1900                             INDEX_MODE(SQ_INDEX_LOOP),
1901                             PRED_SEL(SQ_PRED_SEL_OFF),
1902                             LAST(0));
1903    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1904                                 SRC1_ABS(0),
1905                                 UPDATE_EXECUTE_MASK(0),
1906                                 UPDATE_PRED(0),
1907                                 WRITE_MASK(1),
1908                                 OMOD(SQ_ALU_OMOD_OFF),
1909                                 ALU_INST(SQ_OP2_INST_DOT4),
1910                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1911                                 DST_GPR(4),
1912                                 DST_REL(ABSOLUTE),
1913                                 DST_ELEM(ELEM_Y),
1914                                 CLAMP(0));
1915
1916    /* 28 maskY.z DOT4 - mask */
1917    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1918                             SRC0_REL(ABSOLUTE),
1919                             SRC0_ELEM(ELEM_Z),
1920                             SRC0_NEG(0),
1921                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1922                             SRC1_REL(ABSOLUTE),
1923                             SRC1_ELEM(ELEM_Z),
1924                             SRC1_NEG(0),
1925                             INDEX_MODE(SQ_INDEX_LOOP),
1926                             PRED_SEL(SQ_PRED_SEL_OFF),
1927                             LAST(0));
1928    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1929                                 SRC1_ABS(0),
1930                                 UPDATE_EXECUTE_MASK(0),
1931                                 UPDATE_PRED(0),
1932                                 WRITE_MASK(0),
1933                                 OMOD(SQ_ALU_OMOD_OFF),
1934                                 ALU_INST(SQ_OP2_INST_DOT4),
1935                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1936                                 DST_GPR(4),
1937                                 DST_REL(ABSOLUTE),
1938                                 DST_ELEM(ELEM_Z),
1939                                 CLAMP(0));
1940
1941    /* 29 maskY.w DOT4 - mask */
1942    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1943                             SRC0_REL(ABSOLUTE),
1944                             SRC0_ELEM(ELEM_W),
1945                             SRC0_NEG(0),
1946                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1947                             SRC1_REL(ABSOLUTE),
1948                             SRC1_ELEM(ELEM_W),
1949                             SRC1_NEG(0),
1950                             INDEX_MODE(SQ_INDEX_LOOP),
1951                             PRED_SEL(SQ_PRED_SEL_OFF),
1952                             LAST(1));
1953    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1954                                 SRC1_ABS(0),
1955                                 UPDATE_EXECUTE_MASK(0),
1956                                 UPDATE_PRED(0),
1957                                 WRITE_MASK(0),
1958                                 OMOD(SQ_ALU_OMOD_OFF),
1959                                 ALU_INST(SQ_OP2_INST_DOT4),
1960                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1961                                 DST_GPR(4),
1962                                 DST_REL(ABSOLUTE),
1963                                 DST_ELEM(ELEM_W),
1964                                 CLAMP(0));
1965
1966    /* 30 srcX / w */
1967    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
1968                             SRC0_REL(ABSOLUTE),
1969                             SRC0_ELEM(ELEM_X),
1970                             SRC0_NEG(0),
1971                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1972                             SRC1_REL(ABSOLUTE),
1973                             SRC1_ELEM(ELEM_W),
1974                             SRC1_NEG(0),
1975                             INDEX_MODE(SQ_INDEX_AR_X),
1976                             PRED_SEL(SQ_PRED_SEL_OFF),
1977                             LAST(1));
1978    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1979                                 SRC1_ABS(0),
1980                                 UPDATE_EXECUTE_MASK(0),
1981                                 UPDATE_PRED(0),
1982                                 WRITE_MASK(1),
1983                                 OMOD(SQ_ALU_OMOD_OFF),
1984                                 ALU_INST(SQ_OP2_INST_MUL),
1985                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1986                                 DST_GPR(1),
1987                                 DST_REL(ABSOLUTE),
1988                                 DST_ELEM(ELEM_X),
1989                                 CLAMP(0));
1990
1991    /* 31 srcY / h */
1992    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
1993                             SRC0_REL(ABSOLUTE),
1994                             SRC0_ELEM(ELEM_Y),
1995                             SRC0_NEG(0),
1996                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1997                             SRC1_REL(ABSOLUTE),
1998                             SRC1_ELEM(ELEM_W),
1999                             SRC1_NEG(0),
2000                             INDEX_MODE(SQ_INDEX_AR_X),
2001                             PRED_SEL(SQ_PRED_SEL_OFF),
2002                             LAST(1));
2003    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2004                                 SRC1_ABS(0),
2005                                 UPDATE_EXECUTE_MASK(0),
2006                                 UPDATE_PRED(0),
2007                                 WRITE_MASK(1),
2008                                 OMOD(SQ_ALU_OMOD_OFF),
2009                                 ALU_INST(SQ_OP2_INST_MUL),
2010                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2011                                 DST_GPR(1),
2012                                 DST_REL(ABSOLUTE),
2013                                 DST_ELEM(ELEM_Y),
2014                                 CLAMP(0));
2015
2016    /* 32 maskX / w */
2017    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
2018                             SRC0_REL(ABSOLUTE),
2019                             SRC0_ELEM(ELEM_X),
2020                             SRC0_NEG(0),
2021                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
2022                             SRC1_REL(ABSOLUTE),
2023                             SRC1_ELEM(ELEM_W),
2024                             SRC1_NEG(0),
2025                             INDEX_MODE(SQ_INDEX_AR_X),
2026                             PRED_SEL(SQ_PRED_SEL_OFF),
2027                             LAST(1));
2028    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2029                                 SRC1_ABS(0),
2030                                 UPDATE_EXECUTE_MASK(0),
2031                                 UPDATE_PRED(0),
2032                                 WRITE_MASK(1),
2033                                 OMOD(SQ_ALU_OMOD_OFF),
2034                                 ALU_INST(SQ_OP2_INST_MUL),
2035                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2036                                 DST_GPR(0),
2037                                 DST_REL(ABSOLUTE),
2038                                 DST_ELEM(ELEM_X),
2039                                 CLAMP(0));
2040
2041    /* 33 maskY / h */
2042    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
2043                             SRC0_REL(ABSOLUTE),
2044                             SRC0_ELEM(ELEM_Y),
2045                             SRC0_NEG(0),
2046                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
2047                             SRC1_REL(ABSOLUTE),
2048                             SRC1_ELEM(ELEM_W),
2049                             SRC1_NEG(0),
2050                             INDEX_MODE(SQ_INDEX_AR_X),
2051                             PRED_SEL(SQ_PRED_SEL_OFF),
2052                             LAST(1));
2053    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2054                                 SRC1_ABS(0),
2055                                 UPDATE_EXECUTE_MASK(0),
2056                                 UPDATE_PRED(0),
2057                                 WRITE_MASK(1),
2058                                 OMOD(SQ_ALU_OMOD_OFF),
2059                                 ALU_INST(SQ_OP2_INST_MUL),
2060                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2061                                 DST_GPR(0),
2062                                 DST_REL(ABSOLUTE),
2063                                 DST_ELEM(ELEM_Y),
2064                                 CLAMP(0));
2065
2066    /* 34 srcX.x DOT4 - non-mask */
2067    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2068                             SRC0_REL(ABSOLUTE),
2069                             SRC0_ELEM(ELEM_X),
2070                             SRC0_NEG(0),
2071                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2072                             SRC1_REL(ABSOLUTE),
2073                             SRC1_ELEM(ELEM_X),
2074                             SRC1_NEG(0),
2075                             INDEX_MODE(SQ_INDEX_LOOP),
2076                             PRED_SEL(SQ_PRED_SEL_OFF),
2077                             LAST(0));
2078    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2079                                 SRC1_ABS(0),
2080                                 UPDATE_EXECUTE_MASK(0),
2081                                 UPDATE_PRED(0),
2082                                 WRITE_MASK(1),
2083                                 OMOD(SQ_ALU_OMOD_OFF),
2084                                 ALU_INST(SQ_OP2_INST_DOT4),
2085                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2086                                 DST_GPR(2),
2087                                 DST_REL(ABSOLUTE),
2088                                 DST_ELEM(ELEM_X),
2089                                 CLAMP(0));
2090
2091    /* 35 srcX.y DOT4 - non-mask */
2092    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2093                             SRC0_REL(ABSOLUTE),
2094                             SRC0_ELEM(ELEM_Y),
2095                             SRC0_NEG(0),
2096                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2097                             SRC1_REL(ABSOLUTE),
2098                             SRC1_ELEM(ELEM_Y),
2099                             SRC1_NEG(0),
2100                             INDEX_MODE(SQ_INDEX_LOOP),
2101                             PRED_SEL(SQ_PRED_SEL_OFF),
2102                             LAST(0));
2103    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2104                                 SRC1_ABS(0),
2105                                 UPDATE_EXECUTE_MASK(0),
2106                                 UPDATE_PRED(0),
2107                                 WRITE_MASK(0),
2108                                 OMOD(SQ_ALU_OMOD_OFF),
2109                                 ALU_INST(SQ_OP2_INST_DOT4),
2110                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2111                                 DST_GPR(2),
2112                                 DST_REL(ABSOLUTE),
2113                                 DST_ELEM(ELEM_Y),
2114                                 CLAMP(0));
2115
2116    /* 36 srcX.z DOT4 - non-mask */
2117    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2118                             SRC0_REL(ABSOLUTE),
2119                             SRC0_ELEM(ELEM_Z),
2120                             SRC0_NEG(0),
2121                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2122                             SRC1_REL(ABSOLUTE),
2123                             SRC1_ELEM(ELEM_Z),
2124                             SRC1_NEG(0),
2125                             INDEX_MODE(SQ_INDEX_LOOP),
2126                             PRED_SEL(SQ_PRED_SEL_OFF),
2127                             LAST(0));
2128    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2129                                 SRC1_ABS(0),
2130                                 UPDATE_EXECUTE_MASK(0),
2131                                 UPDATE_PRED(0),
2132                                 WRITE_MASK(0),
2133                                 OMOD(SQ_ALU_OMOD_OFF),
2134                                 ALU_INST(SQ_OP2_INST_DOT4),
2135                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2136                                 DST_GPR(2),
2137                                 DST_REL(ABSOLUTE),
2138                                 DST_ELEM(ELEM_Z),
2139                                 CLAMP(0));
2140
2141    /* 37 srcX.w DOT4 - non-mask */
2142    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2143                             SRC0_REL(ABSOLUTE),
2144                             SRC0_ELEM(ELEM_W),
2145                             SRC0_NEG(0),
2146                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2147                             SRC1_REL(ABSOLUTE),
2148                             SRC1_ELEM(ELEM_W),
2149                             SRC1_NEG(0),
2150                             INDEX_MODE(SQ_INDEX_LOOP),
2151                             PRED_SEL(SQ_PRED_SEL_OFF),
2152                             LAST(1));
2153    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2154                                 SRC1_ABS(0),
2155                                 UPDATE_EXECUTE_MASK(0),
2156                                 UPDATE_PRED(0),
2157                                 WRITE_MASK(0),
2158                                 OMOD(SQ_ALU_OMOD_OFF),
2159                                 ALU_INST(SQ_OP2_INST_DOT4),
2160                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2161                                 DST_GPR(2),
2162                                 DST_REL(ABSOLUTE),
2163                                 DST_ELEM(ELEM_W),
2164                                 CLAMP(0));
2165
2166    /* 38 srcY.x DOT4 - non-mask */
2167    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2168                             SRC0_REL(ABSOLUTE),
2169                             SRC0_ELEM(ELEM_X),
2170                             SRC0_NEG(0),
2171                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2172                             SRC1_REL(ABSOLUTE),
2173                             SRC1_ELEM(ELEM_X),
2174                             SRC1_NEG(0),
2175                             INDEX_MODE(SQ_INDEX_LOOP),
2176                             PRED_SEL(SQ_PRED_SEL_OFF),
2177                             LAST(0));
2178    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2179                                 SRC1_ABS(0),
2180                                 UPDATE_EXECUTE_MASK(0),
2181                                 UPDATE_PRED(0),
2182                                 WRITE_MASK(0),
2183                                 OMOD(SQ_ALU_OMOD_OFF),
2184                                 ALU_INST(SQ_OP2_INST_DOT4),
2185                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2186                                 DST_GPR(2),
2187                                 DST_REL(ABSOLUTE),
2188                                 DST_ELEM(ELEM_X),
2189                                 CLAMP(0));
2190
2191    /* 39 srcY.y DOT4 - non-mask */
2192    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2193                             SRC0_REL(ABSOLUTE),
2194                             SRC0_ELEM(ELEM_Y),
2195                             SRC0_NEG(0),
2196                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2197                             SRC1_REL(ABSOLUTE),
2198                             SRC1_ELEM(ELEM_Y),
2199                             SRC1_NEG(0),
2200                             INDEX_MODE(SQ_INDEX_LOOP),
2201                             PRED_SEL(SQ_PRED_SEL_OFF),
2202                             LAST(0));
2203    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2204                                 SRC1_ABS(0),
2205                                 UPDATE_EXECUTE_MASK(0),
2206                                 UPDATE_PRED(0),
2207                                 WRITE_MASK(1),
2208                                 OMOD(SQ_ALU_OMOD_OFF),
2209                                 ALU_INST(SQ_OP2_INST_DOT4),
2210                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2211                                 DST_GPR(2),
2212                                 DST_REL(ABSOLUTE),
2213                                 DST_ELEM(ELEM_Y),
2214                                 CLAMP(0));
2215
2216    /* 40 srcY.z DOT4 - non-mask */
2217    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2218                             SRC0_REL(ABSOLUTE),
2219                             SRC0_ELEM(ELEM_Z),
2220                             SRC0_NEG(0),
2221                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2222                             SRC1_REL(ABSOLUTE),
2223                             SRC1_ELEM(ELEM_Z),
2224                             SRC1_NEG(0),
2225                             INDEX_MODE(SQ_INDEX_LOOP),
2226                             PRED_SEL(SQ_PRED_SEL_OFF),
2227                             LAST(0));
2228    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2229                                 SRC1_ABS(0),
2230                                 UPDATE_EXECUTE_MASK(0),
2231                                 UPDATE_PRED(0),
2232                                 WRITE_MASK(0),
2233                                 OMOD(SQ_ALU_OMOD_OFF),
2234                                 ALU_INST(SQ_OP2_INST_DOT4),
2235                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2236                                 DST_GPR(2),
2237                                 DST_REL(ABSOLUTE),
2238                                 DST_ELEM(ELEM_Z),
2239                                 CLAMP(0));
2240
2241    /* 41 srcY.w DOT4 - non-mask */
2242    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2243                             SRC0_REL(ABSOLUTE),
2244                             SRC0_ELEM(ELEM_W),
2245                             SRC0_NEG(0),
2246                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2247                             SRC1_REL(ABSOLUTE),
2248                             SRC1_ELEM(ELEM_W),
2249                             SRC1_NEG(0),
2250                             INDEX_MODE(SQ_INDEX_LOOP),
2251                             PRED_SEL(SQ_PRED_SEL_OFF),
2252                             LAST(1));
2253    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2254                                 SRC1_ABS(0),
2255                                 UPDATE_EXECUTE_MASK(0),
2256                                 UPDATE_PRED(0),
2257                                 WRITE_MASK(0),
2258                                 OMOD(SQ_ALU_OMOD_OFF),
2259                                 ALU_INST(SQ_OP2_INST_DOT4),
2260                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2261                                 DST_GPR(2),
2262                                 DST_REL(ABSOLUTE),
2263                                 DST_ELEM(ELEM_W),
2264                                 CLAMP(0));
2265
2266    /* 42 srcX / w */
2267    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
2268                             SRC0_REL(ABSOLUTE),
2269                             SRC0_ELEM(ELEM_X),
2270                             SRC0_NEG(0),
2271                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2272                             SRC1_REL(ABSOLUTE),
2273                             SRC1_ELEM(ELEM_W),
2274                             SRC1_NEG(0),
2275                             INDEX_MODE(SQ_INDEX_AR_X),
2276                             PRED_SEL(SQ_PRED_SEL_OFF),
2277                             LAST(1));
2278    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2279                                 SRC1_ABS(0),
2280                                 UPDATE_EXECUTE_MASK(0),
2281                                 UPDATE_PRED(0),
2282                                 WRITE_MASK(1),
2283                                 OMOD(SQ_ALU_OMOD_OFF),
2284                                 ALU_INST(SQ_OP2_INST_MUL),
2285                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2286                                 DST_GPR(0),
2287                                 DST_REL(ABSOLUTE),
2288                                 DST_ELEM(ELEM_X),
2289                                 CLAMP(0));
2290
2291    /* 43 srcY / h */
2292    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
2293                             SRC0_REL(ABSOLUTE),
2294                             SRC0_ELEM(ELEM_Y),
2295                             SRC0_NEG(0),
2296                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2297                             SRC1_REL(ABSOLUTE),
2298                             SRC1_ELEM(ELEM_W),
2299                             SRC1_NEG(0),
2300                             INDEX_MODE(SQ_INDEX_AR_X),
2301                             PRED_SEL(SQ_PRED_SEL_OFF),
2302                             LAST(1));
2303    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2304                                 SRC1_ABS(0),
2305                                 UPDATE_EXECUTE_MASK(0),
2306                                 UPDATE_PRED(0),
2307                                 WRITE_MASK(1),
2308                                 OMOD(SQ_ALU_OMOD_OFF),
2309                                 ALU_INST(SQ_OP2_INST_MUL),
2310                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2311                                 DST_GPR(0),
2312                                 DST_REL(ABSOLUTE),
2313                                 DST_ELEM(ELEM_Y),
2314                                 CLAMP(0));
2315
2316    /* mask vfetch - 44/45 - dst */
2317    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2318			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2319			     FETCH_WHOLE_QUAD(0),
2320			     BUFFER_ID(0),
2321			     SRC_GPR(0),
2322			     SRC_REL(ABSOLUTE),
2323			     SRC_SEL_X(SQ_SEL_X),
2324			     MEGA_FETCH_COUNT(24));
2325    shader[i++] = VTX_DWORD1_GPR(DST_GPR(2),
2326				 DST_REL(0),
2327				 DST_SEL_X(SQ_SEL_X),
2328				 DST_SEL_Y(SQ_SEL_Y),
2329				 DST_SEL_Z(SQ_SEL_0),
2330				 DST_SEL_W(SQ_SEL_1),
2331				 USE_CONST_FIELDS(0),
2332				 DATA_FORMAT(FMT_32_32_FLOAT),
2333				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2334				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2335				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2336    shader[i++] = VTX_DWORD2(OFFSET(0),
2337#if X_BYTE_ORDER == X_BIG_ENDIAN
2338                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2339#else
2340                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2341#endif
2342			     CONST_BUF_NO_STRIDE(0),
2343			     MEGA_FETCH(1),
2344			     ALT_CONST(0),
2345			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2346    shader[i++] = VTX_DWORD_PAD;
2347    /* 46/47 - src */
2348    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2349			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2350			     FETCH_WHOLE_QUAD(0),
2351			     BUFFER_ID(0),
2352			     SRC_GPR(0),
2353			     SRC_REL(ABSOLUTE),
2354			     SRC_SEL_X(SQ_SEL_X),
2355			     MEGA_FETCH_COUNT(8));
2356    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
2357				 DST_REL(0),
2358				 DST_SEL_X(SQ_SEL_X),
2359				 DST_SEL_Y(SQ_SEL_Y),
2360				 DST_SEL_Z(SQ_SEL_1),
2361				 DST_SEL_W(SQ_SEL_0),
2362				 USE_CONST_FIELDS(0),
2363				 DATA_FORMAT(FMT_32_32_FLOAT),
2364				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2365				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2366				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2367    shader[i++] = VTX_DWORD2(OFFSET(8),
2368#if X_BYTE_ORDER == X_BIG_ENDIAN
2369                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2370#else
2371                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2372#endif
2373			     CONST_BUF_NO_STRIDE(0),
2374			     MEGA_FETCH(0),
2375			     ALT_CONST(0),
2376			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2377    shader[i++] = VTX_DWORD_PAD;
2378    /* 48/49 - mask */
2379    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2380			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2381			     FETCH_WHOLE_QUAD(0),
2382			     BUFFER_ID(0),
2383			     SRC_GPR(0),
2384			     SRC_REL(ABSOLUTE),
2385			     SRC_SEL_X(SQ_SEL_X),
2386			     MEGA_FETCH_COUNT(8));
2387    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
2388				 DST_REL(0),
2389				 DST_SEL_X(SQ_SEL_X),
2390				 DST_SEL_Y(SQ_SEL_Y),
2391				 DST_SEL_Z(SQ_SEL_1),
2392				 DST_SEL_W(SQ_SEL_0),
2393				 USE_CONST_FIELDS(0),
2394				 DATA_FORMAT(FMT_32_32_FLOAT),
2395				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2396				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2397				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2398    shader[i++] = VTX_DWORD2(OFFSET(16),
2399#if X_BYTE_ORDER == X_BIG_ENDIAN
2400                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2401#else
2402                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2403#endif
2404			     CONST_BUF_NO_STRIDE(0),
2405			     MEGA_FETCH(0),
2406			     ALT_CONST(0),
2407			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2408    shader[i++] = VTX_DWORD_PAD;
2409
2410    /* no mask vfetch - 50/51 - dst */
2411    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2412			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2413			     FETCH_WHOLE_QUAD(0),
2414			     BUFFER_ID(0),
2415			     SRC_GPR(0),
2416			     SRC_REL(ABSOLUTE),
2417			     SRC_SEL_X(SQ_SEL_X),
2418			     MEGA_FETCH_COUNT(16));
2419    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
2420				 DST_REL(0),
2421				 DST_SEL_X(SQ_SEL_X),
2422				 DST_SEL_Y(SQ_SEL_Y),
2423				 DST_SEL_Z(SQ_SEL_0),
2424				 DST_SEL_W(SQ_SEL_1),
2425				 USE_CONST_FIELDS(0),
2426				 DATA_FORMAT(FMT_32_32_FLOAT),
2427				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2428				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2429				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2430    shader[i++] = VTX_DWORD2(OFFSET(0),
2431#if X_BYTE_ORDER == X_BIG_ENDIAN
2432                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2433#else
2434                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2435#endif
2436			     CONST_BUF_NO_STRIDE(0),
2437			     MEGA_FETCH(1),
2438			     ALT_CONST(0),
2439			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2440    shader[i++] = VTX_DWORD_PAD;
2441    /* 52/53 - src */
2442    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2443			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2444			     FETCH_WHOLE_QUAD(0),
2445			     BUFFER_ID(0),
2446			     SRC_GPR(0),
2447			     SRC_REL(ABSOLUTE),
2448			     SRC_SEL_X(SQ_SEL_X),
2449			     MEGA_FETCH_COUNT(8));
2450    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
2451				 DST_REL(0),
2452				 DST_SEL_X(SQ_SEL_X),
2453				 DST_SEL_Y(SQ_SEL_Y),
2454				 DST_SEL_Z(SQ_SEL_1),
2455				 DST_SEL_W(SQ_SEL_0),
2456				 USE_CONST_FIELDS(0),
2457				 DATA_FORMAT(FMT_32_32_FLOAT),
2458				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2459				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2460				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2461    shader[i++] = VTX_DWORD2(OFFSET(8),
2462#if X_BYTE_ORDER == X_BIG_ENDIAN
2463                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2464#else
2465                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2466#endif
2467			     CONST_BUF_NO_STRIDE(0),
2468			     MEGA_FETCH(0),
2469                             ALT_CONST(0),
2470                             BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2471    shader[i++] = VTX_DWORD_PAD;
2472
2473    return i;
2474}
2475
2476/* comp ps --------------------------------------- */
2477int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
2478{
2479    int i = 0;
2480
2481    /* 0 */
2482    shader[i++] = CF_DWORD0(ADDR(3),
2483			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2484    shader[i++] = CF_DWORD1(POP_COUNT(0),
2485                            CF_CONST(0),
2486                            COND(SQ_CF_COND_BOOL),
2487                            I_COUNT(0),
2488                            VALID_PIXEL_MODE(0),
2489                            END_OF_PROGRAM(0),
2490                            CF_INST(SQ_CF_INST_CALL),
2491                            WHOLE_QUAD_MODE(0),
2492                            BARRIER(0));
2493    /* 1 */
2494    shader[i++] = CF_DWORD0(ADDR(8),
2495			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2496    shader[i++] = CF_DWORD1(POP_COUNT(0),
2497                            CF_CONST(0),
2498                            COND(SQ_CF_COND_NOT_BOOL),
2499                            I_COUNT(0),
2500                            VALID_PIXEL_MODE(0),
2501                            END_OF_PROGRAM(0),
2502                            CF_INST(SQ_CF_INST_CALL),
2503                            WHOLE_QUAD_MODE(0),
2504                            BARRIER(0));
2505    /* 2 */
2506    shader[i++] = CF_DWORD0(ADDR(0),
2507                            JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2508    shader[i++] = CF_DWORD1(POP_COUNT(0),
2509                            CF_CONST(0),
2510                            COND(SQ_CF_COND_ACTIVE),
2511                            I_COUNT(0),
2512                            VALID_PIXEL_MODE(0),
2513                            END_OF_PROGRAM(1),
2514                            CF_INST(SQ_CF_INST_NOP),
2515                            WHOLE_QUAD_MODE(0),
2516                            BARRIER(1));
2517
2518    /* 3 - mask sub */
2519    shader[i++] = CF_ALU_DWORD0(ADDR(12),
2520				KCACHE_BANK0(0),
2521				KCACHE_BANK1(0),
2522				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
2523    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2524				KCACHE_ADDR0(0),
2525				KCACHE_ADDR1(0),
2526				I_COUNT(8),
2527				ALT_CONST(0),
2528				CF_INST(SQ_CF_INST_ALU),
2529				WHOLE_QUAD_MODE(0),
2530				BARRIER(1));
2531
2532    /* 4 */
2533    shader[i++] = CF_DWORD0(ADDR(28),
2534			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2535    shader[i++] = CF_DWORD1(POP_COUNT(0),
2536			    CF_CONST(0),
2537			    COND(SQ_CF_COND_ACTIVE),
2538			    I_COUNT(2),
2539			    VALID_PIXEL_MODE(0),
2540			    END_OF_PROGRAM(0),
2541			    CF_INST(SQ_CF_INST_TC),
2542			    WHOLE_QUAD_MODE(0),
2543			    BARRIER(1));
2544
2545    /* 5 */
2546    shader[i++] = CF_ALU_DWORD0(ADDR(20),
2547				KCACHE_BANK0(0),
2548				KCACHE_BANK1(0),
2549				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
2550    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2551				KCACHE_ADDR0(0),
2552				KCACHE_ADDR1(0),
2553				I_COUNT(4),
2554				ALT_CONST(0),
2555				CF_INST(SQ_CF_INST_ALU),
2556				WHOLE_QUAD_MODE(0),
2557				BARRIER(1));
2558
2559    /* 6 */
2560    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
2561					  TYPE(SQ_EXPORT_PIXEL),
2562					  RW_GPR(2),
2563					  RW_REL(ABSOLUTE),
2564					  INDEX_GPR(0),
2565					  ELEM_SIZE(1));
2566
2567    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
2568					       SRC_SEL_Y(SQ_SEL_Y),
2569					       SRC_SEL_Z(SQ_SEL_Z),
2570					       SRC_SEL_W(SQ_SEL_W),
2571					       BURST_COUNT(1),
2572					       VALID_PIXEL_MODE(0),
2573					       END_OF_PROGRAM(0),
2574					       CF_INST(SQ_CF_INST_EXPORT_DONE),
2575					       MARK(0),
2576					       BARRIER(1));
2577    /* 7 */
2578    shader[i++] = CF_DWORD0(ADDR(0),
2579			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2580    shader[i++] = CF_DWORD1(POP_COUNT(0),
2581			    CF_CONST(0),
2582			    COND(SQ_CF_COND_ACTIVE),
2583			    I_COUNT(0),
2584			    VALID_PIXEL_MODE(0),
2585			    END_OF_PROGRAM(0),
2586			    CF_INST(SQ_CF_INST_RETURN),
2587			    WHOLE_QUAD_MODE(0),
2588			    BARRIER(1));
2589
2590    /* 8 - non-mask sub */
2591    shader[i++] = CF_ALU_DWORD0(ADDR(24),
2592				KCACHE_BANK0(0),
2593				KCACHE_BANK1(0),
2594				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
2595    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2596				KCACHE_ADDR0(0),
2597				KCACHE_ADDR1(0),
2598				I_COUNT(4),
2599				ALT_CONST(0),
2600				CF_INST(SQ_CF_INST_ALU),
2601				WHOLE_QUAD_MODE(0),
2602				BARRIER(1));
2603    /* 9 */
2604    shader[i++] = CF_DWORD0(ADDR(32),
2605			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2606    shader[i++] = CF_DWORD1(POP_COUNT(0),
2607			    CF_CONST(0),
2608			    COND(SQ_CF_COND_ACTIVE),
2609			    I_COUNT(1),
2610			    VALID_PIXEL_MODE(0),
2611			    END_OF_PROGRAM(0),
2612			    CF_INST(SQ_CF_INST_TC),
2613			    WHOLE_QUAD_MODE(0),
2614			    BARRIER(1));
2615
2616    /* 10 */
2617    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
2618					  TYPE(SQ_EXPORT_PIXEL),
2619					  RW_GPR(0),
2620					  RW_REL(ABSOLUTE),
2621					  INDEX_GPR(0),
2622					  ELEM_SIZE(1));
2623    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
2624					       SRC_SEL_Y(SQ_SEL_Y),
2625					       SRC_SEL_Z(SQ_SEL_Z),
2626					       SRC_SEL_W(SQ_SEL_W),
2627					       BURST_COUNT(1),
2628					       VALID_PIXEL_MODE(0),
2629					       END_OF_PROGRAM(0),
2630					       CF_INST(SQ_CF_INST_EXPORT_DONE),
2631					       MARK(0),
2632					       BARRIER(1));
2633
2634    /* 11 */
2635    shader[i++] = CF_DWORD0(ADDR(0),
2636			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2637    shader[i++] = CF_DWORD1(POP_COUNT(0),
2638			    CF_CONST(0),
2639			    COND(SQ_CF_COND_ACTIVE),
2640			    I_COUNT(0),
2641			    VALID_PIXEL_MODE(0),
2642			    END_OF_PROGRAM(0),
2643			    CF_INST(SQ_CF_INST_RETURN),
2644			    WHOLE_QUAD_MODE(0),
2645			    BARRIER(1));
2646
2647    /* 12 interpolate src tex coords - mask */
2648    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2649			     SRC0_REL(ABSOLUTE),
2650			     SRC0_ELEM(ELEM_Y),
2651			     SRC0_NEG(0),
2652			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2653			     SRC1_REL(ABSOLUTE),
2654			     SRC1_ELEM(ELEM_X),
2655			     SRC1_NEG(0),
2656			     INDEX_MODE(SQ_INDEX_AR_X),
2657			     PRED_SEL(SQ_PRED_SEL_OFF),
2658			     LAST(0));
2659    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2660				 SRC1_ABS(0),
2661				 UPDATE_EXECUTE_MASK(0),
2662				 UPDATE_PRED(0),
2663				 WRITE_MASK(1),
2664				 OMOD(SQ_ALU_OMOD_OFF),
2665				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2666				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2667				 DST_GPR(1),
2668				 DST_REL(ABSOLUTE),
2669				 DST_ELEM(ELEM_X),
2670				 CLAMP(0));
2671    /* 13 */
2672    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2673			     SRC0_REL(ABSOLUTE),
2674			     SRC0_ELEM(ELEM_X),
2675			     SRC0_NEG(0),
2676			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2677			     SRC1_REL(ABSOLUTE),
2678			     SRC1_ELEM(ELEM_X),
2679			     SRC1_NEG(0),
2680			     INDEX_MODE(SQ_INDEX_AR_X),
2681			     PRED_SEL(SQ_PRED_SEL_OFF),
2682			     LAST(0));
2683    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2684				 SRC1_ABS(0),
2685				 UPDATE_EXECUTE_MASK(0),
2686				 UPDATE_PRED(0),
2687				 WRITE_MASK(1),
2688				 OMOD(SQ_ALU_OMOD_OFF),
2689				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2690				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2691				 DST_GPR(1),
2692				 DST_REL(ABSOLUTE),
2693				 DST_ELEM(ELEM_Y),
2694				 CLAMP(0));
2695    /* 14 */
2696    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2697			     SRC0_REL(ABSOLUTE),
2698			     SRC0_ELEM(ELEM_Y),
2699			     SRC0_NEG(0),
2700			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2701			     SRC1_REL(ABSOLUTE),
2702			     SRC1_ELEM(ELEM_X),
2703			     SRC1_NEG(0),
2704			     INDEX_MODE(SQ_INDEX_AR_X),
2705			     PRED_SEL(SQ_PRED_SEL_OFF),
2706			     LAST(0));
2707    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2708				 SRC1_ABS(0),
2709				 UPDATE_EXECUTE_MASK(0),
2710				 UPDATE_PRED(0),
2711				 WRITE_MASK(0),
2712				 OMOD(SQ_ALU_OMOD_OFF),
2713				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2714				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2715				 DST_GPR(1),
2716				 DST_REL(ABSOLUTE),
2717				 DST_ELEM(ELEM_Z),
2718				 CLAMP(0));
2719    /* 15 */
2720    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2721			     SRC0_REL(ABSOLUTE),
2722			     SRC0_ELEM(ELEM_X),
2723			     SRC0_NEG(0),
2724			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2725			     SRC1_REL(ABSOLUTE),
2726			     SRC1_ELEM(ELEM_X),
2727			     SRC1_NEG(0),
2728			     INDEX_MODE(SQ_INDEX_AR_X),
2729			     PRED_SEL(SQ_PRED_SEL_OFF),
2730			     LAST(1));
2731    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2732				 SRC1_ABS(0),
2733				 UPDATE_EXECUTE_MASK(0),
2734				 UPDATE_PRED(0),
2735				 WRITE_MASK(0),
2736				 OMOD(SQ_ALU_OMOD_OFF),
2737				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2738				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2739				 DST_GPR(1),
2740				 DST_REL(ABSOLUTE),
2741				 DST_ELEM(ELEM_W),
2742				 CLAMP(0));
2743
2744    /* 16 interpolate mask tex coords */
2745    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2746			     SRC0_REL(ABSOLUTE),
2747			     SRC0_ELEM(ELEM_Y),
2748			     SRC0_NEG(0),
2749			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
2750			     SRC1_REL(ABSOLUTE),
2751			     SRC1_ELEM(ELEM_X),
2752			     SRC1_NEG(0),
2753			     INDEX_MODE(SQ_INDEX_AR_X),
2754			     PRED_SEL(SQ_PRED_SEL_OFF),
2755			     LAST(0));
2756    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2757				 SRC1_ABS(0),
2758				 UPDATE_EXECUTE_MASK(0),
2759				 UPDATE_PRED(0),
2760				 WRITE_MASK(1),
2761				 OMOD(SQ_ALU_OMOD_OFF),
2762				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2763				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2764				 DST_GPR(0),
2765				 DST_REL(ABSOLUTE),
2766				 DST_ELEM(ELEM_X),
2767				 CLAMP(0));
2768    /* 17 */
2769    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2770			     SRC0_REL(ABSOLUTE),
2771			     SRC0_ELEM(ELEM_X),
2772			     SRC0_NEG(0),
2773			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
2774			     SRC1_REL(ABSOLUTE),
2775			     SRC1_ELEM(ELEM_X),
2776			     SRC1_NEG(0),
2777			     INDEX_MODE(SQ_INDEX_AR_X),
2778			     PRED_SEL(SQ_PRED_SEL_OFF),
2779			     LAST(0));
2780    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2781				 SRC1_ABS(0),
2782				 UPDATE_EXECUTE_MASK(0),
2783				 UPDATE_PRED(0),
2784				 WRITE_MASK(1),
2785				 OMOD(SQ_ALU_OMOD_OFF),
2786				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2787				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2788				 DST_GPR(0),
2789				 DST_REL(ABSOLUTE),
2790				 DST_ELEM(ELEM_Y),
2791				 CLAMP(0));
2792    /* 18 */
2793    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2794			     SRC0_REL(ABSOLUTE),
2795			     SRC0_ELEM(ELEM_Y),
2796			     SRC0_NEG(0),
2797			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
2798			     SRC1_REL(ABSOLUTE),
2799			     SRC1_ELEM(ELEM_X),
2800			     SRC1_NEG(0),
2801			     INDEX_MODE(SQ_INDEX_AR_X),
2802			     PRED_SEL(SQ_PRED_SEL_OFF),
2803			     LAST(0));
2804    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2805				 SRC1_ABS(0),
2806				 UPDATE_EXECUTE_MASK(0),
2807				 UPDATE_PRED(0),
2808				 WRITE_MASK(0),
2809				 OMOD(SQ_ALU_OMOD_OFF),
2810				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2811				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2812				 DST_GPR(0),
2813				 DST_REL(ABSOLUTE),
2814				 DST_ELEM(ELEM_Z),
2815				 CLAMP(0));
2816    /* 19 */
2817    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2818			     SRC0_REL(ABSOLUTE),
2819			     SRC0_ELEM(ELEM_X),
2820			     SRC0_NEG(0),
2821			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
2822			     SRC1_REL(ABSOLUTE),
2823			     SRC1_ELEM(ELEM_X),
2824			     SRC1_NEG(0),
2825			     INDEX_MODE(SQ_INDEX_AR_X),
2826			     PRED_SEL(SQ_PRED_SEL_OFF),
2827			     LAST(1));
2828    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2829				 SRC1_ABS(0),
2830				 UPDATE_EXECUTE_MASK(0),
2831				 UPDATE_PRED(0),
2832				 WRITE_MASK(0),
2833				 OMOD(SQ_ALU_OMOD_OFF),
2834				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2835				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2836				 DST_GPR(0),
2837				 DST_REL(ABSOLUTE),
2838				 DST_ELEM(ELEM_W),
2839				 CLAMP(0));
2840
2841    /* 20 - alu 0 */
2842    /* MUL gpr[2].x gpr[0].x gpr[1].x */
2843    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2844			     SRC0_REL(ABSOLUTE),
2845			     SRC0_ELEM(ELEM_X),
2846			     SRC0_NEG(0),
2847			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
2848			     SRC1_REL(ABSOLUTE),
2849			     SRC1_ELEM(ELEM_X),
2850			     SRC1_NEG(0),
2851			     INDEX_MODE(SQ_INDEX_LOOP),
2852			     PRED_SEL(SQ_PRED_SEL_OFF),
2853			     LAST(0));
2854    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2855				 SRC1_ABS(0),
2856				 UPDATE_EXECUTE_MASK(0),
2857				 UPDATE_PRED(0),
2858				 WRITE_MASK(1),
2859				 OMOD(SQ_ALU_OMOD_OFF),
2860				 ALU_INST(SQ_OP2_INST_MUL),
2861				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2862				 DST_GPR(2),
2863				 DST_REL(ABSOLUTE),
2864				 DST_ELEM(ELEM_X),
2865				 CLAMP(1));
2866    /* 21 - alu 1 */
2867    /* MUL gpr[2].y gpr[0].y gpr[1].y */
2868    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2869			     SRC0_REL(ABSOLUTE),
2870			     SRC0_ELEM(ELEM_Y),
2871			     SRC0_NEG(0),
2872			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
2873			     SRC1_REL(ABSOLUTE),
2874			     SRC1_ELEM(ELEM_Y),
2875			     SRC1_NEG(0),
2876			     INDEX_MODE(SQ_INDEX_LOOP),
2877			     PRED_SEL(SQ_PRED_SEL_OFF),
2878			     LAST(0));
2879    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2880				 SRC1_ABS(0),
2881				 UPDATE_EXECUTE_MASK(0),
2882				 UPDATE_PRED(0),
2883				 WRITE_MASK(1),
2884				 OMOD(SQ_ALU_OMOD_OFF),
2885				 ALU_INST(SQ_OP2_INST_MUL),
2886				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2887				 DST_GPR(2),
2888				 DST_REL(ABSOLUTE),
2889				 DST_ELEM(ELEM_Y),
2890				 CLAMP(1));
2891    /* 22 - alu 2 */
2892    /* MUL gpr[2].z gpr[0].z gpr[1].z */
2893    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2894			     SRC0_REL(ABSOLUTE),
2895			     SRC0_ELEM(ELEM_Z),
2896			     SRC0_NEG(0),
2897			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
2898			     SRC1_REL(ABSOLUTE),
2899			     SRC1_ELEM(ELEM_Z),
2900			     SRC1_NEG(0),
2901			     INDEX_MODE(SQ_INDEX_LOOP),
2902			     PRED_SEL(SQ_PRED_SEL_OFF),
2903			     LAST(0));
2904    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2905				 SRC1_ABS(0),
2906				 UPDATE_EXECUTE_MASK(0),
2907				 UPDATE_PRED(0),
2908				 WRITE_MASK(1),
2909				 OMOD(SQ_ALU_OMOD_OFF),
2910				 ALU_INST(SQ_OP2_INST_MUL),
2911				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2912				 DST_GPR(2),
2913				 DST_REL(ABSOLUTE),
2914				 DST_ELEM(ELEM_Z),
2915				 CLAMP(1));
2916    /* 23 - alu 3 */
2917    /* MUL gpr[2].w gpr[0].w gpr[1].w */
2918    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2919			     SRC0_REL(ABSOLUTE),
2920			     SRC0_ELEM(ELEM_W),
2921			     SRC0_NEG(0),
2922			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
2923			     SRC1_REL(ABSOLUTE),
2924			     SRC1_ELEM(ELEM_W),
2925			     SRC1_NEG(0),
2926			     INDEX_MODE(SQ_INDEX_LOOP),
2927			     PRED_SEL(SQ_PRED_SEL_OFF),
2928			     LAST(1));
2929    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2930				 SRC1_ABS(0),
2931				 UPDATE_EXECUTE_MASK(0),
2932				 UPDATE_PRED(0),
2933				 WRITE_MASK(1),
2934				 OMOD(SQ_ALU_OMOD_OFF),
2935				 ALU_INST(SQ_OP2_INST_MUL),
2936				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2937				 DST_GPR(2),
2938				 DST_REL(ABSOLUTE),
2939				 DST_ELEM(ELEM_W),
2940				 CLAMP(1));
2941
2942    /* 24 - interpolate tex coords - non-mask */
2943    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2944			     SRC0_REL(ABSOLUTE),
2945			     SRC0_ELEM(ELEM_Y),
2946			     SRC0_NEG(0),
2947			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2948			     SRC1_REL(ABSOLUTE),
2949			     SRC1_ELEM(ELEM_X),
2950			     SRC1_NEG(0),
2951			     INDEX_MODE(SQ_INDEX_AR_X),
2952			     PRED_SEL(SQ_PRED_SEL_OFF),
2953			     LAST(0));
2954    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2955				 SRC1_ABS(0),
2956				 UPDATE_EXECUTE_MASK(0),
2957				 UPDATE_PRED(0),
2958				 WRITE_MASK(1),
2959				 OMOD(SQ_ALU_OMOD_OFF),
2960				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2961				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2962				 DST_GPR(0),
2963				 DST_REL(ABSOLUTE),
2964				 DST_ELEM(ELEM_X),
2965				 CLAMP(0));
2966    /* 25 */
2967    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2968			     SRC0_REL(ABSOLUTE),
2969			     SRC0_ELEM(ELEM_X),
2970			     SRC0_NEG(0),
2971			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2972			     SRC1_REL(ABSOLUTE),
2973			     SRC1_ELEM(ELEM_X),
2974			     SRC1_NEG(0),
2975			     INDEX_MODE(SQ_INDEX_AR_X),
2976			     PRED_SEL(SQ_PRED_SEL_OFF),
2977			     LAST(0));
2978    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2979				 SRC1_ABS(0),
2980				 UPDATE_EXECUTE_MASK(0),
2981				 UPDATE_PRED(0),
2982				 WRITE_MASK(1),
2983				 OMOD(SQ_ALU_OMOD_OFF),
2984				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2985				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2986				 DST_GPR(0),
2987				 DST_REL(ABSOLUTE),
2988				 DST_ELEM(ELEM_Y),
2989				 CLAMP(0));
2990    /* 26 */
2991    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2992			     SRC0_REL(ABSOLUTE),
2993			     SRC0_ELEM(ELEM_Y),
2994			     SRC0_NEG(0),
2995			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2996			     SRC1_REL(ABSOLUTE),
2997			     SRC1_ELEM(ELEM_X),
2998			     SRC1_NEG(0),
2999			     INDEX_MODE(SQ_INDEX_AR_X),
3000			     PRED_SEL(SQ_PRED_SEL_OFF),
3001			     LAST(0));
3002    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3003				 SRC1_ABS(0),
3004				 UPDATE_EXECUTE_MASK(0),
3005				 UPDATE_PRED(0),
3006				 WRITE_MASK(0),
3007				 OMOD(SQ_ALU_OMOD_OFF),
3008				 ALU_INST(SQ_OP2_INST_INTERP_XY),
3009				 BANK_SWIZZLE(SQ_ALU_VEC_210),
3010				 DST_GPR(0),
3011				 DST_REL(ABSOLUTE),
3012				 DST_ELEM(ELEM_Z),
3013				 CLAMP(0));
3014    /* 27 */
3015    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
3016			     SRC0_REL(ABSOLUTE),
3017			     SRC0_ELEM(ELEM_X),
3018			     SRC0_NEG(0),
3019			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
3020			     SRC1_REL(ABSOLUTE),
3021			     SRC1_ELEM(ELEM_X),
3022			     SRC1_NEG(0),
3023			     INDEX_MODE(SQ_INDEX_AR_X),
3024			     PRED_SEL(SQ_PRED_SEL_OFF),
3025			     LAST(1));
3026    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3027				 SRC1_ABS(0),
3028				 UPDATE_EXECUTE_MASK(0),
3029				 UPDATE_PRED(0),
3030				 WRITE_MASK(0),
3031				 OMOD(SQ_ALU_OMOD_OFF),
3032				 ALU_INST(SQ_OP2_INST_INTERP_XY),
3033				 BANK_SWIZZLE(SQ_ALU_VEC_210),
3034				 DST_GPR(0),
3035				 DST_REL(ABSOLUTE),
3036				 DST_ELEM(ELEM_W),
3037				 CLAMP(0));
3038
3039    /* 28/29 - src - mask */
3040    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
3041			     INST_MOD(0),
3042			     FETCH_WHOLE_QUAD(0),
3043			     RESOURCE_ID(0),
3044			     SRC_GPR(1),
3045			     SRC_REL(ABSOLUTE),
3046			     ALT_CONST(0),
3047			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
3048			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
3049    shader[i++] = TEX_DWORD1(DST_GPR(1),
3050			     DST_REL(ABSOLUTE),
3051			     DST_SEL_X(SQ_SEL_X),
3052			     DST_SEL_Y(SQ_SEL_Y),
3053			     DST_SEL_Z(SQ_SEL_Z),
3054			     DST_SEL_W(SQ_SEL_W),
3055			     LOD_BIAS(0),
3056			     COORD_TYPE_X(TEX_NORMALIZED),
3057			     COORD_TYPE_Y(TEX_NORMALIZED),
3058			     COORD_TYPE_Z(TEX_NORMALIZED),
3059			     COORD_TYPE_W(TEX_NORMALIZED));
3060    shader[i++] = TEX_DWORD2(OFFSET_X(0),
3061			     OFFSET_Y(0),
3062			     OFFSET_Z(0),
3063			     SAMPLER_ID(0),
3064			     SRC_SEL_X(SQ_SEL_X),
3065			     SRC_SEL_Y(SQ_SEL_Y),
3066			     SRC_SEL_Z(SQ_SEL_0),
3067			     SRC_SEL_W(SQ_SEL_1));
3068    shader[i++] = TEX_DWORD_PAD;
3069    /* 30/31 - mask */
3070    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
3071			     INST_MOD(0),
3072			     FETCH_WHOLE_QUAD(0),
3073			     RESOURCE_ID(1),
3074			     SRC_GPR(0),
3075			     SRC_REL(ABSOLUTE),
3076                             ALT_CONST(0),
3077                             RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
3078                             SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
3079    shader[i++] = TEX_DWORD1(DST_GPR(0),
3080			     DST_REL(ABSOLUTE),
3081			     DST_SEL_X(SQ_SEL_X),
3082			     DST_SEL_Y(SQ_SEL_Y),
3083			     DST_SEL_Z(SQ_SEL_Z),
3084			     DST_SEL_W(SQ_SEL_W),
3085			     LOD_BIAS(0),
3086			     COORD_TYPE_X(TEX_NORMALIZED),
3087			     COORD_TYPE_Y(TEX_NORMALIZED),
3088			     COORD_TYPE_Z(TEX_NORMALIZED),
3089			     COORD_TYPE_W(TEX_NORMALIZED));
3090    shader[i++] = TEX_DWORD2(OFFSET_X(0),
3091			     OFFSET_Y(0),
3092			     OFFSET_Z(0),
3093			     SAMPLER_ID(1),
3094			     SRC_SEL_X(SQ_SEL_X),
3095			     SRC_SEL_Y(SQ_SEL_Y),
3096			     SRC_SEL_Z(SQ_SEL_0),
3097			     SRC_SEL_W(SQ_SEL_1));
3098    shader[i++] = TEX_DWORD_PAD;
3099
3100    /* 32/33 - src - non-mask */
3101    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
3102			     INST_MOD(0),
3103			     FETCH_WHOLE_QUAD(0),
3104			     RESOURCE_ID(0),
3105			     SRC_GPR(0),
3106			     SRC_REL(ABSOLUTE),
3107			     ALT_CONST(0),
3108			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
3109			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
3110    shader[i++] = TEX_DWORD1(DST_GPR(0),
3111			     DST_REL(ABSOLUTE),
3112			     DST_SEL_X(SQ_SEL_X),
3113			     DST_SEL_Y(SQ_SEL_Y),
3114			     DST_SEL_Z(SQ_SEL_Z),
3115			     DST_SEL_W(SQ_SEL_W),
3116			     LOD_BIAS(0),
3117			     COORD_TYPE_X(TEX_NORMALIZED),
3118			     COORD_TYPE_Y(TEX_NORMALIZED),
3119			     COORD_TYPE_Z(TEX_NORMALIZED),
3120			     COORD_TYPE_W(TEX_NORMALIZED));
3121    shader[i++] = TEX_DWORD2(OFFSET_X(0),
3122			     OFFSET_Y(0),
3123			     OFFSET_Z(0),
3124			     SAMPLER_ID(0),
3125			     SRC_SEL_X(SQ_SEL_X),
3126			     SRC_SEL_Y(SQ_SEL_Y),
3127			     SRC_SEL_Z(SQ_SEL_0),
3128			     SRC_SEL_W(SQ_SEL_1));
3129    shader[i++] = TEX_DWORD_PAD;
3130
3131    return i;
3132}
3133
3134#endif
3135