1/*
2 * Copyright 2010 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Author: Alex Deucher <alexander.deucher@amd.com>
24 *
25 */
26
27#ifdef HAVE_CONFIG_H
28#include "config.h"
29#endif
30
31#include "xf86.h"
32
33#include "evergreen_shader.h"
34#include "evergreen_reg.h"
35
36/* solid vs --------------------------------------- */
37int evergreen_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader)
38{
39    int i = 0;
40
41    /* 0 */
42    shader[i++] = CF_DWORD0(ADDR(4),
43			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
44    shader[i++] = CF_DWORD1(POP_COUNT(0),
45			    CF_CONST(0),
46			    COND(SQ_CF_COND_ACTIVE),
47			    I_COUNT(1),
48			    VALID_PIXEL_MODE(0),
49			    END_OF_PROGRAM(0),
50			    CF_INST(SQ_CF_INST_VC),
51			    WHOLE_QUAD_MODE(0),
52			    BARRIER(1));
53    /* 1 */
54    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
55					  TYPE(SQ_EXPORT_POS),
56					  RW_GPR(1),
57					  RW_REL(ABSOLUTE),
58					  INDEX_GPR(0),
59					  ELEM_SIZE(0));
60    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
61					       SRC_SEL_Y(SQ_SEL_Y),
62					       SRC_SEL_Z(SQ_SEL_Z),
63					       SRC_SEL_W(SQ_SEL_W),
64					       BURST_COUNT(1),
65					       VALID_PIXEL_MODE(0),
66					       END_OF_PROGRAM(0),
67					       CF_INST(SQ_CF_INST_EXPORT_DONE),
68					       MARK(0),
69					       BARRIER(1));
70    /* 2 - always export a param whether it's used or not */
71    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
72					  TYPE(SQ_EXPORT_PARAM),
73					  RW_GPR(0),
74					  RW_REL(ABSOLUTE),
75					  INDEX_GPR(0),
76					  ELEM_SIZE(0));
77    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
78					       SRC_SEL_Y(SQ_SEL_Y),
79					       SRC_SEL_Z(SQ_SEL_Z),
80					       SRC_SEL_W(SQ_SEL_W),
81					       BURST_COUNT(0),
82					       VALID_PIXEL_MODE(0),
83					       END_OF_PROGRAM(1),
84					       CF_INST(SQ_CF_INST_EXPORT_DONE),
85					       MARK(0),
86					       BARRIER(0));
87    /* 3 - padding */
88    shader[i++] = 0x00000000;
89    shader[i++] = 0x00000000;
90    /* 4/5 */
91    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
92			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
93			     FETCH_WHOLE_QUAD(0),
94			     BUFFER_ID(0),
95			     SRC_GPR(0),
96			     SRC_REL(ABSOLUTE),
97			     SRC_SEL_X(SQ_SEL_X),
98			     MEGA_FETCH_COUNT(8));
99    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
100				 DST_REL(0),
101				 DST_SEL_X(SQ_SEL_X),
102				 DST_SEL_Y(SQ_SEL_Y),
103				 DST_SEL_Z(SQ_SEL_0),
104				 DST_SEL_W(SQ_SEL_1),
105				 USE_CONST_FIELDS(0),
106				 DATA_FORMAT(FMT_32_32_FLOAT),
107				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
108				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
109				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
110    shader[i++] = VTX_DWORD2(OFFSET(0),
111#if X_BYTE_ORDER == X_BIG_ENDIAN
112			     ENDIAN_SWAP(SQ_ENDIAN_8IN32),
113#else
114			     ENDIAN_SWAP(SQ_ENDIAN_NONE),
115#endif
116			     CONST_BUF_NO_STRIDE(0),
117			     MEGA_FETCH(1),
118			     ALT_CONST(0),
119			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
120    shader[i++] = VTX_DWORD_PAD;
121
122    return i;
123}
124
125/* solid ps --------------------------------------- */
126int evergreen_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader)
127{
128    int i = 0;
129
130    /* 0 */
131    shader[i++] = CF_ALU_DWORD0(ADDR(2),
132				KCACHE_BANK0(0),
133				KCACHE_BANK1(0),
134				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
135    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
136				KCACHE_ADDR0(0),
137				KCACHE_ADDR1(0),
138				I_COUNT(4),
139				ALT_CONST(0),
140				CF_INST(SQ_CF_INST_ALU),
141				WHOLE_QUAD_MODE(0),
142				BARRIER(1));
143    /* 1 */
144    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
145					  TYPE(SQ_EXPORT_PIXEL),
146					  RW_GPR(0),
147					  RW_REL(ABSOLUTE),
148					  INDEX_GPR(0),
149					  ELEM_SIZE(1));
150    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
151					       SRC_SEL_Y(SQ_SEL_Y),
152					       SRC_SEL_Z(SQ_SEL_Z),
153					       SRC_SEL_W(SQ_SEL_W),
154					       BURST_COUNT(1),
155					       VALID_PIXEL_MODE(0),
156					       END_OF_PROGRAM(1),
157					       CF_INST(SQ_CF_INST_EXPORT_DONE),
158					       MARK(0),
159					       BARRIER(1));
160
161    /* 2 */
162    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
163			     SRC0_REL(ABSOLUTE),
164			     SRC0_ELEM(ELEM_X),
165			     SRC0_NEG(0),
166			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
167			     SRC1_REL(ABSOLUTE),
168			     SRC1_ELEM(ELEM_X),
169			     SRC1_NEG(0),
170			     INDEX_MODE(SQ_INDEX_AR_X),
171			     PRED_SEL(SQ_PRED_SEL_OFF),
172			     LAST(0));
173    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
174				 SRC1_ABS(0),
175				 UPDATE_EXECUTE_MASK(0),
176				 UPDATE_PRED(0),
177				 WRITE_MASK(1),
178				 OMOD(SQ_ALU_OMOD_OFF),
179				 ALU_INST(SQ_OP2_INST_MOV),
180				 BANK_SWIZZLE(SQ_ALU_VEC_012),
181				 DST_GPR(0),
182				 DST_REL(ABSOLUTE),
183				 DST_ELEM(ELEM_X),
184				 CLAMP(1));
185    /* 3 */
186    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
187			     SRC0_REL(ABSOLUTE),
188			     SRC0_ELEM(ELEM_Y),
189			     SRC0_NEG(0),
190			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
191			     SRC1_REL(ABSOLUTE),
192			     SRC1_ELEM(ELEM_Y),
193			     SRC1_NEG(0),
194			     INDEX_MODE(SQ_INDEX_AR_X),
195			     PRED_SEL(SQ_PRED_SEL_OFF),
196			     LAST(0));
197    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
198				 SRC1_ABS(0),
199				 UPDATE_EXECUTE_MASK(0),
200				 UPDATE_PRED(0),
201				 WRITE_MASK(1),
202				 OMOD(SQ_ALU_OMOD_OFF),
203				 ALU_INST(SQ_OP2_INST_MOV),
204				 BANK_SWIZZLE(SQ_ALU_VEC_012),
205				 DST_GPR(0),
206				 DST_REL(ABSOLUTE),
207				 DST_ELEM(ELEM_Y),
208				 CLAMP(1));
209    /* 4 */
210    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
211			     SRC0_REL(ABSOLUTE),
212			     SRC0_ELEM(ELEM_Z),
213			     SRC0_NEG(0),
214			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
215			     SRC1_REL(ABSOLUTE),
216			     SRC1_ELEM(ELEM_Z),
217			     SRC1_NEG(0),
218			     INDEX_MODE(SQ_INDEX_AR_X),
219			     PRED_SEL(SQ_PRED_SEL_OFF),
220			     LAST(0));
221    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
222				 SRC1_ABS(0),
223				 UPDATE_EXECUTE_MASK(0),
224				 UPDATE_PRED(0),
225				 WRITE_MASK(1),
226				 OMOD(SQ_ALU_OMOD_OFF),
227				 ALU_INST(SQ_OP2_INST_MOV),
228				 BANK_SWIZZLE(SQ_ALU_VEC_012),
229				 DST_GPR(0),
230				 DST_REL(ABSOLUTE),
231				 DST_ELEM(ELEM_Z),
232				 CLAMP(1));
233    /* 5 */
234    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
235			     SRC0_REL(ABSOLUTE),
236			     SRC0_ELEM(ELEM_W),
237			     SRC0_NEG(0),
238			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
239			     SRC1_REL(ABSOLUTE),
240			     SRC1_ELEM(ELEM_W),
241			     SRC1_NEG(0),
242			     INDEX_MODE(SQ_INDEX_AR_X),
243			     PRED_SEL(SQ_PRED_SEL_OFF),
244			     LAST(1));
245    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
246				 SRC1_ABS(0),
247				 UPDATE_EXECUTE_MASK(0),
248				 UPDATE_PRED(0),
249				 WRITE_MASK(1),
250				 OMOD(SQ_ALU_OMOD_OFF),
251				 ALU_INST(SQ_OP2_INST_MOV),
252				 BANK_SWIZZLE(SQ_ALU_VEC_012),
253				 DST_GPR(0),
254				 DST_REL(ABSOLUTE),
255				 DST_ELEM(ELEM_W),
256				 CLAMP(1));
257
258    return i;
259}
260
261/* copy vs --------------------------------------- */
262int evergreen_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader)
263{
264    int i = 0;
265
266    /* 0 */
267    shader[i++] = CF_DWORD0(ADDR(4),
268			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
269    shader[i++] = CF_DWORD1(POP_COUNT(0),
270			    CF_CONST(0),
271			    COND(SQ_CF_COND_ACTIVE),
272			    I_COUNT(2),
273			    VALID_PIXEL_MODE(0),
274			    END_OF_PROGRAM(0),
275			    CF_INST(SQ_CF_INST_VC),
276			    WHOLE_QUAD_MODE(0),
277			    BARRIER(1));
278    /* 1 */
279    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
280					  TYPE(SQ_EXPORT_POS),
281					  RW_GPR(1),
282					  RW_REL(ABSOLUTE),
283					  INDEX_GPR(0),
284					  ELEM_SIZE(0));
285    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
286					       SRC_SEL_Y(SQ_SEL_Y),
287					       SRC_SEL_Z(SQ_SEL_Z),
288					       SRC_SEL_W(SQ_SEL_W),
289					       BURST_COUNT(0),
290					       VALID_PIXEL_MODE(0),
291					       END_OF_PROGRAM(0),
292					       CF_INST(SQ_CF_INST_EXPORT_DONE),
293					       MARK(0),
294					       BARRIER(1));
295    /* 2 */
296    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
297					  TYPE(SQ_EXPORT_PARAM),
298					  RW_GPR(0),
299					  RW_REL(ABSOLUTE),
300					  INDEX_GPR(0),
301					  ELEM_SIZE(0));
302    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
303					       SRC_SEL_Y(SQ_SEL_Y),
304					       SRC_SEL_Z(SQ_SEL_Z),
305					       SRC_SEL_W(SQ_SEL_W),
306					       BURST_COUNT(0),
307					       VALID_PIXEL_MODE(0),
308					       END_OF_PROGRAM(1),
309					       CF_INST(SQ_CF_INST_EXPORT_DONE),
310					       MARK(0),
311					       BARRIER(0));
312    /* 3 */
313    shader[i++] = 0x00000000;
314    shader[i++] = 0x00000000;
315    /* 4/5 */
316    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
317			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
318			     FETCH_WHOLE_QUAD(0),
319			     BUFFER_ID(0),
320			     SRC_GPR(0),
321			     SRC_REL(ABSOLUTE),
322			     SRC_SEL_X(SQ_SEL_X),
323			     MEGA_FETCH_COUNT(16));
324    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
325				 DST_REL(0),
326				 DST_SEL_X(SQ_SEL_X),
327				 DST_SEL_Y(SQ_SEL_Y),
328				 DST_SEL_Z(SQ_SEL_0),
329				 DST_SEL_W(SQ_SEL_1),
330				 USE_CONST_FIELDS(0),
331				 DATA_FORMAT(FMT_32_32_FLOAT),
332				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
333				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
334				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
335    shader[i++] = VTX_DWORD2(OFFSET(0),
336#if X_BYTE_ORDER == X_BIG_ENDIAN
337                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
338#else
339                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
340#endif
341			     CONST_BUF_NO_STRIDE(0),
342			     MEGA_FETCH(1),
343			     ALT_CONST(0),
344			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
345    shader[i++] = VTX_DWORD_PAD;
346    /* 6/7 */
347    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
348			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
349			     FETCH_WHOLE_QUAD(0),
350			     BUFFER_ID(0),
351			     SRC_GPR(0),
352			     SRC_REL(ABSOLUTE),
353			     SRC_SEL_X(SQ_SEL_X),
354			     MEGA_FETCH_COUNT(8));
355    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
356				 DST_REL(0),
357				 DST_SEL_X(SQ_SEL_X),
358				 DST_SEL_Y(SQ_SEL_Y),
359				 DST_SEL_Z(SQ_SEL_0),
360				 DST_SEL_W(SQ_SEL_1),
361				 USE_CONST_FIELDS(0),
362				 DATA_FORMAT(FMT_32_32_FLOAT),
363				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
364				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
365				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
366    shader[i++] = VTX_DWORD2(OFFSET(8),
367#if X_BYTE_ORDER == X_BIG_ENDIAN
368                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
369#else
370                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
371#endif
372			     CONST_BUF_NO_STRIDE(0),
373			     MEGA_FETCH(0),
374			     ALT_CONST(0),
375			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
376    shader[i++] = VTX_DWORD_PAD;
377
378    return i;
379}
380
381/* copy ps --------------------------------------- */
382int evergreen_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader)
383{
384    int i = 0;
385
386    /* CF INST 0 */
387    shader[i++] = CF_ALU_DWORD0(ADDR(3),
388				KCACHE_BANK0(0),
389				KCACHE_BANK1(0),
390				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
391    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
392				KCACHE_ADDR0(0),
393				KCACHE_ADDR1(0),
394				I_COUNT(4),
395				ALT_CONST(0),
396				CF_INST(SQ_CF_INST_ALU),
397				WHOLE_QUAD_MODE(0),
398				BARRIER(1));
399    /* CF INST 1 */
400    shader[i++] = CF_DWORD0(ADDR(8),
401			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
402    shader[i++] = CF_DWORD1(POP_COUNT(0),
403			    CF_CONST(0),
404			    COND(SQ_CF_COND_ACTIVE),
405			    I_COUNT(1),
406			    VALID_PIXEL_MODE(0),
407			    END_OF_PROGRAM(0),
408			    CF_INST(SQ_CF_INST_TC),
409			    WHOLE_QUAD_MODE(0),
410			    BARRIER(1));
411    /* CF INST 2 */
412    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
413					  TYPE(SQ_EXPORT_PIXEL),
414					  RW_GPR(0),
415					  RW_REL(ABSOLUTE),
416					  INDEX_GPR(0),
417					  ELEM_SIZE(1));
418    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
419					       SRC_SEL_Y(SQ_SEL_Y),
420					       SRC_SEL_Z(SQ_SEL_Z),
421					       SRC_SEL_W(SQ_SEL_W),
422					       BURST_COUNT(1),
423					       VALID_PIXEL_MODE(0),
424					       END_OF_PROGRAM(1),
425					       CF_INST(SQ_CF_INST_EXPORT_DONE),
426					       MARK(0),
427					       BARRIER(1));
428
429    /* 3 interpolate tex coords */
430    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
431			     SRC0_REL(ABSOLUTE),
432			     SRC0_ELEM(ELEM_Y),
433			     SRC0_NEG(0),
434			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
435			     SRC1_REL(ABSOLUTE),
436			     SRC1_ELEM(ELEM_X),
437			     SRC1_NEG(0),
438			     INDEX_MODE(SQ_INDEX_AR_X),
439			     PRED_SEL(SQ_PRED_SEL_OFF),
440			     LAST(0));
441    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
442				 SRC1_ABS(0),
443				 UPDATE_EXECUTE_MASK(0),
444				 UPDATE_PRED(0),
445				 WRITE_MASK(1),
446				 OMOD(SQ_ALU_OMOD_OFF),
447				 ALU_INST(SQ_OP2_INST_INTERP_XY),
448				 BANK_SWIZZLE(SQ_ALU_VEC_210),
449				 DST_GPR(0),
450				 DST_REL(ABSOLUTE),
451				 DST_ELEM(ELEM_X),
452				 CLAMP(0));
453    /* 4 */
454    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
455			     SRC0_REL(ABSOLUTE),
456			     SRC0_ELEM(ELEM_X),
457			     SRC0_NEG(0),
458			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
459			     SRC1_REL(ABSOLUTE),
460			     SRC1_ELEM(ELEM_X),
461			     SRC1_NEG(0),
462			     INDEX_MODE(SQ_INDEX_AR_X),
463			     PRED_SEL(SQ_PRED_SEL_OFF),
464			     LAST(0));
465    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
466				 SRC1_ABS(0),
467				 UPDATE_EXECUTE_MASK(0),
468				 UPDATE_PRED(0),
469				 WRITE_MASK(1),
470				 OMOD(SQ_ALU_OMOD_OFF),
471				 ALU_INST(SQ_OP2_INST_INTERP_XY),
472				 BANK_SWIZZLE(SQ_ALU_VEC_210),
473				 DST_GPR(0),
474				 DST_REL(ABSOLUTE),
475				 DST_ELEM(ELEM_Y),
476				 CLAMP(0));
477    /* 5 */
478    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
479			     SRC0_REL(ABSOLUTE),
480			     SRC0_ELEM(ELEM_Y),
481			     SRC0_NEG(0),
482			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
483			     SRC1_REL(ABSOLUTE),
484			     SRC1_ELEM(ELEM_X),
485			     SRC1_NEG(0),
486			     INDEX_MODE(SQ_INDEX_AR_X),
487			     PRED_SEL(SQ_PRED_SEL_OFF),
488			     LAST(0));
489    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
490				 SRC1_ABS(0),
491				 UPDATE_EXECUTE_MASK(0),
492				 UPDATE_PRED(0),
493				 WRITE_MASK(0),
494				 OMOD(SQ_ALU_OMOD_OFF),
495				 ALU_INST(SQ_OP2_INST_INTERP_XY),
496				 BANK_SWIZZLE(SQ_ALU_VEC_210),
497				 DST_GPR(0),
498				 DST_REL(ABSOLUTE),
499				 DST_ELEM(ELEM_Z),
500				 CLAMP(0));
501    /* 6 */
502    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
503			     SRC0_REL(ABSOLUTE),
504			     SRC0_ELEM(ELEM_X),
505			     SRC0_NEG(0),
506			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
507			     SRC1_REL(ABSOLUTE),
508			     SRC1_ELEM(ELEM_X),
509			     SRC1_NEG(0),
510			     INDEX_MODE(SQ_INDEX_AR_X),
511			     PRED_SEL(SQ_PRED_SEL_OFF),
512			     LAST(1));
513    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
514				 SRC1_ABS(0),
515				 UPDATE_EXECUTE_MASK(0),
516				 UPDATE_PRED(0),
517				 WRITE_MASK(0),
518				 OMOD(SQ_ALU_OMOD_OFF),
519				 ALU_INST(SQ_OP2_INST_INTERP_XY),
520				 BANK_SWIZZLE(SQ_ALU_VEC_210),
521				 DST_GPR(0),
522				 DST_REL(ABSOLUTE),
523				 DST_ELEM(ELEM_W),
524				 CLAMP(0));
525
526    /* 7 */
527    shader[i++] = 0x00000000;
528    shader[i++] = 0x00000000;
529
530    /* 8/9 TEX INST 0 */
531    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
532			     INST_MOD(0),
533			     FETCH_WHOLE_QUAD(0),
534			     RESOURCE_ID(0),
535			     SRC_GPR(0),
536			     SRC_REL(ABSOLUTE),
537			     ALT_CONST(0),
538			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
539			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
540    shader[i++] = TEX_DWORD1(DST_GPR(0),
541			     DST_REL(ABSOLUTE),
542			     DST_SEL_X(SQ_SEL_X), /* R */
543			     DST_SEL_Y(SQ_SEL_Y), /* G */
544			     DST_SEL_Z(SQ_SEL_Z), /* B */
545			     DST_SEL_W(SQ_SEL_W), /* A */
546			     LOD_BIAS(0),
547			     COORD_TYPE_X(TEX_UNNORMALIZED),
548			     COORD_TYPE_Y(TEX_UNNORMALIZED),
549			     COORD_TYPE_Z(TEX_UNNORMALIZED),
550			     COORD_TYPE_W(TEX_UNNORMALIZED));
551    shader[i++] = TEX_DWORD2(OFFSET_X(0),
552			     OFFSET_Y(0),
553			     OFFSET_Z(0),
554			     SAMPLER_ID(0),
555			     SRC_SEL_X(SQ_SEL_X),
556			     SRC_SEL_Y(SQ_SEL_Y),
557			     SRC_SEL_Z(SQ_SEL_0),
558			     SRC_SEL_W(SQ_SEL_1));
559    shader[i++] = TEX_DWORD_PAD;
560
561    return i;
562}
563
564int evergreen_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
565{
566    int i = 0;
567
568    /* 0 */
569    shader[i++] = CF_DWORD0(ADDR(6),
570			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
571    shader[i++] = CF_DWORD1(POP_COUNT(0),
572                            CF_CONST(0),
573                            COND(SQ_CF_COND_ACTIVE),
574                            I_COUNT(2),
575                            VALID_PIXEL_MODE(0),
576                            END_OF_PROGRAM(0),
577                            CF_INST(SQ_CF_INST_VC),
578                            WHOLE_QUAD_MODE(0),
579                            BARRIER(1));
580
581    /* 1 - ALU */
582    shader[i++] = CF_ALU_DWORD0(ADDR(4),
583				KCACHE_BANK0(0),
584				KCACHE_BANK1(0),
585				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
586    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
587				KCACHE_ADDR0(0),
588				KCACHE_ADDR1(0),
589				I_COUNT(2),
590				ALT_CONST(0),
591				CF_INST(SQ_CF_INST_ALU),
592				WHOLE_QUAD_MODE(0),
593				BARRIER(1));
594
595    /* 2 */
596    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
597                                          TYPE(SQ_EXPORT_POS),
598                                          RW_GPR(1),
599                                          RW_REL(ABSOLUTE),
600                                          INDEX_GPR(0),
601                                          ELEM_SIZE(3));
602    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
603                                               SRC_SEL_Y(SQ_SEL_Y),
604                                               SRC_SEL_Z(SQ_SEL_Z),
605                                               SRC_SEL_W(SQ_SEL_W),
606                                               BURST_COUNT(1),
607                                               VALID_PIXEL_MODE(0),
608                                               END_OF_PROGRAM(0),
609                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
610                                               MARK(0),
611                                               BARRIER(1));
612    /* 3 */
613    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
614                                          TYPE(SQ_EXPORT_PARAM),
615                                          RW_GPR(0),
616                                          RW_REL(ABSOLUTE),
617                                          INDEX_GPR(0),
618                                          ELEM_SIZE(3));
619    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
620                                               SRC_SEL_Y(SQ_SEL_Y),
621                                               SRC_SEL_Z(SQ_SEL_Z),
622                                               SRC_SEL_W(SQ_SEL_W),
623                                               BURST_COUNT(1),
624                                               VALID_PIXEL_MODE(0),
625                                               END_OF_PROGRAM(1),
626                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
627                                               MARK(0),
628                                               BARRIER(0));
629
630
631    /* 4 texX / w */
632    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
633                             SRC0_REL(ABSOLUTE),
634                             SRC0_ELEM(ELEM_X),
635                             SRC0_NEG(0),
636                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
637                             SRC1_REL(ABSOLUTE),
638                             SRC1_ELEM(ELEM_X),
639                             SRC1_NEG(0),
640                             INDEX_MODE(SQ_INDEX_AR_X),
641                             PRED_SEL(SQ_PRED_SEL_OFF),
642                             LAST(0));
643    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
644                                 SRC1_ABS(0),
645                                 UPDATE_EXECUTE_MASK(0),
646                                 UPDATE_PRED(0),
647                                 WRITE_MASK(1),
648                                 OMOD(SQ_ALU_OMOD_OFF),
649                                 ALU_INST(SQ_OP2_INST_MUL),
650                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
651                                 DST_GPR(0),
652                                 DST_REL(ABSOLUTE),
653                                 DST_ELEM(ELEM_X),
654                                 CLAMP(0));
655
656    /* 5 texY / h */
657    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
658                             SRC0_REL(ABSOLUTE),
659                             SRC0_ELEM(ELEM_Y),
660                             SRC0_NEG(0),
661                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
662                             SRC1_REL(ABSOLUTE),
663                             SRC1_ELEM(ELEM_Y),
664                             SRC1_NEG(0),
665                             INDEX_MODE(SQ_INDEX_AR_X),
666                             PRED_SEL(SQ_PRED_SEL_OFF),
667                             LAST(1));
668    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
669                                 SRC1_ABS(0),
670                                 UPDATE_EXECUTE_MASK(0),
671                                 UPDATE_PRED(0),
672                                 WRITE_MASK(1),
673                                 OMOD(SQ_ALU_OMOD_OFF),
674                                 ALU_INST(SQ_OP2_INST_MUL),
675                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
676                                 DST_GPR(0),
677                                 DST_REL(ABSOLUTE),
678                                 DST_ELEM(ELEM_Y),
679                                 CLAMP(0));
680
681    /* 6/7 */
682    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
683                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
684                             FETCH_WHOLE_QUAD(0),
685                             BUFFER_ID(0),
686                             SRC_GPR(0),
687                             SRC_REL(ABSOLUTE),
688                             SRC_SEL_X(SQ_SEL_X),
689                             MEGA_FETCH_COUNT(16));
690    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
691                                 DST_REL(ABSOLUTE),
692                                 DST_SEL_X(SQ_SEL_X),
693                                 DST_SEL_Y(SQ_SEL_Y),
694                                 DST_SEL_Z(SQ_SEL_0),
695                                 DST_SEL_W(SQ_SEL_1),
696                                 USE_CONST_FIELDS(0),
697                                 DATA_FORMAT(FMT_32_32_FLOAT),
698                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
699                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
700                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
701    shader[i++] = VTX_DWORD2(OFFSET(0),
702#if X_BYTE_ORDER == X_BIG_ENDIAN
703                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
704#else
705                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
706#endif
707                             CONST_BUF_NO_STRIDE(0),
708                             MEGA_FETCH(1),
709			     ALT_CONST(0),
710			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
711    shader[i++] = VTX_DWORD_PAD;
712    /* 8/9 */
713    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
714                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
715                             FETCH_WHOLE_QUAD(0),
716                             BUFFER_ID(0),
717                             SRC_GPR(0),
718                             SRC_REL(ABSOLUTE),
719                             SRC_SEL_X(SQ_SEL_X),
720                             MEGA_FETCH_COUNT(8));
721    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
722                                 DST_REL(ABSOLUTE),
723                                 DST_SEL_X(SQ_SEL_X),
724                                 DST_SEL_Y(SQ_SEL_Y),
725                                 DST_SEL_Z(SQ_SEL_0),
726                                 DST_SEL_W(SQ_SEL_1),
727                                 USE_CONST_FIELDS(0),
728                                 DATA_FORMAT(FMT_32_32_FLOAT),
729                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
730                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
731                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
732    shader[i++] = VTX_DWORD2(OFFSET(8),
733#if X_BYTE_ORDER == X_BIG_ENDIAN
734                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
735#else
736                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
737#endif
738                             CONST_BUF_NO_STRIDE(0),
739                             MEGA_FETCH(0),
740			     ALT_CONST(0),
741			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
742    shader[i++] = VTX_DWORD_PAD;
743
744    return i;
745}
746
747int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
748{
749    int i = 0;
750
751    /* 0 */
752    shader[i++] = CF_ALU_DWORD0(ADDR(5),
753				KCACHE_BANK0(0),
754				KCACHE_BANK1(0),
755				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
756    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
757				KCACHE_ADDR0(0),
758				KCACHE_ADDR1(0),
759				I_COUNT(4),
760				ALT_CONST(0),
761				CF_INST(SQ_CF_INST_ALU),
762				WHOLE_QUAD_MODE(0),
763				BARRIER(1));
764    /* 1 */
765    shader[i++] = CF_DWORD0(ADDR(21),
766			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
767    shader[i++] = CF_DWORD1(POP_COUNT(0),
768                            CF_CONST(0),
769                            COND(SQ_CF_COND_BOOL),
770                            I_COUNT(0),
771                            VALID_PIXEL_MODE(0),
772                            END_OF_PROGRAM(0),
773                            CF_INST(SQ_CF_INST_CALL),
774                            WHOLE_QUAD_MODE(0),
775                            BARRIER(0));
776    /* 2 */
777    shader[i++] = CF_DWORD0(ADDR(30),
778			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
779    shader[i++] = CF_DWORD1(POP_COUNT(0),
780                            CF_CONST(0),
781                            COND(SQ_CF_COND_NOT_BOOL),
782                            I_COUNT(0),
783                            VALID_PIXEL_MODE(0),
784                            END_OF_PROGRAM(0),
785                            CF_INST(SQ_CF_INST_CALL),
786                            WHOLE_QUAD_MODE(0),
787                            BARRIER(0));
788    /* 3 */
789    shader[i++] = CF_ALU_DWORD0(ADDR(9),
790                                KCACHE_BANK0(0),
791                                KCACHE_BANK1(0),
792                                KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
793    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
794                                KCACHE_ADDR0(0),
795                                KCACHE_ADDR1(0),
796                                I_COUNT(12),
797                                ALT_CONST(0),
798                                CF_INST(SQ_CF_INST_ALU),
799                                WHOLE_QUAD_MODE(0),
800                                BARRIER(1));
801    /* 4 */
802    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
803                                          TYPE(SQ_EXPORT_PIXEL),
804                                          RW_GPR(2),
805                                          RW_REL(ABSOLUTE),
806                                          INDEX_GPR(0),
807                                          ELEM_SIZE(3));
808    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
809                                               SRC_SEL_Y(SQ_SEL_Y),
810                                               SRC_SEL_Z(SQ_SEL_Z),
811                                               SRC_SEL_W(SQ_SEL_W),
812                                               BURST_COUNT(1),
813                                               VALID_PIXEL_MODE(0),
814                                               END_OF_PROGRAM(1),
815                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
816                                               MARK(0),
817                                               BARRIER(1));
818    /* 5 interpolate tex coords */
819    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
820			     SRC0_REL(ABSOLUTE),
821			     SRC0_ELEM(ELEM_Y),
822			     SRC0_NEG(0),
823			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
824			     SRC1_REL(ABSOLUTE),
825			     SRC1_ELEM(ELEM_X),
826			     SRC1_NEG(0),
827			     INDEX_MODE(SQ_INDEX_AR_X),
828			     PRED_SEL(SQ_PRED_SEL_OFF),
829			     LAST(0));
830    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
831				 SRC1_ABS(0),
832				 UPDATE_EXECUTE_MASK(0),
833				 UPDATE_PRED(0),
834				 WRITE_MASK(1),
835				 OMOD(SQ_ALU_OMOD_OFF),
836				 ALU_INST(SQ_OP2_INST_INTERP_XY),
837				 BANK_SWIZZLE(SQ_ALU_VEC_210),
838				 DST_GPR(0),
839				 DST_REL(ABSOLUTE),
840				 DST_ELEM(ELEM_X),
841				 CLAMP(0));
842    /* 6 */
843    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
844			     SRC0_REL(ABSOLUTE),
845			     SRC0_ELEM(ELEM_X),
846			     SRC0_NEG(0),
847			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
848			     SRC1_REL(ABSOLUTE),
849			     SRC1_ELEM(ELEM_X),
850			     SRC1_NEG(0),
851			     INDEX_MODE(SQ_INDEX_AR_X),
852			     PRED_SEL(SQ_PRED_SEL_OFF),
853			     LAST(0));
854    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
855				 SRC1_ABS(0),
856				 UPDATE_EXECUTE_MASK(0),
857				 UPDATE_PRED(0),
858				 WRITE_MASK(1),
859				 OMOD(SQ_ALU_OMOD_OFF),
860				 ALU_INST(SQ_OP2_INST_INTERP_XY),
861				 BANK_SWIZZLE(SQ_ALU_VEC_210),
862				 DST_GPR(0),
863				 DST_REL(ABSOLUTE),
864				 DST_ELEM(ELEM_Y),
865				 CLAMP(0));
866    /* 7 */
867    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
868			     SRC0_REL(ABSOLUTE),
869			     SRC0_ELEM(ELEM_Y),
870			     SRC0_NEG(0),
871			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
872			     SRC1_REL(ABSOLUTE),
873			     SRC1_ELEM(ELEM_X),
874			     SRC1_NEG(0),
875			     INDEX_MODE(SQ_INDEX_AR_X),
876			     PRED_SEL(SQ_PRED_SEL_OFF),
877			     LAST(0));
878    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
879				 SRC1_ABS(0),
880				 UPDATE_EXECUTE_MASK(0),
881				 UPDATE_PRED(0),
882				 WRITE_MASK(0),
883				 OMOD(SQ_ALU_OMOD_OFF),
884				 ALU_INST(SQ_OP2_INST_INTERP_XY),
885				 BANK_SWIZZLE(SQ_ALU_VEC_210),
886				 DST_GPR(0),
887				 DST_REL(ABSOLUTE),
888				 DST_ELEM(ELEM_Z),
889				 CLAMP(0));
890    /* 8 */
891    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
892			     SRC0_REL(ABSOLUTE),
893			     SRC0_ELEM(ELEM_X),
894			     SRC0_NEG(0),
895			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
896			     SRC1_REL(ABSOLUTE),
897			     SRC1_ELEM(ELEM_X),
898			     SRC1_NEG(0),
899			     INDEX_MODE(SQ_INDEX_AR_X),
900			     PRED_SEL(SQ_PRED_SEL_OFF),
901			     LAST(1));
902    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
903				 SRC1_ABS(0),
904				 UPDATE_EXECUTE_MASK(0),
905				 UPDATE_PRED(0),
906				 WRITE_MASK(0),
907				 OMOD(SQ_ALU_OMOD_OFF),
908				 ALU_INST(SQ_OP2_INST_INTERP_XY),
909				 BANK_SWIZZLE(SQ_ALU_VEC_210),
910				 DST_GPR(0),
911				 DST_REL(ABSOLUTE),
912				 DST_ELEM(ELEM_W),
913				 CLAMP(0));
914
915    /* 9,10,11,12 */
916    /* r2.x = MAD(c0.w, r1.x, c0.x) */
917    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
918                             SRC0_REL(ABSOLUTE),
919                             SRC0_ELEM(ELEM_W),
920                             SRC0_NEG(0),
921                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
922                             SRC1_REL(ABSOLUTE),
923                             SRC1_ELEM(ELEM_X),
924                             SRC1_NEG(0),
925                             INDEX_MODE(SQ_INDEX_LOOP),
926                             PRED_SEL(SQ_PRED_SEL_OFF),
927                             LAST(0));
928    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
929                                 SRC2_REL(ABSOLUTE),
930                                 SRC2_ELEM(ELEM_X),
931                                 SRC2_NEG(0),
932                                 ALU_INST(SQ_OP3_INST_MULADD),
933                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
934                                 DST_GPR(2),
935                                 DST_REL(ABSOLUTE),
936                                 DST_ELEM(ELEM_X),
937                                 CLAMP(0));
938    /* r2.y = MAD(c0.w, r1.x, c0.y) */
939    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
940                             SRC0_REL(ABSOLUTE),
941                             SRC0_ELEM(ELEM_W),
942                             SRC0_NEG(0),
943                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
944                             SRC1_REL(ABSOLUTE),
945                             SRC1_ELEM(ELEM_X),
946                             SRC1_NEG(0),
947                             INDEX_MODE(SQ_INDEX_LOOP),
948                             PRED_SEL(SQ_PRED_SEL_OFF),
949                             LAST(0));
950    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
951                                 SRC2_REL(ABSOLUTE),
952                                 SRC2_ELEM(ELEM_Y),
953                                 SRC2_NEG(0),
954                                 ALU_INST(SQ_OP3_INST_MULADD),
955                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
956                                 DST_GPR(2),
957                                 DST_REL(ABSOLUTE),
958                                 DST_ELEM(ELEM_Y),
959                                 CLAMP(0));
960    /* r2.z = MAD(c0.w, r1.x, c0.z) */
961    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
962                             SRC0_REL(ABSOLUTE),
963                             SRC0_ELEM(ELEM_W),
964                             SRC0_NEG(0),
965                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
966                             SRC1_REL(ABSOLUTE),
967                             SRC1_ELEM(ELEM_X),
968                             SRC1_NEG(0),
969                             INDEX_MODE(SQ_INDEX_LOOP),
970                             PRED_SEL(SQ_PRED_SEL_OFF),
971                             LAST(0));
972    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
973                                 SRC2_REL(ABSOLUTE),
974                                 SRC2_ELEM(ELEM_Z),
975                                 SRC2_NEG(0),
976                                 ALU_INST(SQ_OP3_INST_MULADD),
977                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
978                                 DST_GPR(2),
979                                 DST_REL(ABSOLUTE),
980                                 DST_ELEM(ELEM_Z),
981                                 CLAMP(0));
982    /* r2.w = MAD(0, 0, 1) */
983    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
984                             SRC0_REL(ABSOLUTE),
985                             SRC0_ELEM(ELEM_X),
986                             SRC0_NEG(0),
987                             SRC1_SEL(SQ_ALU_SRC_0),
988                             SRC1_REL(ABSOLUTE),
989                             SRC1_ELEM(ELEM_X),
990                             SRC1_NEG(0),
991                             INDEX_MODE(SQ_INDEX_LOOP),
992                             PRED_SEL(SQ_PRED_SEL_OFF),
993                             LAST(1));
994    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
995                                 SRC2_REL(ABSOLUTE),
996                                 SRC2_ELEM(ELEM_X),
997                                 SRC2_NEG(0),
998                                 ALU_INST(SQ_OP3_INST_MULADD),
999                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1000                                 DST_GPR(2),
1001                                 DST_REL(ABSOLUTE),
1002                                 DST_ELEM(ELEM_W),
1003                                 CLAMP(0));
1004
1005    /* 13,14,15,16 */
1006    /* r2.x = MAD(c1.x, r1.y, pv.x) */
1007    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
1008                             SRC0_REL(ABSOLUTE),
1009                             SRC0_ELEM(ELEM_X),
1010                             SRC0_NEG(0),
1011                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1012                             SRC1_REL(ABSOLUTE),
1013                             SRC1_ELEM(ELEM_Y),
1014                             SRC1_NEG(0),
1015                             INDEX_MODE(SQ_INDEX_LOOP),
1016                             PRED_SEL(SQ_PRED_SEL_OFF),
1017                             LAST(0));
1018    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1019                                 SRC2_REL(ABSOLUTE),
1020                                 SRC2_ELEM(ELEM_X),
1021                                 SRC2_NEG(0),
1022                                 ALU_INST(SQ_OP3_INST_MULADD),
1023                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1024                                 DST_GPR(2),
1025                                 DST_REL(ABSOLUTE),
1026                                 DST_ELEM(ELEM_X),
1027                                 CLAMP(0));
1028    /* r2.y = MAD(c1.y, r1.y, pv.y) */
1029    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
1030                             SRC0_REL(ABSOLUTE),
1031                             SRC0_ELEM(ELEM_Y),
1032                             SRC0_NEG(0),
1033                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1034                             SRC1_REL(ABSOLUTE),
1035                             SRC1_ELEM(ELEM_Y),
1036                             SRC1_NEG(0),
1037                             INDEX_MODE(SQ_INDEX_LOOP),
1038                             PRED_SEL(SQ_PRED_SEL_OFF),
1039                             LAST(0));
1040    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1041                                 SRC2_REL(ABSOLUTE),
1042                                 SRC2_ELEM(ELEM_Y),
1043                                 SRC2_NEG(0),
1044                                 ALU_INST(SQ_OP3_INST_MULADD),
1045                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1046                                 DST_GPR(2),
1047                                 DST_REL(ABSOLUTE),
1048                                 DST_ELEM(ELEM_Y),
1049                                 CLAMP(0));
1050    /* r2.z = MAD(c1.z, r1.y, pv.z) */
1051    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
1052                             SRC0_REL(ABSOLUTE),
1053                             SRC0_ELEM(ELEM_Z),
1054                             SRC0_NEG(0),
1055                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1056                             SRC1_REL(ABSOLUTE),
1057                             SRC1_ELEM(ELEM_Y),
1058                             SRC1_NEG(0),
1059                             INDEX_MODE(SQ_INDEX_LOOP),
1060                             PRED_SEL(SQ_PRED_SEL_OFF),
1061                             LAST(0));
1062    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1063                                 SRC2_REL(ABSOLUTE),
1064                                 SRC2_ELEM(ELEM_Z),
1065                                 SRC2_NEG(0),
1066                                 ALU_INST(SQ_OP3_INST_MULADD),
1067                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1068                                 DST_GPR(2),
1069                                 DST_REL(ABSOLUTE),
1070                                 DST_ELEM(ELEM_Z),
1071                                 CLAMP(0));
1072    /* r2.w = MAD(0, 0, 1) */
1073    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
1074                             SRC0_REL(ABSOLUTE),
1075                             SRC0_ELEM(ELEM_X),
1076                             SRC0_NEG(0),
1077                             SRC1_SEL(SQ_ALU_SRC_0),
1078                             SRC1_REL(ABSOLUTE),
1079                             SRC1_ELEM(ELEM_X),
1080                             SRC1_NEG(0),
1081                             INDEX_MODE(SQ_INDEX_LOOP),
1082                             PRED_SEL(SQ_PRED_SEL_OFF),
1083                             LAST(1));
1084    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
1085                                 SRC2_REL(ABSOLUTE),
1086                                 SRC2_ELEM(ELEM_W),
1087                                 SRC2_NEG(0),
1088                                 ALU_INST(SQ_OP3_INST_MULADD),
1089                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1090                                 DST_GPR(2),
1091                                 DST_REL(ABSOLUTE),
1092                                 DST_ELEM(ELEM_W),
1093                                 CLAMP(0));
1094    /* 17,18,19,20 */
1095    /* r2.x = MAD(c2.x, r1.z, pv.x) */
1096    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
1097                             SRC0_REL(ABSOLUTE),
1098                             SRC0_ELEM(ELEM_X),
1099                             SRC0_NEG(0),
1100                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1101                             SRC1_REL(ABSOLUTE),
1102                             SRC1_ELEM(ELEM_Z),
1103                             SRC1_NEG(0),
1104                             INDEX_MODE(SQ_INDEX_LOOP),
1105                             PRED_SEL(SQ_PRED_SEL_OFF),
1106                             LAST(0));
1107    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1108                                 SRC2_REL(ABSOLUTE),
1109                                 SRC2_ELEM(ELEM_X),
1110                                 SRC2_NEG(0),
1111                                 ALU_INST(SQ_OP3_INST_MULADD),
1112                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1113                                 DST_GPR(2),
1114                                 DST_REL(ABSOLUTE),
1115                                 DST_ELEM(ELEM_X),
1116                                 CLAMP(1));
1117    /* r2.y = MAD(c2.y, r1.z, pv.y) */
1118    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
1119                             SRC0_REL(ABSOLUTE),
1120                             SRC0_ELEM(ELEM_Y),
1121                             SRC0_NEG(0),
1122                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1123                             SRC1_REL(ABSOLUTE),
1124                             SRC1_ELEM(ELEM_Z),
1125                             SRC1_NEG(0),
1126                             INDEX_MODE(SQ_INDEX_LOOP),
1127                             PRED_SEL(SQ_PRED_SEL_OFF),
1128                             LAST(0));
1129    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1130                                 SRC2_REL(ABSOLUTE),
1131                                 SRC2_ELEM(ELEM_Y),
1132                                 SRC2_NEG(0),
1133                                 ALU_INST(SQ_OP3_INST_MULADD),
1134                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1135                                 DST_GPR(2),
1136                                 DST_REL(ABSOLUTE),
1137                                 DST_ELEM(ELEM_Y),
1138                                 CLAMP(1));
1139    /* r2.z = MAD(c2.z, r1.z, pv.z) */
1140    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
1141                             SRC0_REL(ABSOLUTE),
1142                             SRC0_ELEM(ELEM_Z),
1143                             SRC0_NEG(0),
1144                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1145                             SRC1_REL(ABSOLUTE),
1146                             SRC1_ELEM(ELEM_Z),
1147                             SRC1_NEG(0),
1148                             INDEX_MODE(SQ_INDEX_LOOP),
1149                             PRED_SEL(SQ_PRED_SEL_OFF),
1150                             LAST(0));
1151    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1152                                 SRC2_REL(ABSOLUTE),
1153                                 SRC2_ELEM(ELEM_Z),
1154                                 SRC2_NEG(0),
1155                                 ALU_INST(SQ_OP3_INST_MULADD),
1156                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1157                                 DST_GPR(2),
1158                                 DST_REL(ABSOLUTE),
1159                                 DST_ELEM(ELEM_Z),
1160                                 CLAMP(1));
1161    /* r2.w = MAD(0, 0, 1) */
1162    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
1163                             SRC0_REL(ABSOLUTE),
1164                             SRC0_ELEM(ELEM_X),
1165                             SRC0_NEG(0),
1166                             SRC1_SEL(SQ_ALU_SRC_0),
1167                             SRC1_REL(ABSOLUTE),
1168                             SRC1_ELEM(ELEM_X),
1169                             SRC1_NEG(0),
1170                             INDEX_MODE(SQ_INDEX_LOOP),
1171                             PRED_SEL(SQ_PRED_SEL_OFF),
1172                             LAST(1));
1173    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
1174                                 SRC2_REL(ABSOLUTE),
1175                                 SRC2_ELEM(ELEM_X),
1176                                 SRC2_NEG(0),
1177                                 ALU_INST(SQ_OP3_INST_MULADD),
1178                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1179                                 DST_GPR(2),
1180                                 DST_REL(ABSOLUTE),
1181                                 DST_ELEM(ELEM_W),
1182                                 CLAMP(1));
1183
1184    /* 21 */
1185    shader[i++] = CF_DWORD0(ADDR(24),
1186			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1187    shader[i++] = CF_DWORD1(POP_COUNT(0),
1188                            CF_CONST(0),
1189                            COND(SQ_CF_COND_ACTIVE),
1190                            I_COUNT(3),
1191                            VALID_PIXEL_MODE(0),
1192                            END_OF_PROGRAM(0),
1193                            CF_INST(SQ_CF_INST_TC),
1194                            WHOLE_QUAD_MODE(0),
1195                            BARRIER(1));
1196    /* 22 */
1197    shader[i++] = CF_DWORD0(ADDR(0),
1198			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1199    shader[i++] = CF_DWORD1(POP_COUNT(0),
1200			    CF_CONST(0),
1201			    COND(SQ_CF_COND_ACTIVE),
1202			    I_COUNT(0),
1203			    VALID_PIXEL_MODE(0),
1204			    END_OF_PROGRAM(0),
1205			    CF_INST(SQ_CF_INST_RETURN),
1206			    WHOLE_QUAD_MODE(0),
1207			    BARRIER(1));
1208    /* 23 */
1209    shader[i++] = 0x00000000;
1210    shader[i++] = 0x00000000;
1211    /* 24/25 */
1212    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1213                             INST_MOD(0),
1214                             FETCH_WHOLE_QUAD(0),
1215                             RESOURCE_ID(0),
1216                             SRC_GPR(0),
1217                             SRC_REL(ABSOLUTE),
1218                             ALT_CONST(0),
1219			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1220			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1221    shader[i++] = TEX_DWORD1(DST_GPR(1),
1222                             DST_REL(ABSOLUTE),
1223                             DST_SEL_X(SQ_SEL_X),
1224                             DST_SEL_Y(SQ_SEL_MASK),
1225                             DST_SEL_Z(SQ_SEL_MASK),
1226                             DST_SEL_W(SQ_SEL_1),
1227                             LOD_BIAS(0),
1228                             COORD_TYPE_X(TEX_NORMALIZED),
1229                             COORD_TYPE_Y(TEX_NORMALIZED),
1230                             COORD_TYPE_Z(TEX_NORMALIZED),
1231                             COORD_TYPE_W(TEX_NORMALIZED));
1232    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1233                             OFFSET_Y(0),
1234                             OFFSET_Z(0),
1235                             SAMPLER_ID(0),
1236                             SRC_SEL_X(SQ_SEL_X),
1237                             SRC_SEL_Y(SQ_SEL_Y),
1238                             SRC_SEL_Z(SQ_SEL_0),
1239                             SRC_SEL_W(SQ_SEL_1));
1240    shader[i++] = TEX_DWORD_PAD;
1241    /* 26/27 */
1242    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1243                             INST_MOD(0),
1244                             FETCH_WHOLE_QUAD(0),
1245                             RESOURCE_ID(1),
1246                             SRC_GPR(0),
1247                             SRC_REL(ABSOLUTE),
1248                             ALT_CONST(0),
1249			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1250			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1251    shader[i++] = TEX_DWORD1(DST_GPR(1),
1252                             DST_REL(ABSOLUTE),
1253                             DST_SEL_X(SQ_SEL_MASK),
1254                             DST_SEL_Y(SQ_SEL_MASK),
1255                             DST_SEL_Z(SQ_SEL_X),
1256                             DST_SEL_W(SQ_SEL_MASK),
1257                             LOD_BIAS(0),
1258                             COORD_TYPE_X(TEX_NORMALIZED),
1259                             COORD_TYPE_Y(TEX_NORMALIZED),
1260                             COORD_TYPE_Z(TEX_NORMALIZED),
1261                             COORD_TYPE_W(TEX_NORMALIZED));
1262    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1263                             OFFSET_Y(0),
1264                             OFFSET_Z(0),
1265                             SAMPLER_ID(1),
1266                             SRC_SEL_X(SQ_SEL_X),
1267                             SRC_SEL_Y(SQ_SEL_Y),
1268                             SRC_SEL_Z(SQ_SEL_0),
1269                             SRC_SEL_W(SQ_SEL_1));
1270    shader[i++] = TEX_DWORD_PAD;
1271    /* 28/29 */
1272    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1273                             INST_MOD(0),
1274                             FETCH_WHOLE_QUAD(0),
1275                             RESOURCE_ID(2),
1276                             SRC_GPR(0),
1277                             SRC_REL(ABSOLUTE),
1278                             ALT_CONST(0),
1279			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1280			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1281    shader[i++] = TEX_DWORD1(DST_GPR(1),
1282                             DST_REL(ABSOLUTE),
1283                             DST_SEL_X(SQ_SEL_MASK),
1284                             DST_SEL_Y(SQ_SEL_X),
1285                             DST_SEL_Z(SQ_SEL_MASK),
1286                             DST_SEL_W(SQ_SEL_MASK),
1287                             LOD_BIAS(0),
1288                             COORD_TYPE_X(TEX_NORMALIZED),
1289                             COORD_TYPE_Y(TEX_NORMALIZED),
1290                             COORD_TYPE_Z(TEX_NORMALIZED),
1291                             COORD_TYPE_W(TEX_NORMALIZED));
1292    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1293                             OFFSET_Y(0),
1294                             OFFSET_Z(0),
1295                             SAMPLER_ID(2),
1296                             SRC_SEL_X(SQ_SEL_X),
1297                             SRC_SEL_Y(SQ_SEL_Y),
1298                             SRC_SEL_Z(SQ_SEL_0),
1299                             SRC_SEL_W(SQ_SEL_1));
1300    shader[i++] = TEX_DWORD_PAD;
1301    /* 30 */
1302    shader[i++] = CF_DWORD0(ADDR(32),
1303			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1304    shader[i++] = CF_DWORD1(POP_COUNT(0),
1305                            CF_CONST(0),
1306                            COND(SQ_CF_COND_ACTIVE),
1307                            I_COUNT(1),
1308                            VALID_PIXEL_MODE(0),
1309                            END_OF_PROGRAM(0),
1310                            CF_INST(SQ_CF_INST_TC),
1311                            WHOLE_QUAD_MODE(0),
1312                            BARRIER(1));
1313    /* 31 */
1314    shader[i++] = CF_DWORD0(ADDR(0),
1315			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1316    shader[i++] = CF_DWORD1(POP_COUNT(0),
1317			    CF_CONST(0),
1318			    COND(SQ_CF_COND_ACTIVE),
1319			    I_COUNT(0),
1320			    VALID_PIXEL_MODE(0),
1321			    END_OF_PROGRAM(0),
1322			    CF_INST(SQ_CF_INST_RETURN),
1323			    WHOLE_QUAD_MODE(0),
1324			    BARRIER(1));
1325    /* 32/33 */
1326    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1327                             INST_MOD(0),
1328                             FETCH_WHOLE_QUAD(0),
1329                             RESOURCE_ID(0),
1330                             SRC_GPR(0),
1331                             SRC_REL(ABSOLUTE),
1332                             ALT_CONST(0),
1333                             RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1334                             SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1335    shader[i++] = TEX_DWORD1(DST_GPR(1),
1336                             DST_REL(ABSOLUTE),
1337                             DST_SEL_X(SQ_SEL_X),
1338                             DST_SEL_Y(SQ_SEL_Y),
1339                             DST_SEL_Z(SQ_SEL_Z),
1340                             DST_SEL_W(SQ_SEL_1),
1341                             LOD_BIAS(0),
1342                             COORD_TYPE_X(TEX_NORMALIZED),
1343                             COORD_TYPE_Y(TEX_NORMALIZED),
1344                             COORD_TYPE_Z(TEX_NORMALIZED),
1345                             COORD_TYPE_W(TEX_NORMALIZED));
1346    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1347                             OFFSET_Y(0),
1348                             OFFSET_Z(0),
1349                             SAMPLER_ID(0),
1350                             SRC_SEL_X(SQ_SEL_X),
1351                             SRC_SEL_Y(SQ_SEL_Y),
1352                             SRC_SEL_Z(SQ_SEL_0),
1353                             SRC_SEL_W(SQ_SEL_1));
1354    shader[i++] = TEX_DWORD_PAD;
1355
1356    return i;
1357}
1358
1359/* comp vs --------------------------------------- */
1360int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
1361{
1362    int i = 0;
1363
1364    /* 0 */
1365    shader[i++] = CF_DWORD0(ADDR(3),
1366			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1367    shader[i++] = CF_DWORD1(POP_COUNT(0),
1368                            CF_CONST(0),
1369                            COND(SQ_CF_COND_BOOL),
1370                            I_COUNT(0),
1371                            VALID_PIXEL_MODE(0),
1372                            END_OF_PROGRAM(0),
1373                            CF_INST(SQ_CF_INST_CALL),
1374                            WHOLE_QUAD_MODE(0),
1375                            BARRIER(0));
1376    /* 1 */
1377    shader[i++] = CF_DWORD0(ADDR(9),
1378			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1379    shader[i++] = CF_DWORD1(POP_COUNT(0),
1380                            CF_CONST(0),
1381                            COND(SQ_CF_COND_NOT_BOOL),
1382                            I_COUNT(0),
1383                            VALID_PIXEL_MODE(0),
1384                            END_OF_PROGRAM(0),
1385                            CF_INST(SQ_CF_INST_CALL),
1386                            WHOLE_QUAD_MODE(0),
1387                            BARRIER(0));
1388    /* 2 */
1389    shader[i++] = CF_DWORD0(ADDR(0),
1390                            JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1391    shader[i++] = CF_DWORD1(POP_COUNT(0),
1392                            CF_CONST(0),
1393                            COND(SQ_CF_COND_ACTIVE),
1394                            I_COUNT(0),
1395                            VALID_PIXEL_MODE(0),
1396                            END_OF_PROGRAM(1),
1397                            CF_INST(SQ_CF_INST_NOP),
1398                            WHOLE_QUAD_MODE(0),
1399                            BARRIER(1));
1400    /* 3 - mask sub */
1401    shader[i++] = CF_DWORD0(ADDR(44),
1402			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1403    shader[i++] = CF_DWORD1(POP_COUNT(0),
1404			    CF_CONST(0),
1405			    COND(SQ_CF_COND_ACTIVE),
1406			    I_COUNT(3),
1407			    VALID_PIXEL_MODE(0),
1408			    END_OF_PROGRAM(0),
1409			    CF_INST(SQ_CF_INST_VC),
1410			    WHOLE_QUAD_MODE(0),
1411			    BARRIER(1));
1412
1413    /* 4 - ALU */
1414    shader[i++] = CF_ALU_DWORD0(ADDR(14),
1415				KCACHE_BANK0(0),
1416				KCACHE_BANK1(0),
1417				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
1418    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1419				KCACHE_ADDR0(0),
1420				KCACHE_ADDR1(0),
1421				I_COUNT(20),
1422				ALT_CONST(0),
1423				CF_INST(SQ_CF_INST_ALU),
1424				WHOLE_QUAD_MODE(0),
1425				BARRIER(1));
1426
1427    /* 5 - dst */
1428    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1429					  TYPE(SQ_EXPORT_POS),
1430					  RW_GPR(2),
1431					  RW_REL(ABSOLUTE),
1432					  INDEX_GPR(0),
1433					  ELEM_SIZE(0));
1434    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1435					       SRC_SEL_Y(SQ_SEL_Y),
1436					       SRC_SEL_Z(SQ_SEL_0),
1437					       SRC_SEL_W(SQ_SEL_1),
1438					       BURST_COUNT(1),
1439					       VALID_PIXEL_MODE(0),
1440					       END_OF_PROGRAM(0),
1441					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1442					       MARK(0),
1443					       BARRIER(1));
1444    /* 6 - src */
1445    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1446					  TYPE(SQ_EXPORT_PARAM),
1447					  RW_GPR(1),
1448					  RW_REL(ABSOLUTE),
1449					  INDEX_GPR(0),
1450					  ELEM_SIZE(0));
1451    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1452					       SRC_SEL_Y(SQ_SEL_Y),
1453					       SRC_SEL_Z(SQ_SEL_0),
1454					       SRC_SEL_W(SQ_SEL_1),
1455					       BURST_COUNT(1),
1456					       VALID_PIXEL_MODE(0),
1457					       END_OF_PROGRAM(0),
1458					       CF_INST(SQ_CF_INST_EXPORT),
1459					       MARK(0),
1460					       BARRIER(0));
1461    /* 7 - mask */
1462    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
1463					  TYPE(SQ_EXPORT_PARAM),
1464					  RW_GPR(0),
1465					  RW_REL(ABSOLUTE),
1466					  INDEX_GPR(0),
1467					  ELEM_SIZE(0));
1468    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1469					       SRC_SEL_Y(SQ_SEL_Y),
1470					       SRC_SEL_Z(SQ_SEL_0),
1471					       SRC_SEL_W(SQ_SEL_1),
1472					       BURST_COUNT(1),
1473					       VALID_PIXEL_MODE(0),
1474					       END_OF_PROGRAM(0),
1475					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1476					       WHOLE_QUAD_MODE(0),
1477					       BARRIER(0));
1478    /* 8 */
1479    shader[i++] = CF_DWORD0(ADDR(0),
1480			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1481    shader[i++] = CF_DWORD1(POP_COUNT(0),
1482			    CF_CONST(0),
1483			    COND(SQ_CF_COND_ACTIVE),
1484			    I_COUNT(0),
1485			    VALID_PIXEL_MODE(0),
1486			    END_OF_PROGRAM(0),
1487			    CF_INST(SQ_CF_INST_RETURN),
1488			    WHOLE_QUAD_MODE(0),
1489			    BARRIER(1));
1490    /* 9 - non-mask sub */
1491    shader[i++] = CF_DWORD0(ADDR(50),
1492			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1493    shader[i++] = CF_DWORD1(POP_COUNT(0),
1494			    CF_CONST(0),
1495			    COND(SQ_CF_COND_ACTIVE),
1496			    I_COUNT(2),
1497			    VALID_PIXEL_MODE(0),
1498			    END_OF_PROGRAM(0),
1499			    CF_INST(SQ_CF_INST_VC),
1500			    WHOLE_QUAD_MODE(0),
1501			    BARRIER(1));
1502
1503    /* 10 - ALU */
1504    shader[i++] = CF_ALU_DWORD0(ADDR(34),
1505				KCACHE_BANK0(0),
1506				KCACHE_BANK1(0),
1507				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
1508    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1509				KCACHE_ADDR0(0),
1510				KCACHE_ADDR1(0),
1511				I_COUNT(10),
1512				ALT_CONST(0),
1513				CF_INST(SQ_CF_INST_ALU),
1514				WHOLE_QUAD_MODE(0),
1515				BARRIER(1));
1516
1517    /* 11 - dst */
1518    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1519					  TYPE(SQ_EXPORT_POS),
1520					  RW_GPR(1),
1521					  RW_REL(ABSOLUTE),
1522					  INDEX_GPR(0),
1523					  ELEM_SIZE(0));
1524    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1525					       SRC_SEL_Y(SQ_SEL_Y),
1526					       SRC_SEL_Z(SQ_SEL_0),
1527					       SRC_SEL_W(SQ_SEL_1),
1528					       BURST_COUNT(0),
1529					       VALID_PIXEL_MODE(0),
1530					       END_OF_PROGRAM(0),
1531					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1532					       MARK(0),
1533					       BARRIER(1));
1534    /* 12 - src */
1535    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1536					  TYPE(SQ_EXPORT_PARAM),
1537					  RW_GPR(0),
1538					  RW_REL(ABSOLUTE),
1539					  INDEX_GPR(0),
1540					  ELEM_SIZE(0));
1541    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1542					       SRC_SEL_Y(SQ_SEL_Y),
1543					       SRC_SEL_Z(SQ_SEL_0),
1544					       SRC_SEL_W(SQ_SEL_1),
1545					       BURST_COUNT(0),
1546					       VALID_PIXEL_MODE(0),
1547					       END_OF_PROGRAM(0),
1548					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1549					       MARK(0),
1550					       BARRIER(0));
1551    /* 13 */
1552    shader[i++] = CF_DWORD0(ADDR(0),
1553			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1554    shader[i++] = CF_DWORD1(POP_COUNT(0),
1555			    CF_CONST(0),
1556			    COND(SQ_CF_COND_ACTIVE),
1557			    I_COUNT(0),
1558			    VALID_PIXEL_MODE(0),
1559			    END_OF_PROGRAM(0),
1560			    CF_INST(SQ_CF_INST_RETURN),
1561			    WHOLE_QUAD_MODE(0),
1562			    BARRIER(1));
1563
1564    /* 14 srcX.x DOT4 - mask */
1565    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1566                             SRC0_REL(ABSOLUTE),
1567                             SRC0_ELEM(ELEM_X),
1568                             SRC0_NEG(0),
1569                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1570                             SRC1_REL(ABSOLUTE),
1571                             SRC1_ELEM(ELEM_X),
1572                             SRC1_NEG(0),
1573                             INDEX_MODE(SQ_INDEX_LOOP),
1574                             PRED_SEL(SQ_PRED_SEL_OFF),
1575                             LAST(0));
1576    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1577                                 SRC1_ABS(0),
1578                                 UPDATE_EXECUTE_MASK(0),
1579                                 UPDATE_PRED(0),
1580                                 WRITE_MASK(1),
1581                                 OMOD(SQ_ALU_OMOD_OFF),
1582                                 ALU_INST(SQ_OP2_INST_DOT4),
1583                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1584                                 DST_GPR(3),
1585                                 DST_REL(ABSOLUTE),
1586                                 DST_ELEM(ELEM_X),
1587                                 CLAMP(0));
1588
1589    /* 15 srcX.y DOT4 - mask */
1590    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1591                             SRC0_REL(ABSOLUTE),
1592                             SRC0_ELEM(ELEM_Y),
1593                             SRC0_NEG(0),
1594                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1595                             SRC1_REL(ABSOLUTE),
1596                             SRC1_ELEM(ELEM_Y),
1597                             SRC1_NEG(0),
1598                             INDEX_MODE(SQ_INDEX_LOOP),
1599                             PRED_SEL(SQ_PRED_SEL_OFF),
1600                             LAST(0));
1601    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1602                                 SRC1_ABS(0),
1603                                 UPDATE_EXECUTE_MASK(0),
1604                                 UPDATE_PRED(0),
1605                                 WRITE_MASK(0),
1606                                 OMOD(SQ_ALU_OMOD_OFF),
1607                                 ALU_INST(SQ_OP2_INST_DOT4),
1608                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1609                                 DST_GPR(3),
1610                                 DST_REL(ABSOLUTE),
1611                                 DST_ELEM(ELEM_Y),
1612                                 CLAMP(0));
1613
1614    /* 16 srcX.z DOT4 - mask */
1615    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1616                             SRC0_REL(ABSOLUTE),
1617                             SRC0_ELEM(ELEM_Z),
1618                             SRC0_NEG(0),
1619                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1620                             SRC1_REL(ABSOLUTE),
1621                             SRC1_ELEM(ELEM_Z),
1622                             SRC1_NEG(0),
1623                             INDEX_MODE(SQ_INDEX_LOOP),
1624                             PRED_SEL(SQ_PRED_SEL_OFF),
1625                             LAST(0));
1626    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1627                                 SRC1_ABS(0),
1628                                 UPDATE_EXECUTE_MASK(0),
1629                                 UPDATE_PRED(0),
1630                                 WRITE_MASK(0),
1631                                 OMOD(SQ_ALU_OMOD_OFF),
1632                                 ALU_INST(SQ_OP2_INST_DOT4),
1633                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1634                                 DST_GPR(3),
1635                                 DST_REL(ABSOLUTE),
1636                                 DST_ELEM(ELEM_Z),
1637                                 CLAMP(0));
1638
1639    /* 17 srcX.w DOT4 - mask */
1640    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1641                             SRC0_REL(ABSOLUTE),
1642                             SRC0_ELEM(ELEM_W),
1643                             SRC0_NEG(0),
1644                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1645                             SRC1_REL(ABSOLUTE),
1646                             SRC1_ELEM(ELEM_W),
1647                             SRC1_NEG(0),
1648                             INDEX_MODE(SQ_INDEX_LOOP),
1649                             PRED_SEL(SQ_PRED_SEL_OFF),
1650                             LAST(1));
1651    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1652                                 SRC1_ABS(0),
1653                                 UPDATE_EXECUTE_MASK(0),
1654                                 UPDATE_PRED(0),
1655                                 WRITE_MASK(0),
1656                                 OMOD(SQ_ALU_OMOD_OFF),
1657                                 ALU_INST(SQ_OP2_INST_DOT4),
1658                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1659                                 DST_GPR(3),
1660                                 DST_REL(ABSOLUTE),
1661                                 DST_ELEM(ELEM_W),
1662                                 CLAMP(0));
1663
1664    /* 18 srcY.x DOT4 - mask */
1665    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1666                             SRC0_REL(ABSOLUTE),
1667                             SRC0_ELEM(ELEM_X),
1668                             SRC0_NEG(0),
1669                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1670                             SRC1_REL(ABSOLUTE),
1671                             SRC1_ELEM(ELEM_X),
1672                             SRC1_NEG(0),
1673                             INDEX_MODE(SQ_INDEX_LOOP),
1674                             PRED_SEL(SQ_PRED_SEL_OFF),
1675                             LAST(0));
1676    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1677                                 SRC1_ABS(0),
1678                                 UPDATE_EXECUTE_MASK(0),
1679                                 UPDATE_PRED(0),
1680                                 WRITE_MASK(0),
1681                                 OMOD(SQ_ALU_OMOD_OFF),
1682                                 ALU_INST(SQ_OP2_INST_DOT4),
1683                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1684                                 DST_GPR(3),
1685                                 DST_REL(ABSOLUTE),
1686                                 DST_ELEM(ELEM_X),
1687                                 CLAMP(0));
1688
1689    /* 19 srcY.y DOT4 - mask */
1690    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1691                             SRC0_REL(ABSOLUTE),
1692                             SRC0_ELEM(ELEM_Y),
1693                             SRC0_NEG(0),
1694                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1695                             SRC1_REL(ABSOLUTE),
1696                             SRC1_ELEM(ELEM_Y),
1697                             SRC1_NEG(0),
1698                             INDEX_MODE(SQ_INDEX_LOOP),
1699                             PRED_SEL(SQ_PRED_SEL_OFF),
1700                             LAST(0));
1701    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1702                                 SRC1_ABS(0),
1703                                 UPDATE_EXECUTE_MASK(0),
1704                                 UPDATE_PRED(0),
1705                                 WRITE_MASK(1),
1706                                 OMOD(SQ_ALU_OMOD_OFF),
1707                                 ALU_INST(SQ_OP2_INST_DOT4),
1708                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1709                                 DST_GPR(3),
1710                                 DST_REL(ABSOLUTE),
1711                                 DST_ELEM(ELEM_Y),
1712                                 CLAMP(0));
1713
1714    /* 20 srcY.z DOT4 - mask */
1715    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1716                             SRC0_REL(ABSOLUTE),
1717                             SRC0_ELEM(ELEM_Z),
1718                             SRC0_NEG(0),
1719                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1720                             SRC1_REL(ABSOLUTE),
1721                             SRC1_ELEM(ELEM_Z),
1722                             SRC1_NEG(0),
1723                             INDEX_MODE(SQ_INDEX_LOOP),
1724                             PRED_SEL(SQ_PRED_SEL_OFF),
1725                             LAST(0));
1726    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1727                                 SRC1_ABS(0),
1728                                 UPDATE_EXECUTE_MASK(0),
1729                                 UPDATE_PRED(0),
1730                                 WRITE_MASK(0),
1731                                 OMOD(SQ_ALU_OMOD_OFF),
1732                                 ALU_INST(SQ_OP2_INST_DOT4),
1733                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1734                                 DST_GPR(3),
1735                                 DST_REL(ABSOLUTE),
1736                                 DST_ELEM(ELEM_Z),
1737                                 CLAMP(0));
1738
1739    /* 21 srcY.w DOT4 - mask */
1740    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1741                             SRC0_REL(ABSOLUTE),
1742                             SRC0_ELEM(ELEM_W),
1743                             SRC0_NEG(0),
1744                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1745                             SRC1_REL(ABSOLUTE),
1746                             SRC1_ELEM(ELEM_W),
1747                             SRC1_NEG(0),
1748                             INDEX_MODE(SQ_INDEX_LOOP),
1749                             PRED_SEL(SQ_PRED_SEL_OFF),
1750                             LAST(1));
1751    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1752                                 SRC1_ABS(0),
1753                                 UPDATE_EXECUTE_MASK(0),
1754                                 UPDATE_PRED(0),
1755                                 WRITE_MASK(0),
1756                                 OMOD(SQ_ALU_OMOD_OFF),
1757                                 ALU_INST(SQ_OP2_INST_DOT4),
1758                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1759                                 DST_GPR(3),
1760                                 DST_REL(ABSOLUTE),
1761                                 DST_ELEM(ELEM_W),
1762                                 CLAMP(0));
1763
1764    /* 22 maskX.x DOT4 - mask */
1765    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1766                             SRC0_REL(ABSOLUTE),
1767                             SRC0_ELEM(ELEM_X),
1768                             SRC0_NEG(0),
1769                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1770                             SRC1_REL(ABSOLUTE),
1771                             SRC1_ELEM(ELEM_X),
1772                             SRC1_NEG(0),
1773                             INDEX_MODE(SQ_INDEX_LOOP),
1774                             PRED_SEL(SQ_PRED_SEL_OFF),
1775                             LAST(0));
1776    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1777                                 SRC1_ABS(0),
1778                                 UPDATE_EXECUTE_MASK(0),
1779                                 UPDATE_PRED(0),
1780                                 WRITE_MASK(1),
1781                                 OMOD(SQ_ALU_OMOD_OFF),
1782                                 ALU_INST(SQ_OP2_INST_DOT4),
1783                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1784                                 DST_GPR(4),
1785                                 DST_REL(ABSOLUTE),
1786                                 DST_ELEM(ELEM_X),
1787                                 CLAMP(0));
1788
1789    /* 23 maskX.y DOT4 - mask */
1790    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1791                             SRC0_REL(ABSOLUTE),
1792                             SRC0_ELEM(ELEM_Y),
1793                             SRC0_NEG(0),
1794                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1795                             SRC1_REL(ABSOLUTE),
1796                             SRC1_ELEM(ELEM_Y),
1797                             SRC1_NEG(0),
1798                             INDEX_MODE(SQ_INDEX_LOOP),
1799                             PRED_SEL(SQ_PRED_SEL_OFF),
1800                             LAST(0));
1801    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1802                                 SRC1_ABS(0),
1803                                 UPDATE_EXECUTE_MASK(0),
1804                                 UPDATE_PRED(0),
1805                                 WRITE_MASK(0),
1806                                 OMOD(SQ_ALU_OMOD_OFF),
1807                                 ALU_INST(SQ_OP2_INST_DOT4),
1808                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1809                                 DST_GPR(4),
1810                                 DST_REL(ABSOLUTE),
1811                                 DST_ELEM(ELEM_Y),
1812                                 CLAMP(0));
1813
1814    /* 24 maskX.z DOT4 - mask */
1815    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1816                             SRC0_REL(ABSOLUTE),
1817                             SRC0_ELEM(ELEM_Z),
1818                             SRC0_NEG(0),
1819                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1820                             SRC1_REL(ABSOLUTE),
1821                             SRC1_ELEM(ELEM_Z),
1822                             SRC1_NEG(0),
1823                             INDEX_MODE(SQ_INDEX_LOOP),
1824                             PRED_SEL(SQ_PRED_SEL_OFF),
1825                             LAST(0));
1826    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1827                                 SRC1_ABS(0),
1828                                 UPDATE_EXECUTE_MASK(0),
1829                                 UPDATE_PRED(0),
1830                                 WRITE_MASK(0),
1831                                 OMOD(SQ_ALU_OMOD_OFF),
1832                                 ALU_INST(SQ_OP2_INST_DOT4),
1833                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1834                                 DST_GPR(4),
1835                                 DST_REL(ABSOLUTE),
1836                                 DST_ELEM(ELEM_Z),
1837                                 CLAMP(0));
1838
1839    /* 25 maskX.w DOT4 - mask */
1840    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1841                             SRC0_REL(ABSOLUTE),
1842                             SRC0_ELEM(ELEM_W),
1843                             SRC0_NEG(0),
1844                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1845                             SRC1_REL(ABSOLUTE),
1846                             SRC1_ELEM(ELEM_W),
1847                             SRC1_NEG(0),
1848                             INDEX_MODE(SQ_INDEX_LOOP),
1849                             PRED_SEL(SQ_PRED_SEL_OFF),
1850                             LAST(1));
1851    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1852                                 SRC1_ABS(0),
1853                                 UPDATE_EXECUTE_MASK(0),
1854                                 UPDATE_PRED(0),
1855                                 WRITE_MASK(0),
1856                                 OMOD(SQ_ALU_OMOD_OFF),
1857                                 ALU_INST(SQ_OP2_INST_DOT4),
1858                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1859                                 DST_GPR(4),
1860                                 DST_REL(ABSOLUTE),
1861                                 DST_ELEM(ELEM_W),
1862                                 CLAMP(0));
1863
1864    /* 26 maskY.x DOT4 - mask */
1865    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1866                             SRC0_REL(ABSOLUTE),
1867                             SRC0_ELEM(ELEM_X),
1868                             SRC0_NEG(0),
1869                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1870                             SRC1_REL(ABSOLUTE),
1871                             SRC1_ELEM(ELEM_X),
1872                             SRC1_NEG(0),
1873                             INDEX_MODE(SQ_INDEX_LOOP),
1874                             PRED_SEL(SQ_PRED_SEL_OFF),
1875                             LAST(0));
1876    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1877                                 SRC1_ABS(0),
1878                                 UPDATE_EXECUTE_MASK(0),
1879                                 UPDATE_PRED(0),
1880                                 WRITE_MASK(0),
1881                                 OMOD(SQ_ALU_OMOD_OFF),
1882                                 ALU_INST(SQ_OP2_INST_DOT4),
1883                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1884                                 DST_GPR(4),
1885                                 DST_REL(ABSOLUTE),
1886                                 DST_ELEM(ELEM_X),
1887                                 CLAMP(0));
1888
1889    /* 27 maskY.y DOT4 - mask */
1890    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1891                             SRC0_REL(ABSOLUTE),
1892                             SRC0_ELEM(ELEM_Y),
1893                             SRC0_NEG(0),
1894                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1895                             SRC1_REL(ABSOLUTE),
1896                             SRC1_ELEM(ELEM_Y),
1897                             SRC1_NEG(0),
1898                             INDEX_MODE(SQ_INDEX_LOOP),
1899                             PRED_SEL(SQ_PRED_SEL_OFF),
1900                             LAST(0));
1901    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1902                                 SRC1_ABS(0),
1903                                 UPDATE_EXECUTE_MASK(0),
1904                                 UPDATE_PRED(0),
1905                                 WRITE_MASK(1),
1906                                 OMOD(SQ_ALU_OMOD_OFF),
1907                                 ALU_INST(SQ_OP2_INST_DOT4),
1908                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1909                                 DST_GPR(4),
1910                                 DST_REL(ABSOLUTE),
1911                                 DST_ELEM(ELEM_Y),
1912                                 CLAMP(0));
1913
1914    /* 28 maskY.z DOT4 - mask */
1915    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1916                             SRC0_REL(ABSOLUTE),
1917                             SRC0_ELEM(ELEM_Z),
1918                             SRC0_NEG(0),
1919                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1920                             SRC1_REL(ABSOLUTE),
1921                             SRC1_ELEM(ELEM_Z),
1922                             SRC1_NEG(0),
1923                             INDEX_MODE(SQ_INDEX_LOOP),
1924                             PRED_SEL(SQ_PRED_SEL_OFF),
1925                             LAST(0));
1926    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1927                                 SRC1_ABS(0),
1928                                 UPDATE_EXECUTE_MASK(0),
1929                                 UPDATE_PRED(0),
1930                                 WRITE_MASK(0),
1931                                 OMOD(SQ_ALU_OMOD_OFF),
1932                                 ALU_INST(SQ_OP2_INST_DOT4),
1933                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1934                                 DST_GPR(4),
1935                                 DST_REL(ABSOLUTE),
1936                                 DST_ELEM(ELEM_Z),
1937                                 CLAMP(0));
1938
1939    /* 29 maskY.w DOT4 - mask */
1940    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1941                             SRC0_REL(ABSOLUTE),
1942                             SRC0_ELEM(ELEM_W),
1943                             SRC0_NEG(0),
1944                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1945                             SRC1_REL(ABSOLUTE),
1946                             SRC1_ELEM(ELEM_W),
1947                             SRC1_NEG(0),
1948                             INDEX_MODE(SQ_INDEX_LOOP),
1949                             PRED_SEL(SQ_PRED_SEL_OFF),
1950                             LAST(1));
1951    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1952                                 SRC1_ABS(0),
1953                                 UPDATE_EXECUTE_MASK(0),
1954                                 UPDATE_PRED(0),
1955                                 WRITE_MASK(0),
1956                                 OMOD(SQ_ALU_OMOD_OFF),
1957                                 ALU_INST(SQ_OP2_INST_DOT4),
1958                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1959                                 DST_GPR(4),
1960                                 DST_REL(ABSOLUTE),
1961                                 DST_ELEM(ELEM_W),
1962                                 CLAMP(0));
1963
1964    /* 30 srcX / w */
1965    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
1966                             SRC0_REL(ABSOLUTE),
1967                             SRC0_ELEM(ELEM_X),
1968                             SRC0_NEG(0),
1969                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1970                             SRC1_REL(ABSOLUTE),
1971                             SRC1_ELEM(ELEM_W),
1972                             SRC1_NEG(0),
1973                             INDEX_MODE(SQ_INDEX_AR_X),
1974                             PRED_SEL(SQ_PRED_SEL_OFF),
1975                             LAST(1));
1976    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1977                                 SRC1_ABS(0),
1978                                 UPDATE_EXECUTE_MASK(0),
1979                                 UPDATE_PRED(0),
1980                                 WRITE_MASK(1),
1981                                 OMOD(SQ_ALU_OMOD_OFF),
1982                                 ALU_INST(SQ_OP2_INST_MUL),
1983                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1984                                 DST_GPR(1),
1985                                 DST_REL(ABSOLUTE),
1986                                 DST_ELEM(ELEM_X),
1987                                 CLAMP(0));
1988
1989    /* 31 srcY / h */
1990    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
1991                             SRC0_REL(ABSOLUTE),
1992                             SRC0_ELEM(ELEM_Y),
1993                             SRC0_NEG(0),
1994                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1995                             SRC1_REL(ABSOLUTE),
1996                             SRC1_ELEM(ELEM_W),
1997                             SRC1_NEG(0),
1998                             INDEX_MODE(SQ_INDEX_AR_X),
1999                             PRED_SEL(SQ_PRED_SEL_OFF),
2000                             LAST(1));
2001    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2002                                 SRC1_ABS(0),
2003                                 UPDATE_EXECUTE_MASK(0),
2004                                 UPDATE_PRED(0),
2005                                 WRITE_MASK(1),
2006                                 OMOD(SQ_ALU_OMOD_OFF),
2007                                 ALU_INST(SQ_OP2_INST_MUL),
2008                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2009                                 DST_GPR(1),
2010                                 DST_REL(ABSOLUTE),
2011                                 DST_ELEM(ELEM_Y),
2012                                 CLAMP(0));
2013
2014    /* 32 maskX / w */
2015    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
2016                             SRC0_REL(ABSOLUTE),
2017                             SRC0_ELEM(ELEM_X),
2018                             SRC0_NEG(0),
2019                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
2020                             SRC1_REL(ABSOLUTE),
2021                             SRC1_ELEM(ELEM_W),
2022                             SRC1_NEG(0),
2023                             INDEX_MODE(SQ_INDEX_AR_X),
2024                             PRED_SEL(SQ_PRED_SEL_OFF),
2025                             LAST(1));
2026    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2027                                 SRC1_ABS(0),
2028                                 UPDATE_EXECUTE_MASK(0),
2029                                 UPDATE_PRED(0),
2030                                 WRITE_MASK(1),
2031                                 OMOD(SQ_ALU_OMOD_OFF),
2032                                 ALU_INST(SQ_OP2_INST_MUL),
2033                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2034                                 DST_GPR(0),
2035                                 DST_REL(ABSOLUTE),
2036                                 DST_ELEM(ELEM_X),
2037                                 CLAMP(0));
2038
2039    /* 33 maskY / h */
2040    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
2041                             SRC0_REL(ABSOLUTE),
2042                             SRC0_ELEM(ELEM_Y),
2043                             SRC0_NEG(0),
2044                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
2045                             SRC1_REL(ABSOLUTE),
2046                             SRC1_ELEM(ELEM_W),
2047                             SRC1_NEG(0),
2048                             INDEX_MODE(SQ_INDEX_AR_X),
2049                             PRED_SEL(SQ_PRED_SEL_OFF),
2050                             LAST(1));
2051    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2052                                 SRC1_ABS(0),
2053                                 UPDATE_EXECUTE_MASK(0),
2054                                 UPDATE_PRED(0),
2055                                 WRITE_MASK(1),
2056                                 OMOD(SQ_ALU_OMOD_OFF),
2057                                 ALU_INST(SQ_OP2_INST_MUL),
2058                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2059                                 DST_GPR(0),
2060                                 DST_REL(ABSOLUTE),
2061                                 DST_ELEM(ELEM_Y),
2062                                 CLAMP(0));
2063
2064    /* 34 srcX.x DOT4 - non-mask */
2065    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2066                             SRC0_REL(ABSOLUTE),
2067                             SRC0_ELEM(ELEM_X),
2068                             SRC0_NEG(0),
2069                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2070                             SRC1_REL(ABSOLUTE),
2071                             SRC1_ELEM(ELEM_X),
2072                             SRC1_NEG(0),
2073                             INDEX_MODE(SQ_INDEX_LOOP),
2074                             PRED_SEL(SQ_PRED_SEL_OFF),
2075                             LAST(0));
2076    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2077                                 SRC1_ABS(0),
2078                                 UPDATE_EXECUTE_MASK(0),
2079                                 UPDATE_PRED(0),
2080                                 WRITE_MASK(1),
2081                                 OMOD(SQ_ALU_OMOD_OFF),
2082                                 ALU_INST(SQ_OP2_INST_DOT4),
2083                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2084                                 DST_GPR(2),
2085                                 DST_REL(ABSOLUTE),
2086                                 DST_ELEM(ELEM_X),
2087                                 CLAMP(0));
2088
2089    /* 35 srcX.y DOT4 - non-mask */
2090    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2091                             SRC0_REL(ABSOLUTE),
2092                             SRC0_ELEM(ELEM_Y),
2093                             SRC0_NEG(0),
2094                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2095                             SRC1_REL(ABSOLUTE),
2096                             SRC1_ELEM(ELEM_Y),
2097                             SRC1_NEG(0),
2098                             INDEX_MODE(SQ_INDEX_LOOP),
2099                             PRED_SEL(SQ_PRED_SEL_OFF),
2100                             LAST(0));
2101    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2102                                 SRC1_ABS(0),
2103                                 UPDATE_EXECUTE_MASK(0),
2104                                 UPDATE_PRED(0),
2105                                 WRITE_MASK(0),
2106                                 OMOD(SQ_ALU_OMOD_OFF),
2107                                 ALU_INST(SQ_OP2_INST_DOT4),
2108                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2109                                 DST_GPR(2),
2110                                 DST_REL(ABSOLUTE),
2111                                 DST_ELEM(ELEM_Y),
2112                                 CLAMP(0));
2113
2114    /* 36 srcX.z DOT4 - non-mask */
2115    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2116                             SRC0_REL(ABSOLUTE),
2117                             SRC0_ELEM(ELEM_Z),
2118                             SRC0_NEG(0),
2119                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2120                             SRC1_REL(ABSOLUTE),
2121                             SRC1_ELEM(ELEM_Z),
2122                             SRC1_NEG(0),
2123                             INDEX_MODE(SQ_INDEX_LOOP),
2124                             PRED_SEL(SQ_PRED_SEL_OFF),
2125                             LAST(0));
2126    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2127                                 SRC1_ABS(0),
2128                                 UPDATE_EXECUTE_MASK(0),
2129                                 UPDATE_PRED(0),
2130                                 WRITE_MASK(0),
2131                                 OMOD(SQ_ALU_OMOD_OFF),
2132                                 ALU_INST(SQ_OP2_INST_DOT4),
2133                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2134                                 DST_GPR(2),
2135                                 DST_REL(ABSOLUTE),
2136                                 DST_ELEM(ELEM_Z),
2137                                 CLAMP(0));
2138
2139    /* 37 srcX.w DOT4 - non-mask */
2140    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2141                             SRC0_REL(ABSOLUTE),
2142                             SRC0_ELEM(ELEM_W),
2143                             SRC0_NEG(0),
2144                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2145                             SRC1_REL(ABSOLUTE),
2146                             SRC1_ELEM(ELEM_W),
2147                             SRC1_NEG(0),
2148                             INDEX_MODE(SQ_INDEX_LOOP),
2149                             PRED_SEL(SQ_PRED_SEL_OFF),
2150                             LAST(1));
2151    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2152                                 SRC1_ABS(0),
2153                                 UPDATE_EXECUTE_MASK(0),
2154                                 UPDATE_PRED(0),
2155                                 WRITE_MASK(0),
2156                                 OMOD(SQ_ALU_OMOD_OFF),
2157                                 ALU_INST(SQ_OP2_INST_DOT4),
2158                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2159                                 DST_GPR(2),
2160                                 DST_REL(ABSOLUTE),
2161                                 DST_ELEM(ELEM_W),
2162                                 CLAMP(0));
2163
2164    /* 38 srcY.x DOT4 - non-mask */
2165    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2166                             SRC0_REL(ABSOLUTE),
2167                             SRC0_ELEM(ELEM_X),
2168                             SRC0_NEG(0),
2169                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2170                             SRC1_REL(ABSOLUTE),
2171                             SRC1_ELEM(ELEM_X),
2172                             SRC1_NEG(0),
2173                             INDEX_MODE(SQ_INDEX_LOOP),
2174                             PRED_SEL(SQ_PRED_SEL_OFF),
2175                             LAST(0));
2176    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2177                                 SRC1_ABS(0),
2178                                 UPDATE_EXECUTE_MASK(0),
2179                                 UPDATE_PRED(0),
2180                                 WRITE_MASK(0),
2181                                 OMOD(SQ_ALU_OMOD_OFF),
2182                                 ALU_INST(SQ_OP2_INST_DOT4),
2183                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2184                                 DST_GPR(2),
2185                                 DST_REL(ABSOLUTE),
2186                                 DST_ELEM(ELEM_X),
2187                                 CLAMP(0));
2188
2189    /* 39 srcY.y DOT4 - non-mask */
2190    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2191                             SRC0_REL(ABSOLUTE),
2192                             SRC0_ELEM(ELEM_Y),
2193                             SRC0_NEG(0),
2194                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2195                             SRC1_REL(ABSOLUTE),
2196                             SRC1_ELEM(ELEM_Y),
2197                             SRC1_NEG(0),
2198                             INDEX_MODE(SQ_INDEX_LOOP),
2199                             PRED_SEL(SQ_PRED_SEL_OFF),
2200                             LAST(0));
2201    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2202                                 SRC1_ABS(0),
2203                                 UPDATE_EXECUTE_MASK(0),
2204                                 UPDATE_PRED(0),
2205                                 WRITE_MASK(1),
2206                                 OMOD(SQ_ALU_OMOD_OFF),
2207                                 ALU_INST(SQ_OP2_INST_DOT4),
2208                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2209                                 DST_GPR(2),
2210                                 DST_REL(ABSOLUTE),
2211                                 DST_ELEM(ELEM_Y),
2212                                 CLAMP(0));
2213
2214    /* 40 srcY.z DOT4 - non-mask */
2215    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2216                             SRC0_REL(ABSOLUTE),
2217                             SRC0_ELEM(ELEM_Z),
2218                             SRC0_NEG(0),
2219                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2220                             SRC1_REL(ABSOLUTE),
2221                             SRC1_ELEM(ELEM_Z),
2222                             SRC1_NEG(0),
2223                             INDEX_MODE(SQ_INDEX_LOOP),
2224                             PRED_SEL(SQ_PRED_SEL_OFF),
2225                             LAST(0));
2226    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2227                                 SRC1_ABS(0),
2228                                 UPDATE_EXECUTE_MASK(0),
2229                                 UPDATE_PRED(0),
2230                                 WRITE_MASK(0),
2231                                 OMOD(SQ_ALU_OMOD_OFF),
2232                                 ALU_INST(SQ_OP2_INST_DOT4),
2233                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2234                                 DST_GPR(2),
2235                                 DST_REL(ABSOLUTE),
2236                                 DST_ELEM(ELEM_Z),
2237                                 CLAMP(0));
2238
2239    /* 41 srcY.w DOT4 - non-mask */
2240    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2241                             SRC0_REL(ABSOLUTE),
2242                             SRC0_ELEM(ELEM_W),
2243                             SRC0_NEG(0),
2244                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2245                             SRC1_REL(ABSOLUTE),
2246                             SRC1_ELEM(ELEM_W),
2247                             SRC1_NEG(0),
2248                             INDEX_MODE(SQ_INDEX_LOOP),
2249                             PRED_SEL(SQ_PRED_SEL_OFF),
2250                             LAST(1));
2251    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2252                                 SRC1_ABS(0),
2253                                 UPDATE_EXECUTE_MASK(0),
2254                                 UPDATE_PRED(0),
2255                                 WRITE_MASK(0),
2256                                 OMOD(SQ_ALU_OMOD_OFF),
2257                                 ALU_INST(SQ_OP2_INST_DOT4),
2258                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2259                                 DST_GPR(2),
2260                                 DST_REL(ABSOLUTE),
2261                                 DST_ELEM(ELEM_W),
2262                                 CLAMP(0));
2263
2264    /* 42 srcX / w */
2265    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
2266                             SRC0_REL(ABSOLUTE),
2267                             SRC0_ELEM(ELEM_X),
2268                             SRC0_NEG(0),
2269                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2270                             SRC1_REL(ABSOLUTE),
2271                             SRC1_ELEM(ELEM_W),
2272                             SRC1_NEG(0),
2273                             INDEX_MODE(SQ_INDEX_AR_X),
2274                             PRED_SEL(SQ_PRED_SEL_OFF),
2275                             LAST(1));
2276    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2277                                 SRC1_ABS(0),
2278                                 UPDATE_EXECUTE_MASK(0),
2279                                 UPDATE_PRED(0),
2280                                 WRITE_MASK(1),
2281                                 OMOD(SQ_ALU_OMOD_OFF),
2282                                 ALU_INST(SQ_OP2_INST_MUL),
2283                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2284                                 DST_GPR(0),
2285                                 DST_REL(ABSOLUTE),
2286                                 DST_ELEM(ELEM_X),
2287                                 CLAMP(0));
2288
2289    /* 43 srcY / h */
2290    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
2291                             SRC0_REL(ABSOLUTE),
2292                             SRC0_ELEM(ELEM_Y),
2293                             SRC0_NEG(0),
2294                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2295                             SRC1_REL(ABSOLUTE),
2296                             SRC1_ELEM(ELEM_W),
2297                             SRC1_NEG(0),
2298                             INDEX_MODE(SQ_INDEX_AR_X),
2299                             PRED_SEL(SQ_PRED_SEL_OFF),
2300                             LAST(1));
2301    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2302                                 SRC1_ABS(0),
2303                                 UPDATE_EXECUTE_MASK(0),
2304                                 UPDATE_PRED(0),
2305                                 WRITE_MASK(1),
2306                                 OMOD(SQ_ALU_OMOD_OFF),
2307                                 ALU_INST(SQ_OP2_INST_MUL),
2308                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2309                                 DST_GPR(0),
2310                                 DST_REL(ABSOLUTE),
2311                                 DST_ELEM(ELEM_Y),
2312                                 CLAMP(0));
2313
2314    /* mask vfetch - 44/45 - dst */
2315    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2316			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2317			     FETCH_WHOLE_QUAD(0),
2318			     BUFFER_ID(0),
2319			     SRC_GPR(0),
2320			     SRC_REL(ABSOLUTE),
2321			     SRC_SEL_X(SQ_SEL_X),
2322			     MEGA_FETCH_COUNT(24));
2323    shader[i++] = VTX_DWORD1_GPR(DST_GPR(2),
2324				 DST_REL(0),
2325				 DST_SEL_X(SQ_SEL_X),
2326				 DST_SEL_Y(SQ_SEL_Y),
2327				 DST_SEL_Z(SQ_SEL_0),
2328				 DST_SEL_W(SQ_SEL_1),
2329				 USE_CONST_FIELDS(0),
2330				 DATA_FORMAT(FMT_32_32_FLOAT),
2331				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2332				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2333				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2334    shader[i++] = VTX_DWORD2(OFFSET(0),
2335#if X_BYTE_ORDER == X_BIG_ENDIAN
2336                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2337#else
2338                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2339#endif
2340			     CONST_BUF_NO_STRIDE(0),
2341			     MEGA_FETCH(1),
2342			     ALT_CONST(0),
2343			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2344    shader[i++] = VTX_DWORD_PAD;
2345    /* 46/47 - src */
2346    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2347			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2348			     FETCH_WHOLE_QUAD(0),
2349			     BUFFER_ID(0),
2350			     SRC_GPR(0),
2351			     SRC_REL(ABSOLUTE),
2352			     SRC_SEL_X(SQ_SEL_X),
2353			     MEGA_FETCH_COUNT(8));
2354    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
2355				 DST_REL(0),
2356				 DST_SEL_X(SQ_SEL_X),
2357				 DST_SEL_Y(SQ_SEL_Y),
2358				 DST_SEL_Z(SQ_SEL_1),
2359				 DST_SEL_W(SQ_SEL_0),
2360				 USE_CONST_FIELDS(0),
2361				 DATA_FORMAT(FMT_32_32_FLOAT),
2362				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2363				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2364				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2365    shader[i++] = VTX_DWORD2(OFFSET(8),
2366#if X_BYTE_ORDER == X_BIG_ENDIAN
2367                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2368#else
2369                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2370#endif
2371			     CONST_BUF_NO_STRIDE(0),
2372			     MEGA_FETCH(0),
2373			     ALT_CONST(0),
2374			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2375    shader[i++] = VTX_DWORD_PAD;
2376    /* 48/49 - mask */
2377    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2378			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2379			     FETCH_WHOLE_QUAD(0),
2380			     BUFFER_ID(0),
2381			     SRC_GPR(0),
2382			     SRC_REL(ABSOLUTE),
2383			     SRC_SEL_X(SQ_SEL_X),
2384			     MEGA_FETCH_COUNT(8));
2385    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
2386				 DST_REL(0),
2387				 DST_SEL_X(SQ_SEL_X),
2388				 DST_SEL_Y(SQ_SEL_Y),
2389				 DST_SEL_Z(SQ_SEL_1),
2390				 DST_SEL_W(SQ_SEL_0),
2391				 USE_CONST_FIELDS(0),
2392				 DATA_FORMAT(FMT_32_32_FLOAT),
2393				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2394				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2395				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2396    shader[i++] = VTX_DWORD2(OFFSET(16),
2397#if X_BYTE_ORDER == X_BIG_ENDIAN
2398                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2399#else
2400                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2401#endif
2402			     CONST_BUF_NO_STRIDE(0),
2403			     MEGA_FETCH(0),
2404			     ALT_CONST(0),
2405			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2406    shader[i++] = VTX_DWORD_PAD;
2407
2408    /* no mask vfetch - 50/51 - dst */
2409    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2410			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2411			     FETCH_WHOLE_QUAD(0),
2412			     BUFFER_ID(0),
2413			     SRC_GPR(0),
2414			     SRC_REL(ABSOLUTE),
2415			     SRC_SEL_X(SQ_SEL_X),
2416			     MEGA_FETCH_COUNT(16));
2417    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
2418				 DST_REL(0),
2419				 DST_SEL_X(SQ_SEL_X),
2420				 DST_SEL_Y(SQ_SEL_Y),
2421				 DST_SEL_Z(SQ_SEL_0),
2422				 DST_SEL_W(SQ_SEL_1),
2423				 USE_CONST_FIELDS(0),
2424				 DATA_FORMAT(FMT_32_32_FLOAT),
2425				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2426				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2427				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2428    shader[i++] = VTX_DWORD2(OFFSET(0),
2429#if X_BYTE_ORDER == X_BIG_ENDIAN
2430                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2431#else
2432                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2433#endif
2434			     CONST_BUF_NO_STRIDE(0),
2435			     MEGA_FETCH(1),
2436			     ALT_CONST(0),
2437			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2438    shader[i++] = VTX_DWORD_PAD;
2439    /* 52/53 - src */
2440    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2441			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2442			     FETCH_WHOLE_QUAD(0),
2443			     BUFFER_ID(0),
2444			     SRC_GPR(0),
2445			     SRC_REL(ABSOLUTE),
2446			     SRC_SEL_X(SQ_SEL_X),
2447			     MEGA_FETCH_COUNT(8));
2448    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
2449				 DST_REL(0),
2450				 DST_SEL_X(SQ_SEL_X),
2451				 DST_SEL_Y(SQ_SEL_Y),
2452				 DST_SEL_Z(SQ_SEL_1),
2453				 DST_SEL_W(SQ_SEL_0),
2454				 USE_CONST_FIELDS(0),
2455				 DATA_FORMAT(FMT_32_32_FLOAT),
2456				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2457				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2458				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2459    shader[i++] = VTX_DWORD2(OFFSET(8),
2460#if X_BYTE_ORDER == X_BIG_ENDIAN
2461                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2462#else
2463                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2464#endif
2465			     CONST_BUF_NO_STRIDE(0),
2466			     MEGA_FETCH(0),
2467                             ALT_CONST(0),
2468                             BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2469    shader[i++] = VTX_DWORD_PAD;
2470
2471    return i;
2472}
2473
2474/* comp ps --------------------------------------- */
2475int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t *shader)
2476{
2477    int i = 0;
2478
2479    /* 0 */
2480    /* call interp-fetch-mask if boolean1 == true */
2481    shader[i++] = CF_DWORD0(ADDR(11),
2482			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2483    shader[i++] = CF_DWORD1(POP_COUNT(0),
2484                            CF_CONST(1),
2485                            COND(SQ_CF_COND_BOOL),
2486                            I_COUNT(0),
2487                            VALID_PIXEL_MODE(0),
2488                            END_OF_PROGRAM(0),
2489                            CF_INST(SQ_CF_INST_CALL),
2490                            WHOLE_QUAD_MODE(0),
2491                            BARRIER(0));
2492
2493    /* 1 */
2494    /* call read-constant-mask if boolean1 == false */
2495    shader[i++] = CF_DWORD0(ADDR(14),
2496			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2497    shader[i++] = CF_DWORD1(POP_COUNT(0),
2498                            CF_CONST(1),
2499                            COND(SQ_CF_COND_NOT_BOOL),
2500                            I_COUNT(0),
2501                            VALID_PIXEL_MODE(0),
2502                            END_OF_PROGRAM(0),
2503                            CF_INST(SQ_CF_INST_CALL),
2504                            WHOLE_QUAD_MODE(0),
2505                            BARRIER(0));
2506
2507    /* 2 */
2508    /* call interp-fetch-src if boolean0 == true */
2509    shader[i++] = CF_DWORD0(ADDR(6),
2510			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2511    shader[i++] = CF_DWORD1(POP_COUNT(0),
2512                            CF_CONST(0),
2513                            COND(SQ_CF_COND_BOOL),
2514                            I_COUNT(0),
2515                            VALID_PIXEL_MODE(0),
2516                            END_OF_PROGRAM(0),
2517                            CF_INST(SQ_CF_INST_CALL),
2518                            WHOLE_QUAD_MODE(0),
2519                            BARRIER(0));
2520
2521    /* 3 */
2522    /* call read-constant-src if boolean0 == false */
2523    shader[i++] = CF_DWORD0(ADDR(9),
2524			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2525    shader[i++] = CF_DWORD1(POP_COUNT(0),
2526                            CF_CONST(0),
2527                            COND(SQ_CF_COND_NOT_BOOL),
2528                            I_COUNT(0),
2529                            VALID_PIXEL_MODE(0),
2530                            END_OF_PROGRAM(0),
2531                            CF_INST(SQ_CF_INST_CALL),
2532                            WHOLE_QUAD_MODE(0),
2533                            BARRIER(0));
2534    /* 4 */
    /* src IN mask (GPR0 := GPR0 .* GPR1) */
2536    shader[i++] = CF_ALU_DWORD0(ADDR(16),
2537				KCACHE_BANK0(0),
2538				KCACHE_BANK1(0),
2539				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
2540    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2541				KCACHE_ADDR0(0),
2542				KCACHE_ADDR1(0),
2543				I_COUNT(4),
2544				ALT_CONST(0),
2545				CF_INST(SQ_CF_INST_ALU),
2546				WHOLE_QUAD_MODE(0),
2547				BARRIER(1));
2548
2549    /* 5 */
2550    /* export pixel data */
2551    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
2552					  TYPE(SQ_EXPORT_PIXEL),
2553					  RW_GPR(0),
2554					  RW_REL(ABSOLUTE),
2555					  INDEX_GPR(0),
2556					  ELEM_SIZE(1));
2557    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
2558					       SRC_SEL_Y(SQ_SEL_Y),
2559					       SRC_SEL_Z(SQ_SEL_Z),
2560					       SRC_SEL_W(SQ_SEL_W),
2561					       BURST_COUNT(1),
2562					       VALID_PIXEL_MODE(0),
2563					       END_OF_PROGRAM(1),
2564					       CF_INST(SQ_CF_INST_EXPORT_DONE),
2565					       MARK(0),
2566					       BARRIER(1));
2567
2568    /* subroutine interp-fetch-src */
2569
2570    /* 6 */
2571    /* interpolate src */
2572    shader[i++] = CF_ALU_DWORD0(ADDR(20),
2573				KCACHE_BANK0(0),
2574				KCACHE_BANK1(0),
2575				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
2576    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2577				KCACHE_ADDR0(0),
2578				KCACHE_ADDR1(0),
2579				I_COUNT(4),
2580				ALT_CONST(0),
2581				CF_INST(SQ_CF_INST_ALU),
2582				WHOLE_QUAD_MODE(0),
2583				BARRIER(1));
2584
2585    /* 7 */
2586    /* texture fetch src into GPR0 */
2587    shader[i++] = CF_DWORD0(ADDR(24),
2588			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2589    shader[i++] = CF_DWORD1(POP_COUNT(0),
2590			    CF_CONST(0),
2591			    COND(SQ_CF_COND_ACTIVE),
2592			    I_COUNT(1),
2593			    VALID_PIXEL_MODE(0),
2594			    END_OF_PROGRAM(0),
2595			    CF_INST(SQ_CF_INST_TC),
2596			    WHOLE_QUAD_MODE(0),
2597			    BARRIER(1));
2598
2599    /* 8 */
2600    /* return */
2601    shader[i++] = CF_DWORD0(ADDR(0),
2602			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2603    shader[i++] = CF_DWORD1(POP_COUNT(0),
2604			    CF_CONST(0),
2605			    COND(SQ_CF_COND_ACTIVE),
2606			    I_COUNT(0),
2607			    VALID_PIXEL_MODE(0),
2608			    END_OF_PROGRAM(0),
2609			    CF_INST(SQ_CF_INST_RETURN),
2610			    WHOLE_QUAD_MODE(0),
2611			    BARRIER(0));
2612
2613    /* subroutine read-constant-src */
2614
2615    /* 9 */
2616    /* read constants into GPR0 */
2617    shader[i++] = CF_ALU_DWORD0(ADDR(26),
2618				KCACHE_BANK0(0),
2619				KCACHE_BANK1(0),
2620				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
2621    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2622				KCACHE_ADDR0(0),
2623				KCACHE_ADDR1(0),
2624				I_COUNT(4),
2625				ALT_CONST(1),
2626				CF_INST(SQ_CF_INST_ALU),
2627				WHOLE_QUAD_MODE(0),
2628				BARRIER(1));
2629
2630    /* 10 */
2631    /* return */
2632    shader[i++] = CF_DWORD0(ADDR(0),
2633			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2634    shader[i++] = CF_DWORD1(POP_COUNT(0),
2635			    CF_CONST(0),
2636			    COND(SQ_CF_COND_ACTIVE),
2637			    I_COUNT(0),
2638			    VALID_PIXEL_MODE(0),
2639			    END_OF_PROGRAM(0),
2640			    CF_INST(SQ_CF_INST_RETURN),
2641			    WHOLE_QUAD_MODE(0),
2642			    BARRIER(0));
2643
2644    /* subroutine interp-fetch-mask */
2645
2646    /* 11 */
2647    /* interpolate mask */
2648    shader[i++] = CF_ALU_DWORD0(ADDR(30),
2649				KCACHE_BANK0(0),
2650				KCACHE_BANK1(0),
2651				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
2652    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2653				KCACHE_ADDR0(0),
2654				KCACHE_ADDR1(0),
2655				I_COUNT(4),
2656				ALT_CONST(0),
2657				CF_INST(SQ_CF_INST_ALU),
2658				WHOLE_QUAD_MODE(0),
2659				BARRIER(1));
2660
2661    /* 12 */
2662    /* texture fetch mask into GPR1 */
2663    shader[i++] = CF_DWORD0(ADDR(34),
2664			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2665    shader[i++] = CF_DWORD1(POP_COUNT(0),
2666			    CF_CONST(0),
2667			    COND(SQ_CF_COND_ACTIVE),
2668			    I_COUNT(1),
2669			    VALID_PIXEL_MODE(0),
2670			    END_OF_PROGRAM(0),
2671			    CF_INST(SQ_CF_INST_TC),
2672			    WHOLE_QUAD_MODE(0),
2673			    BARRIER(1));
2674
2675    /* 13 */
2676    /* return */
2677    shader[i++] = CF_DWORD0(ADDR(0),
2678			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2679    shader[i++] = CF_DWORD1(POP_COUNT(0),
2680			    CF_CONST(0),
2681			    COND(SQ_CF_COND_ACTIVE),
2682			    I_COUNT(0),
2683			    VALID_PIXEL_MODE(0),
2684			    END_OF_PROGRAM(0),
2685			    CF_INST(SQ_CF_INST_RETURN),
2686			    WHOLE_QUAD_MODE(0),
2687			    BARRIER(0));
2688
    /* subroutine read-constant-mask */
2690
2691    /* 14 */
2692    /* read constants into GPR1 */
2693    shader[i++] = CF_ALU_DWORD0(ADDR(36),
2694				KCACHE_BANK0(0),
2695				KCACHE_BANK1(0),
2696				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
2697    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2698				KCACHE_ADDR0(0),
2699				KCACHE_ADDR1(0),
2700				I_COUNT(4),
2701				ALT_CONST(1),
2702				CF_INST(SQ_CF_INST_ALU),
2703				WHOLE_QUAD_MODE(0),
2704				BARRIER(1));
2705
2706    /* 15 */
2707    /* return */
2708    shader[i++] = CF_DWORD0(ADDR(0),
2709			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2710    shader[i++] = CF_DWORD1(POP_COUNT(0),
2711			    CF_CONST(0),
2712			    COND(SQ_CF_COND_ACTIVE),
2713			    I_COUNT(0),
2714			    VALID_PIXEL_MODE(0),
2715			    END_OF_PROGRAM(0),
2716			    CF_INST(SQ_CF_INST_RETURN),
2717			    WHOLE_QUAD_MODE(0),
2718			    BARRIER(0));
2719
2720    /* ALU clauses */
2721
2722    /* 16 */
2723    /* MUL gpr[0].x gpr[0].x gpr[1].x */
2724    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2725			     SRC0_REL(ABSOLUTE),
2726			     SRC0_ELEM(ELEM_X),
2727			     SRC0_NEG(0),
2728			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
2729			     SRC1_REL(ABSOLUTE),
2730			     SRC1_ELEM(ELEM_X),
2731			     SRC1_NEG(0),
2732			     INDEX_MODE(SQ_INDEX_LOOP),
2733			     PRED_SEL(SQ_PRED_SEL_OFF),
2734			     LAST(0));
2735    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2736				 SRC1_ABS(0),
2737				 UPDATE_EXECUTE_MASK(0),
2738				 UPDATE_PRED(0),
2739				 WRITE_MASK(1),
2740				 OMOD(SQ_ALU_OMOD_OFF),
2741				 ALU_INST(SQ_OP2_INST_MUL),
2742				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2743				 DST_GPR(0),
2744				 DST_REL(ABSOLUTE),
2745				 DST_ELEM(ELEM_X),
2746				 CLAMP(1));
2747
2748    /* 17 */
2749    /* MUL gpr[0].y gpr[0].y gpr[1].y */
2750    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2751			     SRC0_REL(ABSOLUTE),
2752			     SRC0_ELEM(ELEM_Y),
2753			     SRC0_NEG(0),
2754			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
2755			     SRC1_REL(ABSOLUTE),
2756			     SRC1_ELEM(ELEM_Y),
2757			     SRC1_NEG(0),
2758			     INDEX_MODE(SQ_INDEX_LOOP),
2759			     PRED_SEL(SQ_PRED_SEL_OFF),
2760			     LAST(0));
2761    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2762				 SRC1_ABS(0),
2763				 UPDATE_EXECUTE_MASK(0),
2764				 UPDATE_PRED(0),
2765				 WRITE_MASK(1),
2766				 OMOD(SQ_ALU_OMOD_OFF),
2767				 ALU_INST(SQ_OP2_INST_MUL),
2768				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2769				 DST_GPR(0),
2770				 DST_REL(ABSOLUTE),
2771				 DST_ELEM(ELEM_Y),
2772				 CLAMP(1));
2773    /* 18 */
2774    /* MUL gpr[0].z gpr[0].z gpr[1].z */
2775    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2776			     SRC0_REL(ABSOLUTE),
2777			     SRC0_ELEM(ELEM_Z),
2778			     SRC0_NEG(0),
2779			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
2780			     SRC1_REL(ABSOLUTE),
2781			     SRC1_ELEM(ELEM_Z),
2782			     SRC1_NEG(0),
2783			     INDEX_MODE(SQ_INDEX_LOOP),
2784			     PRED_SEL(SQ_PRED_SEL_OFF),
2785			     LAST(0));
2786    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2787				 SRC1_ABS(0),
2788				 UPDATE_EXECUTE_MASK(0),
2789				 UPDATE_PRED(0),
2790				 WRITE_MASK(1),
2791				 OMOD(SQ_ALU_OMOD_OFF),
2792				 ALU_INST(SQ_OP2_INST_MUL),
2793				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2794				 DST_GPR(0),
2795				 DST_REL(ABSOLUTE),
2796				 DST_ELEM(ELEM_Z),
2797				 CLAMP(1));
2798    /* 19 */
2799    /* MUL gpr[0].w gpr[0].w gpr[1].w */
2800    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2801			     SRC0_REL(ABSOLUTE),
2802			     SRC0_ELEM(ELEM_W),
2803			     SRC0_NEG(0),
2804			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
2805			     SRC1_REL(ABSOLUTE),
2806			     SRC1_ELEM(ELEM_W),
2807			     SRC1_NEG(0),
2808			     INDEX_MODE(SQ_INDEX_LOOP),
2809			     PRED_SEL(SQ_PRED_SEL_OFF),
2810			     LAST(1));
2811    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2812				 SRC1_ABS(0),
2813				 UPDATE_EXECUTE_MASK(0),
2814				 UPDATE_PRED(0),
2815				 WRITE_MASK(1),
2816				 OMOD(SQ_ALU_OMOD_OFF),
2817				 ALU_INST(SQ_OP2_INST_MUL),
2818				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2819				 DST_GPR(0),
2820				 DST_REL(ABSOLUTE),
2821				 DST_ELEM(ELEM_W),
2822				 CLAMP(1));
2823
2824    /* 20 */
2825    /* INTERP_XY GPR0.x, GPR0.y PARAM0.x */
2826    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2827			     SRC0_REL(ABSOLUTE),
2828			     SRC0_ELEM(ELEM_Y),
2829			     SRC0_NEG(0),
2830			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2831			     SRC1_REL(ABSOLUTE),
2832			     SRC1_ELEM(ELEM_X),
2833			     SRC1_NEG(0),
2834			     INDEX_MODE(SQ_INDEX_AR_X),
2835			     PRED_SEL(SQ_PRED_SEL_OFF),
2836			     LAST(0));
2837    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2838				 SRC1_ABS(0),
2839				 UPDATE_EXECUTE_MASK(0),
2840				 UPDATE_PRED(0),
2841				 WRITE_MASK(1),
2842				 OMOD(SQ_ALU_OMOD_OFF),
2843				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2844				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2845				 DST_GPR(0),
2846				 DST_REL(ABSOLUTE),
2847				 DST_ELEM(ELEM_X),
2848				 CLAMP(0));
2849    /* 21 */
2850    /* INTERP_XY GPR0.y, GPR0.x PARAM0.x */
2851    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2852			     SRC0_REL(ABSOLUTE),
2853			     SRC0_ELEM(ELEM_X),
2854			     SRC0_NEG(0),
2855			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2856			     SRC1_REL(ABSOLUTE),
2857			     SRC1_ELEM(ELEM_X),
2858			     SRC1_NEG(0),
2859			     INDEX_MODE(SQ_INDEX_AR_X),
2860			     PRED_SEL(SQ_PRED_SEL_OFF),
2861			     LAST(0));
2862    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2863				 SRC1_ABS(0),
2864				 UPDATE_EXECUTE_MASK(0),
2865				 UPDATE_PRED(0),
2866				 WRITE_MASK(1),
2867				 OMOD(SQ_ALU_OMOD_OFF),
2868				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2869				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2870				 DST_GPR(0),
2871				 DST_REL(ABSOLUTE),
2872				 DST_ELEM(ELEM_Y),
2873				 CLAMP(0));
2874    /* 22 */
2875    /* INTERP_XY GPR0.z, GPR0.y PARAM0.x */
2876    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2877			     SRC0_REL(ABSOLUTE),
2878			     SRC0_ELEM(ELEM_Y),
2879			     SRC0_NEG(0),
2880			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2881			     SRC1_REL(ABSOLUTE),
2882			     SRC1_ELEM(ELEM_X),
2883			     SRC1_NEG(0),
2884			     INDEX_MODE(SQ_INDEX_AR_X),
2885			     PRED_SEL(SQ_PRED_SEL_OFF),
2886			     LAST(0));
2887    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2888				 SRC1_ABS(0),
2889				 UPDATE_EXECUTE_MASK(0),
2890				 UPDATE_PRED(0),
2891				 WRITE_MASK(0),
2892				 OMOD(SQ_ALU_OMOD_OFF),
2893				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2894				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2895				 DST_GPR(0),
2896				 DST_REL(ABSOLUTE),
2897				 DST_ELEM(ELEM_Z),
2898				 CLAMP(0));
2899
2900    /* 23 */
2901    /* INTERP_XY GPR0.w, GPR0.x PARAM0.x */
2902    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2903			     SRC0_REL(ABSOLUTE),
2904			     SRC0_ELEM(ELEM_X),
2905			     SRC0_NEG(0),
2906			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2907			     SRC1_REL(ABSOLUTE),
2908			     SRC1_ELEM(ELEM_X),
2909			     SRC1_NEG(0),
2910			     INDEX_MODE(SQ_INDEX_AR_X),
2911			     PRED_SEL(SQ_PRED_SEL_OFF),
2912			     LAST(1));
2913    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2914				 SRC1_ABS(0),
2915				 UPDATE_EXECUTE_MASK(0),
2916				 UPDATE_PRED(0),
2917				 WRITE_MASK(0),
2918				 OMOD(SQ_ALU_OMOD_OFF),
2919				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2920				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2921				 DST_GPR(0),
2922				 DST_REL(ABSOLUTE),
2923				 DST_ELEM(ELEM_W),
2924				 CLAMP(0));
2925
2926    /* 24/25 */
2927    /* SAMPLE RID=0 GPR0, GPR0 */
2928    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
2929			     INST_MOD(0),
2930			     FETCH_WHOLE_QUAD(0),
2931			     RESOURCE_ID(0),
2932			     SRC_GPR(0),
2933			     SRC_REL(ABSOLUTE),
2934			     ALT_CONST(0),
2935			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
2936			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
2937    shader[i++] = TEX_DWORD1(DST_GPR(0),
2938			     DST_REL(ABSOLUTE),
2939			     DST_SEL_X(SQ_SEL_X),
2940			     DST_SEL_Y(SQ_SEL_Y),
2941			     DST_SEL_Z(SQ_SEL_Z),
2942			     DST_SEL_W(SQ_SEL_W),
2943			     LOD_BIAS(0),
2944			     COORD_TYPE_X(TEX_NORMALIZED),
2945			     COORD_TYPE_Y(TEX_NORMALIZED),
2946			     COORD_TYPE_Z(TEX_NORMALIZED),
2947			     COORD_TYPE_W(TEX_NORMALIZED));
2948    shader[i++] = TEX_DWORD2(OFFSET_X(0),
2949			     OFFSET_Y(0),
2950			     OFFSET_Z(0),
2951			     SAMPLER_ID(0),
2952			     SRC_SEL_X(SQ_SEL_X),
2953			     SRC_SEL_Y(SQ_SEL_Y),
2954			     SRC_SEL_Z(SQ_SEL_0),
2955			     SRC_SEL_W(SQ_SEL_1));
2956    shader[i++] = TEX_DWORD_PAD;
2957
2958    /* 26 */
2959    /* MOV GPR0.x, KC4.x */
2960    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4),
2961			     SRC0_REL(ABSOLUTE),
2962			     SRC0_ELEM(ELEM_X),
2963			     SRC0_NEG(0),
2964			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2965			     SRC1_REL(ABSOLUTE),
2966			     SRC1_ELEM(ELEM_X),
2967			     SRC1_NEG(0),
2968			     INDEX_MODE(SQ_INDEX_AR_X),
2969			     PRED_SEL(SQ_PRED_SEL_OFF),
2970			     LAST(0));
2971    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2972				 SRC1_ABS(0),
2973				 UPDATE_EXECUTE_MASK(0),
2974				 UPDATE_PRED(0),
2975				 WRITE_MASK(1),
2976				 OMOD(SQ_ALU_OMOD_OFF),
2977				 ALU_INST(SQ_OP2_INST_MOV),
2978				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2979				 DST_GPR(0),
2980				 DST_REL(ABSOLUTE),
2981				 DST_ELEM(ELEM_X),
2982				 CLAMP(1));
2983
2984    /* 27 */
2985    /* MOV GPR0.y, KC4.y */
2986    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4),
2987			     SRC0_REL(ABSOLUTE),
2988			     SRC0_ELEM(ELEM_Y),
2989			     SRC0_NEG(0),
2990			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2991			     SRC1_REL(ABSOLUTE),
2992			     SRC1_ELEM(ELEM_X),
2993			     SRC1_NEG(0),
2994			     INDEX_MODE(SQ_INDEX_AR_X),
2995			     PRED_SEL(SQ_PRED_SEL_OFF),
2996			     LAST(0));
2997    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2998				 SRC1_ABS(0),
2999				 UPDATE_EXECUTE_MASK(0),
3000				 UPDATE_PRED(0),
3001				 WRITE_MASK(1),
3002				 OMOD(SQ_ALU_OMOD_OFF),
3003				 ALU_INST(SQ_OP2_INST_MOV),
3004				 BANK_SWIZZLE(SQ_ALU_VEC_012),
3005				 DST_GPR(0),
3006				 DST_REL(ABSOLUTE),
3007				 DST_ELEM(ELEM_Y),
3008				 CLAMP(1));
3009
3010    /* 28  */
3011    /* MOV GPR0.z, KC4.z */
3012    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4),
3013			     SRC0_REL(ABSOLUTE),
3014			     SRC0_ELEM(ELEM_Z),
3015			     SRC0_NEG(0),
3016			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
3017			     SRC1_REL(ABSOLUTE),
3018			     SRC1_ELEM(ELEM_X),
3019			     SRC1_NEG(0),
3020			     INDEX_MODE(SQ_INDEX_AR_X),
3021			     PRED_SEL(SQ_PRED_SEL_OFF),
3022			     LAST(0));
3023    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3024				 SRC1_ABS(0),
3025				 UPDATE_EXECUTE_MASK(0),
3026				 UPDATE_PRED(0),
3027				 WRITE_MASK(1),
3028				 OMOD(SQ_ALU_OMOD_OFF),
3029				 ALU_INST(SQ_OP2_INST_MOV),
3030				 BANK_SWIZZLE(SQ_ALU_VEC_012),
3031				 DST_GPR(0),
3032				 DST_REL(ABSOLUTE),
3033				 DST_ELEM(ELEM_Z),
3034				 CLAMP(1));
3035
3036    /* 29 */
3037    /* MOV GPR0.w, KC4.w */
3038    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4),
3039			     SRC0_REL(ABSOLUTE),
3040			     SRC0_ELEM(ELEM_W),
3041			     SRC0_NEG(0),
3042			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
3043			     SRC1_REL(ABSOLUTE),
3044			     SRC1_ELEM(ELEM_X),
3045			     SRC1_NEG(0),
3046			     INDEX_MODE(SQ_INDEX_AR_X),
3047			     PRED_SEL(SQ_PRED_SEL_OFF),
3048			     LAST(1));
3049    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3050				 SRC1_ABS(0),
3051				 UPDATE_EXECUTE_MASK(0),
3052				 UPDATE_PRED(0),
3053				 WRITE_MASK(1),
3054				 OMOD(SQ_ALU_OMOD_OFF),
3055				 ALU_INST(SQ_OP2_INST_MOV),
3056				 BANK_SWIZZLE(SQ_ALU_VEC_012),
3057				 DST_GPR(0),
3058				 DST_REL(ABSOLUTE),
3059				 DST_ELEM(ELEM_W),
3060				 CLAMP(1));
3061
3062    /* 30 */
3063    /* INTERP_XY GPR1.x, PARAM1 */
3064    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
3065			     SRC0_REL(ABSOLUTE),
3066			     SRC0_ELEM(ELEM_Y),
3067			     SRC0_NEG(0),
3068			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
3069			     SRC1_REL(ABSOLUTE),
3070			     SRC1_ELEM(ELEM_X),
3071			     SRC1_NEG(0),
3072			     INDEX_MODE(SQ_INDEX_AR_X),
3073			     PRED_SEL(SQ_PRED_SEL_OFF),
3074			     LAST(0));
3075    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3076				 SRC1_ABS(0),
3077				 UPDATE_EXECUTE_MASK(0),
3078				 UPDATE_PRED(0),
3079				 WRITE_MASK(1),
3080				 OMOD(SQ_ALU_OMOD_OFF),
3081				 ALU_INST(SQ_OP2_INST_INTERP_XY),
3082				 BANK_SWIZZLE(SQ_ALU_VEC_210),
3083				 DST_GPR(1),
3084				 DST_REL(ABSOLUTE),
3085				 DST_ELEM(ELEM_X),
3086				 CLAMP(0));
3087    /* 31 */
3088    /* INTERP_XY GPR1.y, PARAM1 */
3089    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
3090			     SRC0_REL(ABSOLUTE),
3091			     SRC0_ELEM(ELEM_X),
3092			     SRC0_NEG(0),
3093			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
3094			     SRC1_REL(ABSOLUTE),
3095			     SRC1_ELEM(ELEM_X),
3096			     SRC1_NEG(0),
3097			     INDEX_MODE(SQ_INDEX_AR_X),
3098			     PRED_SEL(SQ_PRED_SEL_OFF),
3099			     LAST(0));
3100    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3101				 SRC1_ABS(0),
3102				 UPDATE_EXECUTE_MASK(0),
3103				 UPDATE_PRED(0),
3104				 WRITE_MASK(1),
3105				 OMOD(SQ_ALU_OMOD_OFF),
3106				 ALU_INST(SQ_OP2_INST_INTERP_XY),
3107				 BANK_SWIZZLE(SQ_ALU_VEC_210),
3108				 DST_GPR(1),
3109				 DST_REL(ABSOLUTE),
3110				 DST_ELEM(ELEM_Y),
3111				 CLAMP(0));
3112    /* 32 */
3113    /* INTERP_XY GPR1.z, PARAM1 */
3114    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
3115			     SRC0_REL(ABSOLUTE),
3116			     SRC0_ELEM(ELEM_Y),
3117			     SRC0_NEG(0),
3118			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
3119			     SRC1_REL(ABSOLUTE),
3120			     SRC1_ELEM(ELEM_X),
3121			     SRC1_NEG(0),
3122			     INDEX_MODE(SQ_INDEX_AR_X),
3123			     PRED_SEL(SQ_PRED_SEL_OFF),
3124			     LAST(0));
3125    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3126				 SRC1_ABS(0),
3127				 UPDATE_EXECUTE_MASK(0),
3128				 UPDATE_PRED(0),
3129				 WRITE_MASK(0),
3130				 OMOD(SQ_ALU_OMOD_OFF),
3131				 ALU_INST(SQ_OP2_INST_INTERP_XY),
3132				 BANK_SWIZZLE(SQ_ALU_VEC_210),
3133				 DST_GPR(1),
3134				 DST_REL(ABSOLUTE),
3135				 DST_ELEM(ELEM_Z),
3136				 CLAMP(0));
3137    /* 33 */
3138    /* INTERP_XY GPR1.w, PARAM1 */
3139    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
3140			     SRC0_REL(ABSOLUTE),
3141			     SRC0_ELEM(ELEM_X),
3142			     SRC0_NEG(0),
3143			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
3144			     SRC1_REL(ABSOLUTE),
3145			     SRC1_ELEM(ELEM_X),
3146			     SRC1_NEG(0),
3147			     INDEX_MODE(SQ_INDEX_AR_X),
3148			     PRED_SEL(SQ_PRED_SEL_OFF),
3149			     LAST(1));
3150    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3151				 SRC1_ABS(0),
3152				 UPDATE_EXECUTE_MASK(0),
3153				 UPDATE_PRED(0),
3154				 WRITE_MASK(0),
3155				 OMOD(SQ_ALU_OMOD_OFF),
3156				 ALU_INST(SQ_OP2_INST_INTERP_XY),
3157				 BANK_SWIZZLE(SQ_ALU_VEC_210),
3158				 DST_GPR(1),
3159				 DST_REL(ABSOLUTE),
3160				 DST_ELEM(ELEM_W),
3161				 CLAMP(0));
3162
3163    /* 34/35 */
3164    /* SAMPLE RID=1 GPR1, GPR1 */
3165    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
3166			     INST_MOD(0),
3167			     FETCH_WHOLE_QUAD(0),
3168			     RESOURCE_ID(1),
3169			     SRC_GPR(1),
3170			     SRC_REL(ABSOLUTE),
3171			     ALT_CONST(0),
3172			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
3173			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
3174    shader[i++] = TEX_DWORD1(DST_GPR(1),
3175			     DST_REL(ABSOLUTE),
3176			     DST_SEL_X(SQ_SEL_X),
3177			     DST_SEL_Y(SQ_SEL_Y),
3178			     DST_SEL_Z(SQ_SEL_Z),
3179			     DST_SEL_W(SQ_SEL_W),
3180			     LOD_BIAS(0),
3181			     COORD_TYPE_X(TEX_NORMALIZED),
3182			     COORD_TYPE_Y(TEX_NORMALIZED),
3183			     COORD_TYPE_Z(TEX_NORMALIZED),
3184			     COORD_TYPE_W(TEX_NORMALIZED));
3185    shader[i++] = TEX_DWORD2(OFFSET_X(0),
3186			     OFFSET_Y(0),
3187			     OFFSET_Z(0),
3188			     SAMPLER_ID(1),
3189			     SRC_SEL_X(SQ_SEL_X),
3190			     SRC_SEL_Y(SQ_SEL_Y),
3191			     SRC_SEL_Z(SQ_SEL_0),
3192			     SRC_SEL_W(SQ_SEL_1));
3193    shader[i++] = TEX_DWORD_PAD;
3194
3195    /* 36 */
3196    /* MOV GPR1.x, KC5.x */
3197    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5),
3198			     SRC0_REL(ABSOLUTE),
3199			     SRC0_ELEM(ELEM_X),
3200			     SRC0_NEG(0),
3201			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
3202			     SRC1_REL(ABSOLUTE),
3203			     SRC1_ELEM(ELEM_X),
3204			     SRC1_NEG(0),
3205			     INDEX_MODE(SQ_INDEX_AR_X),
3206			     PRED_SEL(SQ_PRED_SEL_OFF),
3207			     LAST(0));
3208    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3209				 SRC1_ABS(0),
3210				 UPDATE_EXECUTE_MASK(0),
3211				 UPDATE_PRED(0),
3212				 WRITE_MASK(1),
3213				 OMOD(SQ_ALU_OMOD_OFF),
3214				 ALU_INST(SQ_OP2_INST_MOV),
3215				 BANK_SWIZZLE(SQ_ALU_VEC_012),
3216				 DST_GPR(1),
3217				 DST_REL(ABSOLUTE),
3218				 DST_ELEM(ELEM_X),
3219				 CLAMP(1));
3220
3221    /* 37 */
3222    /* MOV GPR1.y, KC5.y */
3223    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5),
3224			     SRC0_REL(ABSOLUTE),
3225			     SRC0_ELEM(ELEM_Y),
3226			     SRC0_NEG(0),
3227			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
3228			     SRC1_REL(ABSOLUTE),
3229			     SRC1_ELEM(ELEM_X),
3230			     SRC1_NEG(0),
3231			     INDEX_MODE(SQ_INDEX_AR_X),
3232			     PRED_SEL(SQ_PRED_SEL_OFF),
3233			     LAST(0));
3234    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3235				 SRC1_ABS(0),
3236				 UPDATE_EXECUTE_MASK(0),
3237				 UPDATE_PRED(0),
3238				 WRITE_MASK(1),
3239				 OMOD(SQ_ALU_OMOD_OFF),
3240				 ALU_INST(SQ_OP2_INST_MOV),
3241				 BANK_SWIZZLE(SQ_ALU_VEC_012),
3242				 DST_GPR(1),
3243				 DST_REL(ABSOLUTE),
3244				 DST_ELEM(ELEM_Y),
3245				 CLAMP(1));
3246
3247    /* 38 */
3248    /* MOV GPR1.z, KC5.z */
3249    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5),
3250			     SRC0_REL(ABSOLUTE),
3251			     SRC0_ELEM(ELEM_Z),
3252			     SRC0_NEG(0),
3253			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
3254			     SRC1_REL(ABSOLUTE),
3255			     SRC1_ELEM(ELEM_X),
3256			     SRC1_NEG(0),
3257			     INDEX_MODE(SQ_INDEX_AR_X),
3258			     PRED_SEL(SQ_PRED_SEL_OFF),
3259			     LAST(0));
3260    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3261				 SRC1_ABS(0),
3262				 UPDATE_EXECUTE_MASK(0),
3263				 UPDATE_PRED(0),
3264				 WRITE_MASK(1),
3265				 OMOD(SQ_ALU_OMOD_OFF),
3266				 ALU_INST(SQ_OP2_INST_MOV),
3267				 BANK_SWIZZLE(SQ_ALU_VEC_012),
3268				 DST_GPR(1),
3269				 DST_REL(ABSOLUTE),
3270				 DST_ELEM(ELEM_Z),
3271				 CLAMP(1));
3272
3273    /* 39 */
3274    /* MOV GPR1.w, KC5.w */
3275    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5),
3276			     SRC0_REL(ABSOLUTE),
3277			     SRC0_ELEM(ELEM_W),
3278			     SRC0_NEG(0),
3279			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
3280			     SRC1_REL(ABSOLUTE),
3281			     SRC1_ELEM(ELEM_X),
3282			     SRC1_NEG(0),
3283			     INDEX_MODE(SQ_INDEX_AR_X),
3284			     PRED_SEL(SQ_PRED_SEL_OFF),
3285			     LAST(1));
3286    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3287				 SRC1_ABS(0),
3288				 UPDATE_EXECUTE_MASK(0),
3289				 UPDATE_PRED(0),
3290				 WRITE_MASK(1),
3291				 OMOD(SQ_ALU_OMOD_OFF),
3292				 ALU_INST(SQ_OP2_INST_MOV),
3293				 BANK_SWIZZLE(SQ_ALU_VEC_012),
3294				 DST_GPR(1),
3295				 DST_REL(ABSOLUTE),
3296				 DST_ELEM(ELEM_W),
3297				 CLAMP(1));
3298
3299    return i;
3300}
3301