evergreen_shader.c revision 921a55d8
1/*
2 * Copyright 2010 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Author: Alex Deucher <alexander.deucher@amd.com>
24 *
25 */
26
27#ifdef HAVE_CONFIG_H
28#include "config.h"
29#endif
30
31#ifdef XF86DRM_MODE
32
33#include "xf86.h"
34
35#include "evergreen_shader.h"
36#include "evergreen_reg.h"
37
38/* solid vs --------------------------------------- */
39int evergreen_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader)
40{
41    int i = 0;
42
43    /* 0 */
44    shader[i++] = CF_DWORD0(ADDR(4),
45			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
46    shader[i++] = CF_DWORD1(POP_COUNT(0),
47			    CF_CONST(0),
48			    COND(SQ_CF_COND_ACTIVE),
49			    I_COUNT(1),
50			    VALID_PIXEL_MODE(0),
51			    END_OF_PROGRAM(0),
52			    CF_INST(SQ_CF_INST_VC),
53			    WHOLE_QUAD_MODE(0),
54			    BARRIER(1));
55    /* 1 */
56    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
57					  TYPE(SQ_EXPORT_POS),
58					  RW_GPR(1),
59					  RW_REL(ABSOLUTE),
60					  INDEX_GPR(0),
61					  ELEM_SIZE(0));
62    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
63					       SRC_SEL_Y(SQ_SEL_Y),
64					       SRC_SEL_Z(SQ_SEL_Z),
65					       SRC_SEL_W(SQ_SEL_W),
66					       BURST_COUNT(1),
67					       VALID_PIXEL_MODE(0),
68					       END_OF_PROGRAM(0),
69					       CF_INST(SQ_CF_INST_EXPORT_DONE),
70					       MARK(0),
71					       BARRIER(1));
72    /* 2 - always export a param whether it's used or not */
73    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
74					  TYPE(SQ_EXPORT_PARAM),
75					  RW_GPR(0),
76					  RW_REL(ABSOLUTE),
77					  INDEX_GPR(0),
78					  ELEM_SIZE(0));
79    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
80					       SRC_SEL_Y(SQ_SEL_Y),
81					       SRC_SEL_Z(SQ_SEL_Z),
82					       SRC_SEL_W(SQ_SEL_W),
83					       BURST_COUNT(0),
84					       VALID_PIXEL_MODE(0),
85					       END_OF_PROGRAM(1),
86					       CF_INST(SQ_CF_INST_EXPORT_DONE),
87					       MARK(0),
88					       BARRIER(0));
89    /* 3 - padding */
90    shader[i++] = 0x00000000;
91    shader[i++] = 0x00000000;
92    /* 4/5 */
93    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
94			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
95			     FETCH_WHOLE_QUAD(0),
96			     BUFFER_ID(0),
97			     SRC_GPR(0),
98			     SRC_REL(ABSOLUTE),
99			     SRC_SEL_X(SQ_SEL_X),
100			     MEGA_FETCH_COUNT(8));
101    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
102				 DST_REL(0),
103				 DST_SEL_X(SQ_SEL_X),
104				 DST_SEL_Y(SQ_SEL_Y),
105				 DST_SEL_Z(SQ_SEL_0),
106				 DST_SEL_W(SQ_SEL_1),
107				 USE_CONST_FIELDS(0),
108				 DATA_FORMAT(FMT_32_32_FLOAT),
109				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
110				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
111				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
112    shader[i++] = VTX_DWORD2(OFFSET(0),
113			     ENDIAN_SWAP(ENDIAN_NONE),
114			     CONST_BUF_NO_STRIDE(0),
115			     MEGA_FETCH(1),
116			     ALT_CONST(0),
117			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
118    shader[i++] = VTX_DWORD_PAD;
119
120    return i;
121}
122
123/* solid ps --------------------------------------- */
124int evergreen_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader)
125{
126    int i = 0;
127
128    /* 0 */
129    shader[i++] = CF_ALU_DWORD0(ADDR(2),
130				KCACHE_BANK0(0),
131				KCACHE_BANK1(0),
132				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
133    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
134				KCACHE_ADDR0(0),
135				KCACHE_ADDR1(0),
136				I_COUNT(4),
137				ALT_CONST(0),
138				CF_INST(SQ_CF_INST_ALU),
139				WHOLE_QUAD_MODE(0),
140				BARRIER(1));
141    /* 1 */
142    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
143					  TYPE(SQ_EXPORT_PIXEL),
144					  RW_GPR(0),
145					  RW_REL(ABSOLUTE),
146					  INDEX_GPR(0),
147					  ELEM_SIZE(1));
148    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
149					       SRC_SEL_Y(SQ_SEL_Y),
150					       SRC_SEL_Z(SQ_SEL_Z),
151					       SRC_SEL_W(SQ_SEL_W),
152					       BURST_COUNT(1),
153					       VALID_PIXEL_MODE(0),
154					       END_OF_PROGRAM(1),
155					       CF_INST(SQ_CF_INST_EXPORT_DONE),
156					       MARK(0),
157					       BARRIER(1));
158
159    /* 2 */
160    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
161			     SRC0_REL(ABSOLUTE),
162			     SRC0_ELEM(ELEM_X),
163			     SRC0_NEG(0),
164			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
165			     SRC1_REL(ABSOLUTE),
166			     SRC1_ELEM(ELEM_X),
167			     SRC1_NEG(0),
168			     INDEX_MODE(SQ_INDEX_AR_X),
169			     PRED_SEL(SQ_PRED_SEL_OFF),
170			     LAST(0));
171    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
172				 SRC1_ABS(0),
173				 UPDATE_EXECUTE_MASK(0),
174				 UPDATE_PRED(0),
175				 WRITE_MASK(1),
176				 OMOD(SQ_ALU_OMOD_OFF),
177				 ALU_INST(SQ_OP2_INST_MOV),
178				 BANK_SWIZZLE(SQ_ALU_VEC_012),
179				 DST_GPR(0),
180				 DST_REL(ABSOLUTE),
181				 DST_ELEM(ELEM_X),
182				 CLAMP(1));
183    /* 3 */
184    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
185			     SRC0_REL(ABSOLUTE),
186			     SRC0_ELEM(ELEM_Y),
187			     SRC0_NEG(0),
188			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
189			     SRC1_REL(ABSOLUTE),
190			     SRC1_ELEM(ELEM_Y),
191			     SRC1_NEG(0),
192			     INDEX_MODE(SQ_INDEX_AR_X),
193			     PRED_SEL(SQ_PRED_SEL_OFF),
194			     LAST(0));
195    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
196				 SRC1_ABS(0),
197				 UPDATE_EXECUTE_MASK(0),
198				 UPDATE_PRED(0),
199				 WRITE_MASK(1),
200				 OMOD(SQ_ALU_OMOD_OFF),
201				 ALU_INST(SQ_OP2_INST_MOV),
202				 BANK_SWIZZLE(SQ_ALU_VEC_012),
203				 DST_GPR(0),
204				 DST_REL(ABSOLUTE),
205				 DST_ELEM(ELEM_Y),
206				 CLAMP(1));
207    /* 4 */
208    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
209			     SRC0_REL(ABSOLUTE),
210			     SRC0_ELEM(ELEM_Z),
211			     SRC0_NEG(0),
212			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
213			     SRC1_REL(ABSOLUTE),
214			     SRC1_ELEM(ELEM_Z),
215			     SRC1_NEG(0),
216			     INDEX_MODE(SQ_INDEX_AR_X),
217			     PRED_SEL(SQ_PRED_SEL_OFF),
218			     LAST(0));
219    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
220				 SRC1_ABS(0),
221				 UPDATE_EXECUTE_MASK(0),
222				 UPDATE_PRED(0),
223				 WRITE_MASK(1),
224				 OMOD(SQ_ALU_OMOD_OFF),
225				 ALU_INST(SQ_OP2_INST_MOV),
226				 BANK_SWIZZLE(SQ_ALU_VEC_012),
227				 DST_GPR(0),
228				 DST_REL(ABSOLUTE),
229				 DST_ELEM(ELEM_Z),
230				 CLAMP(1));
231    /* 5 */
232    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
233			     SRC0_REL(ABSOLUTE),
234			     SRC0_ELEM(ELEM_W),
235			     SRC0_NEG(0),
236			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
237			     SRC1_REL(ABSOLUTE),
238			     SRC1_ELEM(ELEM_W),
239			     SRC1_NEG(0),
240			     INDEX_MODE(SQ_INDEX_AR_X),
241			     PRED_SEL(SQ_PRED_SEL_OFF),
242			     LAST(1));
243    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
244				 SRC1_ABS(0),
245				 UPDATE_EXECUTE_MASK(0),
246				 UPDATE_PRED(0),
247				 WRITE_MASK(1),
248				 OMOD(SQ_ALU_OMOD_OFF),
249				 ALU_INST(SQ_OP2_INST_MOV),
250				 BANK_SWIZZLE(SQ_ALU_VEC_012),
251				 DST_GPR(0),
252				 DST_REL(ABSOLUTE),
253				 DST_ELEM(ELEM_W),
254				 CLAMP(1));
255
256    return i;
257}
258
259/* copy vs --------------------------------------- */
260int evergreen_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader)
261{
262    int i = 0;
263
264    /* 0 */
265    shader[i++] = CF_DWORD0(ADDR(4),
266			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
267    shader[i++] = CF_DWORD1(POP_COUNT(0),
268			    CF_CONST(0),
269			    COND(SQ_CF_COND_ACTIVE),
270			    I_COUNT(2),
271			    VALID_PIXEL_MODE(0),
272			    END_OF_PROGRAM(0),
273			    CF_INST(SQ_CF_INST_VC),
274			    WHOLE_QUAD_MODE(0),
275			    BARRIER(1));
276    /* 1 */
277    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
278					  TYPE(SQ_EXPORT_POS),
279					  RW_GPR(1),
280					  RW_REL(ABSOLUTE),
281					  INDEX_GPR(0),
282					  ELEM_SIZE(0));
283    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
284					       SRC_SEL_Y(SQ_SEL_Y),
285					       SRC_SEL_Z(SQ_SEL_Z),
286					       SRC_SEL_W(SQ_SEL_W),
287					       BURST_COUNT(0),
288					       VALID_PIXEL_MODE(0),
289					       END_OF_PROGRAM(0),
290					       CF_INST(SQ_CF_INST_EXPORT_DONE),
291					       MARK(0),
292					       BARRIER(1));
293    /* 2 */
294    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
295					  TYPE(SQ_EXPORT_PARAM),
296					  RW_GPR(0),
297					  RW_REL(ABSOLUTE),
298					  INDEX_GPR(0),
299					  ELEM_SIZE(0));
300    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
301					       SRC_SEL_Y(SQ_SEL_Y),
302					       SRC_SEL_Z(SQ_SEL_Z),
303					       SRC_SEL_W(SQ_SEL_W),
304					       BURST_COUNT(0),
305					       VALID_PIXEL_MODE(0),
306					       END_OF_PROGRAM(1),
307					       CF_INST(SQ_CF_INST_EXPORT_DONE),
308					       MARK(0),
309					       BARRIER(0));
310    /* 3 */
311    shader[i++] = 0x00000000;
312    shader[i++] = 0x00000000;
313    /* 4/5 */
314    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
315			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
316			     FETCH_WHOLE_QUAD(0),
317			     BUFFER_ID(0),
318			     SRC_GPR(0),
319			     SRC_REL(ABSOLUTE),
320			     SRC_SEL_X(SQ_SEL_X),
321			     MEGA_FETCH_COUNT(16));
322    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
323				 DST_REL(0),
324				 DST_SEL_X(SQ_SEL_X),
325				 DST_SEL_Y(SQ_SEL_Y),
326				 DST_SEL_Z(SQ_SEL_0),
327				 DST_SEL_W(SQ_SEL_1),
328				 USE_CONST_FIELDS(0),
329				 DATA_FORMAT(FMT_32_32_FLOAT),
330				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
331				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
332				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
333    shader[i++] = VTX_DWORD2(OFFSET(0),
334			     ENDIAN_SWAP(ENDIAN_NONE),
335			     CONST_BUF_NO_STRIDE(0),
336			     MEGA_FETCH(1),
337			     ALT_CONST(0),
338			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
339    shader[i++] = VTX_DWORD_PAD;
340    /* 6/7 */
341    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
342			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
343			     FETCH_WHOLE_QUAD(0),
344			     BUFFER_ID(0),
345			     SRC_GPR(0),
346			     SRC_REL(ABSOLUTE),
347			     SRC_SEL_X(SQ_SEL_X),
348			     MEGA_FETCH_COUNT(8));
349    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
350				 DST_REL(0),
351				 DST_SEL_X(SQ_SEL_X),
352				 DST_SEL_Y(SQ_SEL_Y),
353				 DST_SEL_Z(SQ_SEL_0),
354				 DST_SEL_W(SQ_SEL_1),
355				 USE_CONST_FIELDS(0),
356				 DATA_FORMAT(FMT_32_32_FLOAT),
357				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
358				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
359				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
360    shader[i++] = VTX_DWORD2(OFFSET(8),
361			     ENDIAN_SWAP(ENDIAN_NONE),
362			     CONST_BUF_NO_STRIDE(0),
363			     MEGA_FETCH(0),
364			     ALT_CONST(0),
365			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
366    shader[i++] = VTX_DWORD_PAD;
367
368    return i;
369}
370
371/* copy ps --------------------------------------- */
372int evergreen_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader)
373{
374    int i = 0;
375
376    /* CF INST 0 */
377    shader[i++] = CF_ALU_DWORD0(ADDR(3),
378				KCACHE_BANK0(0),
379				KCACHE_BANK1(0),
380				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
381    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
382				KCACHE_ADDR0(0),
383				KCACHE_ADDR1(0),
384				I_COUNT(4),
385				ALT_CONST(0),
386				CF_INST(SQ_CF_INST_ALU),
387				WHOLE_QUAD_MODE(0),
388				BARRIER(1));
389    /* CF INST 1 */
390    shader[i++] = CF_DWORD0(ADDR(8),
391			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
392    shader[i++] = CF_DWORD1(POP_COUNT(0),
393			    CF_CONST(0),
394			    COND(SQ_CF_COND_ACTIVE),
395			    I_COUNT(1),
396			    VALID_PIXEL_MODE(0),
397			    END_OF_PROGRAM(0),
398			    CF_INST(SQ_CF_INST_TC),
399			    WHOLE_QUAD_MODE(0),
400			    BARRIER(1));
401    /* CF INST 2 */
402    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
403					  TYPE(SQ_EXPORT_PIXEL),
404					  RW_GPR(0),
405					  RW_REL(ABSOLUTE),
406					  INDEX_GPR(0),
407					  ELEM_SIZE(1));
408    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
409					       SRC_SEL_Y(SQ_SEL_Y),
410					       SRC_SEL_Z(SQ_SEL_Z),
411					       SRC_SEL_W(SQ_SEL_W),
412					       BURST_COUNT(1),
413					       VALID_PIXEL_MODE(0),
414					       END_OF_PROGRAM(1),
415					       CF_INST(SQ_CF_INST_EXPORT_DONE),
416					       MARK(0),
417					       BARRIER(1));
418
419    /* 3 interpolate tex coords */
420    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
421			     SRC0_REL(ABSOLUTE),
422			     SRC0_ELEM(ELEM_Y),
423			     SRC0_NEG(0),
424			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
425			     SRC1_REL(ABSOLUTE),
426			     SRC1_ELEM(ELEM_X),
427			     SRC1_NEG(0),
428			     INDEX_MODE(SQ_INDEX_AR_X),
429			     PRED_SEL(SQ_PRED_SEL_OFF),
430			     LAST(0));
431    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
432				 SRC1_ABS(0),
433				 UPDATE_EXECUTE_MASK(0),
434				 UPDATE_PRED(0),
435				 WRITE_MASK(1),
436				 OMOD(SQ_ALU_OMOD_OFF),
437				 ALU_INST(SQ_OP2_INST_INTERP_XY),
438				 BANK_SWIZZLE(SQ_ALU_VEC_210),
439				 DST_GPR(0),
440				 DST_REL(ABSOLUTE),
441				 DST_ELEM(ELEM_X),
442				 CLAMP(0));
443    /* 4 */
444    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
445			     SRC0_REL(ABSOLUTE),
446			     SRC0_ELEM(ELEM_X),
447			     SRC0_NEG(0),
448			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
449			     SRC1_REL(ABSOLUTE),
450			     SRC1_ELEM(ELEM_X),
451			     SRC1_NEG(0),
452			     INDEX_MODE(SQ_INDEX_AR_X),
453			     PRED_SEL(SQ_PRED_SEL_OFF),
454			     LAST(0));
455    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
456				 SRC1_ABS(0),
457				 UPDATE_EXECUTE_MASK(0),
458				 UPDATE_PRED(0),
459				 WRITE_MASK(1),
460				 OMOD(SQ_ALU_OMOD_OFF),
461				 ALU_INST(SQ_OP2_INST_INTERP_XY),
462				 BANK_SWIZZLE(SQ_ALU_VEC_210),
463				 DST_GPR(0),
464				 DST_REL(ABSOLUTE),
465				 DST_ELEM(ELEM_Y),
466				 CLAMP(0));
467    /* 5 */
468    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
469			     SRC0_REL(ABSOLUTE),
470			     SRC0_ELEM(ELEM_Y),
471			     SRC0_NEG(0),
472			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
473			     SRC1_REL(ABSOLUTE),
474			     SRC1_ELEM(ELEM_X),
475			     SRC1_NEG(0),
476			     INDEX_MODE(SQ_INDEX_AR_X),
477			     PRED_SEL(SQ_PRED_SEL_OFF),
478			     LAST(0));
479    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
480				 SRC1_ABS(0),
481				 UPDATE_EXECUTE_MASK(0),
482				 UPDATE_PRED(0),
483				 WRITE_MASK(0),
484				 OMOD(SQ_ALU_OMOD_OFF),
485				 ALU_INST(SQ_OP2_INST_INTERP_XY),
486				 BANK_SWIZZLE(SQ_ALU_VEC_210),
487				 DST_GPR(0),
488				 DST_REL(ABSOLUTE),
489				 DST_ELEM(ELEM_Z),
490				 CLAMP(0));
491    /* 6 */
492    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
493			     SRC0_REL(ABSOLUTE),
494			     SRC0_ELEM(ELEM_X),
495			     SRC0_NEG(0),
496			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
497			     SRC1_REL(ABSOLUTE),
498			     SRC1_ELEM(ELEM_X),
499			     SRC1_NEG(0),
500			     INDEX_MODE(SQ_INDEX_AR_X),
501			     PRED_SEL(SQ_PRED_SEL_OFF),
502			     LAST(1));
503    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
504				 SRC1_ABS(0),
505				 UPDATE_EXECUTE_MASK(0),
506				 UPDATE_PRED(0),
507				 WRITE_MASK(0),
508				 OMOD(SQ_ALU_OMOD_OFF),
509				 ALU_INST(SQ_OP2_INST_INTERP_XY),
510				 BANK_SWIZZLE(SQ_ALU_VEC_210),
511				 DST_GPR(0),
512				 DST_REL(ABSOLUTE),
513				 DST_ELEM(ELEM_W),
514				 CLAMP(0));
515
516    /* 7 */
517    shader[i++] = 0x00000000;
518    shader[i++] = 0x00000000;
519
520    /* 8/9 TEX INST 0 */
521    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
522			     INST_MOD(0),
523			     FETCH_WHOLE_QUAD(0),
524			     RESOURCE_ID(0),
525			     SRC_GPR(0),
526			     SRC_REL(ABSOLUTE),
527			     ALT_CONST(0),
528			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
529			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
530    shader[i++] = TEX_DWORD1(DST_GPR(0),
531			     DST_REL(ABSOLUTE),
532			     DST_SEL_X(SQ_SEL_X), /* R */
533			     DST_SEL_Y(SQ_SEL_Y), /* G */
534			     DST_SEL_Z(SQ_SEL_Z), /* B */
535			     DST_SEL_W(SQ_SEL_W), /* A */
536			     LOD_BIAS(0),
537			     COORD_TYPE_X(TEX_UNNORMALIZED),
538			     COORD_TYPE_Y(TEX_UNNORMALIZED),
539			     COORD_TYPE_Z(TEX_UNNORMALIZED),
540			     COORD_TYPE_W(TEX_UNNORMALIZED));
541    shader[i++] = TEX_DWORD2(OFFSET_X(0),
542			     OFFSET_Y(0),
543			     OFFSET_Z(0),
544			     SAMPLER_ID(0),
545			     SRC_SEL_X(SQ_SEL_X),
546			     SRC_SEL_Y(SQ_SEL_Y),
547			     SRC_SEL_Z(SQ_SEL_0),
548			     SRC_SEL_W(SQ_SEL_1));
549    shader[i++] = TEX_DWORD_PAD;
550
551    return i;
552}
553
554int evergreen_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
555{
556    int i = 0;
557
558    /* 0 */
559    shader[i++] = CF_DWORD0(ADDR(6),
560			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
561    shader[i++] = CF_DWORD1(POP_COUNT(0),
562                            CF_CONST(0),
563                            COND(SQ_CF_COND_ACTIVE),
564                            I_COUNT(2),
565                            VALID_PIXEL_MODE(0),
566                            END_OF_PROGRAM(0),
567                            CF_INST(SQ_CF_INST_VC),
568                            WHOLE_QUAD_MODE(0),
569                            BARRIER(1));
570
571    /* 1 - ALU */
572    shader[i++] = CF_ALU_DWORD0(ADDR(4),
573				KCACHE_BANK0(0),
574				KCACHE_BANK1(0),
575				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
576    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
577				KCACHE_ADDR0(0),
578				KCACHE_ADDR1(0),
579				I_COUNT(2),
580				ALT_CONST(0),
581				CF_INST(SQ_CF_INST_ALU),
582				WHOLE_QUAD_MODE(0),
583				BARRIER(1));
584
585    /* 2 */
586    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
587                                          TYPE(SQ_EXPORT_POS),
588                                          RW_GPR(1),
589                                          RW_REL(ABSOLUTE),
590                                          INDEX_GPR(0),
591                                          ELEM_SIZE(3));
592    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
593                                               SRC_SEL_Y(SQ_SEL_Y),
594                                               SRC_SEL_Z(SQ_SEL_Z),
595                                               SRC_SEL_W(SQ_SEL_W),
596                                               BURST_COUNT(1),
597                                               VALID_PIXEL_MODE(0),
598                                               END_OF_PROGRAM(0),
599                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
600                                               MARK(0),
601                                               BARRIER(1));
602    /* 3 */
603    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
604                                          TYPE(SQ_EXPORT_PARAM),
605                                          RW_GPR(0),
606                                          RW_REL(ABSOLUTE),
607                                          INDEX_GPR(0),
608                                          ELEM_SIZE(3));
609    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
610                                               SRC_SEL_Y(SQ_SEL_Y),
611                                               SRC_SEL_Z(SQ_SEL_Z),
612                                               SRC_SEL_W(SQ_SEL_W),
613                                               BURST_COUNT(1),
614                                               VALID_PIXEL_MODE(0),
615                                               END_OF_PROGRAM(1),
616                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
617                                               MARK(0),
618                                               BARRIER(0));
619
620
621    /* 4 texX / w */
622    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
623                             SRC0_REL(ABSOLUTE),
624                             SRC0_ELEM(ELEM_X),
625                             SRC0_NEG(0),
626                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
627                             SRC1_REL(ABSOLUTE),
628                             SRC1_ELEM(ELEM_X),
629                             SRC1_NEG(0),
630                             INDEX_MODE(SQ_INDEX_AR_X),
631                             PRED_SEL(SQ_PRED_SEL_OFF),
632                             LAST(0));
633    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
634                                 SRC1_ABS(0),
635                                 UPDATE_EXECUTE_MASK(0),
636                                 UPDATE_PRED(0),
637                                 WRITE_MASK(1),
638                                 OMOD(SQ_ALU_OMOD_OFF),
639                                 ALU_INST(SQ_OP2_INST_MUL),
640                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
641                                 DST_GPR(0),
642                                 DST_REL(ABSOLUTE),
643                                 DST_ELEM(ELEM_X),
644                                 CLAMP(0));
645
646    /* 5 texY / h */
647    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
648                             SRC0_REL(ABSOLUTE),
649                             SRC0_ELEM(ELEM_Y),
650                             SRC0_NEG(0),
651                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
652                             SRC1_REL(ABSOLUTE),
653                             SRC1_ELEM(ELEM_Y),
654                             SRC1_NEG(0),
655                             INDEX_MODE(SQ_INDEX_AR_X),
656                             PRED_SEL(SQ_PRED_SEL_OFF),
657                             LAST(1));
658    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
659                                 SRC1_ABS(0),
660                                 UPDATE_EXECUTE_MASK(0),
661                                 UPDATE_PRED(0),
662                                 WRITE_MASK(1),
663                                 OMOD(SQ_ALU_OMOD_OFF),
664                                 ALU_INST(SQ_OP2_INST_MUL),
665                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
666                                 DST_GPR(0),
667                                 DST_REL(ABSOLUTE),
668                                 DST_ELEM(ELEM_Y),
669                                 CLAMP(0));
670
671    /* 6/7 */
672    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
673                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
674                             FETCH_WHOLE_QUAD(0),
675                             BUFFER_ID(0),
676                             SRC_GPR(0),
677                             SRC_REL(ABSOLUTE),
678                             SRC_SEL_X(SQ_SEL_X),
679                             MEGA_FETCH_COUNT(16));
680    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
681                                 DST_REL(ABSOLUTE),
682                                 DST_SEL_X(SQ_SEL_X),
683                                 DST_SEL_Y(SQ_SEL_Y),
684                                 DST_SEL_Z(SQ_SEL_0),
685                                 DST_SEL_W(SQ_SEL_1),
686                                 USE_CONST_FIELDS(0),
687                                 DATA_FORMAT(FMT_32_32_FLOAT),
688                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
689                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
690                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
691    shader[i++] = VTX_DWORD2(OFFSET(0),
692                             ENDIAN_SWAP(ENDIAN_NONE),
693                             CONST_BUF_NO_STRIDE(0),
694                             MEGA_FETCH(1),
695			     ALT_CONST(0),
696			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
697    shader[i++] = VTX_DWORD_PAD;
698    /* 8/9 */
699    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
700                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
701                             FETCH_WHOLE_QUAD(0),
702                             BUFFER_ID(0),
703                             SRC_GPR(0),
704                             SRC_REL(ABSOLUTE),
705                             SRC_SEL_X(SQ_SEL_X),
706                             MEGA_FETCH_COUNT(8));
707    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
708                                 DST_REL(ABSOLUTE),
709                                 DST_SEL_X(SQ_SEL_X),
710                                 DST_SEL_Y(SQ_SEL_Y),
711                                 DST_SEL_Z(SQ_SEL_0),
712                                 DST_SEL_W(SQ_SEL_1),
713                                 USE_CONST_FIELDS(0),
714                                 DATA_FORMAT(FMT_32_32_FLOAT),
715                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
716                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
717                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
718    shader[i++] = VTX_DWORD2(OFFSET(8),
719                             ENDIAN_SWAP(ENDIAN_NONE),
720                             CONST_BUF_NO_STRIDE(0),
721                             MEGA_FETCH(0),
722			     ALT_CONST(0),
723			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
724    shader[i++] = VTX_DWORD_PAD;
725
726    return i;
727}
728
729int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
730{
731    int i = 0;
732
733    /* 0 */
734    shader[i++] = CF_ALU_DWORD0(ADDR(5),
735				KCACHE_BANK0(0),
736				KCACHE_BANK1(0),
737				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
738    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
739				KCACHE_ADDR0(0),
740				KCACHE_ADDR1(0),
741				I_COUNT(4),
742				ALT_CONST(0),
743				CF_INST(SQ_CF_INST_ALU),
744				WHOLE_QUAD_MODE(0),
745				BARRIER(1));
746    /* 1 */
747    shader[i++] = CF_DWORD0(ADDR(21),
748			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
749    shader[i++] = CF_DWORD1(POP_COUNT(0),
750                            CF_CONST(0),
751                            COND(SQ_CF_COND_BOOL),
752                            I_COUNT(0),
753                            VALID_PIXEL_MODE(0),
754                            END_OF_PROGRAM(0),
755                            CF_INST(SQ_CF_INST_CALL),
756                            WHOLE_QUAD_MODE(0),
757                            BARRIER(0));
758    /* 2 */
759    shader[i++] = CF_DWORD0(ADDR(30),
760			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
761    shader[i++] = CF_DWORD1(POP_COUNT(0),
762                            CF_CONST(0),
763                            COND(SQ_CF_COND_NOT_BOOL),
764                            I_COUNT(0),
765                            VALID_PIXEL_MODE(0),
766                            END_OF_PROGRAM(0),
767                            CF_INST(SQ_CF_INST_CALL),
768                            WHOLE_QUAD_MODE(0),
769                            BARRIER(0));
770    /* 3 */
771    shader[i++] = CF_ALU_DWORD0(ADDR(9),
772                                KCACHE_BANK0(0),
773                                KCACHE_BANK1(0),
774                                KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
775    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
776                                KCACHE_ADDR0(0),
777                                KCACHE_ADDR1(0),
778                                I_COUNT(12),
779                                ALT_CONST(0),
780                                CF_INST(SQ_CF_INST_ALU),
781                                WHOLE_QUAD_MODE(0),
782                                BARRIER(1));
783    /* 4 */
784    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
785                                          TYPE(SQ_EXPORT_PIXEL),
786                                          RW_GPR(2),
787                                          RW_REL(ABSOLUTE),
788                                          INDEX_GPR(0),
789                                          ELEM_SIZE(3));
790    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
791                                               SRC_SEL_Y(SQ_SEL_Y),
792                                               SRC_SEL_Z(SQ_SEL_Z),
793                                               SRC_SEL_W(SQ_SEL_W),
794                                               BURST_COUNT(1),
795                                               VALID_PIXEL_MODE(0),
796                                               END_OF_PROGRAM(1),
797                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
798                                               MARK(0),
799                                               BARRIER(1));
800    /* 5 interpolate tex coords */
801    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
802			     SRC0_REL(ABSOLUTE),
803			     SRC0_ELEM(ELEM_Y),
804			     SRC0_NEG(0),
805			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
806			     SRC1_REL(ABSOLUTE),
807			     SRC1_ELEM(ELEM_X),
808			     SRC1_NEG(0),
809			     INDEX_MODE(SQ_INDEX_AR_X),
810			     PRED_SEL(SQ_PRED_SEL_OFF),
811			     LAST(0));
812    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
813				 SRC1_ABS(0),
814				 UPDATE_EXECUTE_MASK(0),
815				 UPDATE_PRED(0),
816				 WRITE_MASK(1),
817				 OMOD(SQ_ALU_OMOD_OFF),
818				 ALU_INST(SQ_OP2_INST_INTERP_XY),
819				 BANK_SWIZZLE(SQ_ALU_VEC_210),
820				 DST_GPR(0),
821				 DST_REL(ABSOLUTE),
822				 DST_ELEM(ELEM_X),
823				 CLAMP(0));
824    /* 6 */
825    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
826			     SRC0_REL(ABSOLUTE),
827			     SRC0_ELEM(ELEM_X),
828			     SRC0_NEG(0),
829			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
830			     SRC1_REL(ABSOLUTE),
831			     SRC1_ELEM(ELEM_X),
832			     SRC1_NEG(0),
833			     INDEX_MODE(SQ_INDEX_AR_X),
834			     PRED_SEL(SQ_PRED_SEL_OFF),
835			     LAST(0));
836    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
837				 SRC1_ABS(0),
838				 UPDATE_EXECUTE_MASK(0),
839				 UPDATE_PRED(0),
840				 WRITE_MASK(1),
841				 OMOD(SQ_ALU_OMOD_OFF),
842				 ALU_INST(SQ_OP2_INST_INTERP_XY),
843				 BANK_SWIZZLE(SQ_ALU_VEC_210),
844				 DST_GPR(0),
845				 DST_REL(ABSOLUTE),
846				 DST_ELEM(ELEM_Y),
847				 CLAMP(0));
848    /* 7 */
849    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
850			     SRC0_REL(ABSOLUTE),
851			     SRC0_ELEM(ELEM_Y),
852			     SRC0_NEG(0),
853			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
854			     SRC1_REL(ABSOLUTE),
855			     SRC1_ELEM(ELEM_X),
856			     SRC1_NEG(0),
857			     INDEX_MODE(SQ_INDEX_AR_X),
858			     PRED_SEL(SQ_PRED_SEL_OFF),
859			     LAST(0));
860    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
861				 SRC1_ABS(0),
862				 UPDATE_EXECUTE_MASK(0),
863				 UPDATE_PRED(0),
864				 WRITE_MASK(0),
865				 OMOD(SQ_ALU_OMOD_OFF),
866				 ALU_INST(SQ_OP2_INST_INTERP_XY),
867				 BANK_SWIZZLE(SQ_ALU_VEC_210),
868				 DST_GPR(0),
869				 DST_REL(ABSOLUTE),
870				 DST_ELEM(ELEM_Z),
871				 CLAMP(0));
872    /* 8 */
873    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
874			     SRC0_REL(ABSOLUTE),
875			     SRC0_ELEM(ELEM_X),
876			     SRC0_NEG(0),
877			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
878			     SRC1_REL(ABSOLUTE),
879			     SRC1_ELEM(ELEM_X),
880			     SRC1_NEG(0),
881			     INDEX_MODE(SQ_INDEX_AR_X),
882			     PRED_SEL(SQ_PRED_SEL_OFF),
883			     LAST(1));
884    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
885				 SRC1_ABS(0),
886				 UPDATE_EXECUTE_MASK(0),
887				 UPDATE_PRED(0),
888				 WRITE_MASK(0),
889				 OMOD(SQ_ALU_OMOD_OFF),
890				 ALU_INST(SQ_OP2_INST_INTERP_XY),
891				 BANK_SWIZZLE(SQ_ALU_VEC_210),
892				 DST_GPR(0),
893				 DST_REL(ABSOLUTE),
894				 DST_ELEM(ELEM_W),
895				 CLAMP(0));
896
897    /* 9,10,11,12 */
898    /* r2.x = MAD(c0.w, r1.x, c0.x) */
899    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
900                             SRC0_REL(ABSOLUTE),
901                             SRC0_ELEM(ELEM_W),
902                             SRC0_NEG(0),
903                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
904                             SRC1_REL(ABSOLUTE),
905                             SRC1_ELEM(ELEM_X),
906                             SRC1_NEG(0),
907                             INDEX_MODE(SQ_INDEX_LOOP),
908                             PRED_SEL(SQ_PRED_SEL_OFF),
909                             LAST(0));
910    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
911                                 SRC2_REL(ABSOLUTE),
912                                 SRC2_ELEM(ELEM_X),
913                                 SRC2_NEG(0),
914                                 ALU_INST(SQ_OP3_INST_MULADD),
915                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
916                                 DST_GPR(2),
917                                 DST_REL(ABSOLUTE),
918                                 DST_ELEM(ELEM_X),
919                                 CLAMP(0));
920    /* r2.y = MAD(c0.w, r1.x, c0.y) */
921    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
922                             SRC0_REL(ABSOLUTE),
923                             SRC0_ELEM(ELEM_W),
924                             SRC0_NEG(0),
925                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
926                             SRC1_REL(ABSOLUTE),
927                             SRC1_ELEM(ELEM_X),
928                             SRC1_NEG(0),
929                             INDEX_MODE(SQ_INDEX_LOOP),
930                             PRED_SEL(SQ_PRED_SEL_OFF),
931                             LAST(0));
932    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
933                                 SRC2_REL(ABSOLUTE),
934                                 SRC2_ELEM(ELEM_Y),
935                                 SRC2_NEG(0),
936                                 ALU_INST(SQ_OP3_INST_MULADD),
937                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
938                                 DST_GPR(2),
939                                 DST_REL(ABSOLUTE),
940                                 DST_ELEM(ELEM_Y),
941                                 CLAMP(0));
942    /* r2.z = MAD(c0.w, r1.x, c0.z) */
943    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
944                             SRC0_REL(ABSOLUTE),
945                             SRC0_ELEM(ELEM_W),
946                             SRC0_NEG(0),
947                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
948                             SRC1_REL(ABSOLUTE),
949                             SRC1_ELEM(ELEM_X),
950                             SRC1_NEG(0),
951                             INDEX_MODE(SQ_INDEX_LOOP),
952                             PRED_SEL(SQ_PRED_SEL_OFF),
953                             LAST(0));
954    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
955                                 SRC2_REL(ABSOLUTE),
956                                 SRC2_ELEM(ELEM_Z),
957                                 SRC2_NEG(0),
958                                 ALU_INST(SQ_OP3_INST_MULADD),
959                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
960                                 DST_GPR(2),
961                                 DST_REL(ABSOLUTE),
962                                 DST_ELEM(ELEM_Z),
963                                 CLAMP(0));
964    /* r2.w = MAD(0, 0, 1) */
965    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
966                             SRC0_REL(ABSOLUTE),
967                             SRC0_ELEM(ELEM_X),
968                             SRC0_NEG(0),
969                             SRC1_SEL(SQ_ALU_SRC_0),
970                             SRC1_REL(ABSOLUTE),
971                             SRC1_ELEM(ELEM_X),
972                             SRC1_NEG(0),
973                             INDEX_MODE(SQ_INDEX_LOOP),
974                             PRED_SEL(SQ_PRED_SEL_OFF),
975                             LAST(1));
976    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
977                                 SRC2_REL(ABSOLUTE),
978                                 SRC2_ELEM(ELEM_X),
979                                 SRC2_NEG(0),
980                                 ALU_INST(SQ_OP3_INST_MULADD),
981                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
982                                 DST_GPR(2),
983                                 DST_REL(ABSOLUTE),
984                                 DST_ELEM(ELEM_W),
985                                 CLAMP(0));
986
987    /* 13,14,15,16 */
988    /* r2.x = MAD(c1.x, r1.y, pv.x) */
989    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
990                             SRC0_REL(ABSOLUTE),
991                             SRC0_ELEM(ELEM_X),
992                             SRC0_NEG(0),
993                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
994                             SRC1_REL(ABSOLUTE),
995                             SRC1_ELEM(ELEM_Y),
996                             SRC1_NEG(0),
997                             INDEX_MODE(SQ_INDEX_LOOP),
998                             PRED_SEL(SQ_PRED_SEL_OFF),
999                             LAST(0));
1000    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1001                                 SRC2_REL(ABSOLUTE),
1002                                 SRC2_ELEM(ELEM_X),
1003                                 SRC2_NEG(0),
1004                                 ALU_INST(SQ_OP3_INST_MULADD),
1005                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1006                                 DST_GPR(2),
1007                                 DST_REL(ABSOLUTE),
1008                                 DST_ELEM(ELEM_X),
1009                                 CLAMP(0));
1010    /* r2.y = MAD(c1.y, r1.y, pv.y) */
1011    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
1012                             SRC0_REL(ABSOLUTE),
1013                             SRC0_ELEM(ELEM_Y),
1014                             SRC0_NEG(0),
1015                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1016                             SRC1_REL(ABSOLUTE),
1017                             SRC1_ELEM(ELEM_Y),
1018                             SRC1_NEG(0),
1019                             INDEX_MODE(SQ_INDEX_LOOP),
1020                             PRED_SEL(SQ_PRED_SEL_OFF),
1021                             LAST(0));
1022    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1023                                 SRC2_REL(ABSOLUTE),
1024                                 SRC2_ELEM(ELEM_Y),
1025                                 SRC2_NEG(0),
1026                                 ALU_INST(SQ_OP3_INST_MULADD),
1027                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1028                                 DST_GPR(2),
1029                                 DST_REL(ABSOLUTE),
1030                                 DST_ELEM(ELEM_Y),
1031                                 CLAMP(0));
1032    /* r2.z = MAD(c1.z, r1.y, pv.z) */
1033    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
1034                             SRC0_REL(ABSOLUTE),
1035                             SRC0_ELEM(ELEM_Z),
1036                             SRC0_NEG(0),
1037                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1038                             SRC1_REL(ABSOLUTE),
1039                             SRC1_ELEM(ELEM_Y),
1040                             SRC1_NEG(0),
1041                             INDEX_MODE(SQ_INDEX_LOOP),
1042                             PRED_SEL(SQ_PRED_SEL_OFF),
1043                             LAST(0));
1044    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1045                                 SRC2_REL(ABSOLUTE),
1046                                 SRC2_ELEM(ELEM_Z),
1047                                 SRC2_NEG(0),
1048                                 ALU_INST(SQ_OP3_INST_MULADD),
1049                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1050                                 DST_GPR(2),
1051                                 DST_REL(ABSOLUTE),
1052                                 DST_ELEM(ELEM_Z),
1053                                 CLAMP(0));
1054    /* r2.w = MAD(0, 0, 1) */
1055    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
1056                             SRC0_REL(ABSOLUTE),
1057                             SRC0_ELEM(ELEM_X),
1058                             SRC0_NEG(0),
1059                             SRC1_SEL(SQ_ALU_SRC_0),
1060                             SRC1_REL(ABSOLUTE),
1061                             SRC1_ELEM(ELEM_X),
1062                             SRC1_NEG(0),
1063                             INDEX_MODE(SQ_INDEX_LOOP),
1064                             PRED_SEL(SQ_PRED_SEL_OFF),
1065                             LAST(1));
1066    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
1067                                 SRC2_REL(ABSOLUTE),
1068                                 SRC2_ELEM(ELEM_W),
1069                                 SRC2_NEG(0),
1070                                 ALU_INST(SQ_OP3_INST_MULADD),
1071                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1072                                 DST_GPR(2),
1073                                 DST_REL(ABSOLUTE),
1074                                 DST_ELEM(ELEM_W),
1075                                 CLAMP(0));
1076    /* 17,18,19,20 */
1077    /* r2.x = MAD(c2.x, r1.z, pv.x) */
1078    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
1079                             SRC0_REL(ABSOLUTE),
1080                             SRC0_ELEM(ELEM_X),
1081                             SRC0_NEG(0),
1082                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1083                             SRC1_REL(ABSOLUTE),
1084                             SRC1_ELEM(ELEM_Z),
1085                             SRC1_NEG(0),
1086                             INDEX_MODE(SQ_INDEX_LOOP),
1087                             PRED_SEL(SQ_PRED_SEL_OFF),
1088                             LAST(0));
1089    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1090                                 SRC2_REL(ABSOLUTE),
1091                                 SRC2_ELEM(ELEM_X),
1092                                 SRC2_NEG(0),
1093                                 ALU_INST(SQ_OP3_INST_MULADD),
1094                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1095                                 DST_GPR(2),
1096                                 DST_REL(ABSOLUTE),
1097                                 DST_ELEM(ELEM_X),
1098                                 CLAMP(1));
1099    /* r2.y = MAD(c2.y, r1.z, pv.y) */
1100    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
1101                             SRC0_REL(ABSOLUTE),
1102                             SRC0_ELEM(ELEM_Y),
1103                             SRC0_NEG(0),
1104                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1105                             SRC1_REL(ABSOLUTE),
1106                             SRC1_ELEM(ELEM_Z),
1107                             SRC1_NEG(0),
1108                             INDEX_MODE(SQ_INDEX_LOOP),
1109                             PRED_SEL(SQ_PRED_SEL_OFF),
1110                             LAST(0));
1111    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1112                                 SRC2_REL(ABSOLUTE),
1113                                 SRC2_ELEM(ELEM_Y),
1114                                 SRC2_NEG(0),
1115                                 ALU_INST(SQ_OP3_INST_MULADD),
1116                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1117                                 DST_GPR(2),
1118                                 DST_REL(ABSOLUTE),
1119                                 DST_ELEM(ELEM_Y),
1120                                 CLAMP(1));
1121    /* r2.z = MAD(c2.z, r1.z, pv.z) */
1122    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
1123                             SRC0_REL(ABSOLUTE),
1124                             SRC0_ELEM(ELEM_Z),
1125                             SRC0_NEG(0),
1126                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1127                             SRC1_REL(ABSOLUTE),
1128                             SRC1_ELEM(ELEM_Z),
1129                             SRC1_NEG(0),
1130                             INDEX_MODE(SQ_INDEX_LOOP),
1131                             PRED_SEL(SQ_PRED_SEL_OFF),
1132                             LAST(0));
1133    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1134                                 SRC2_REL(ABSOLUTE),
1135                                 SRC2_ELEM(ELEM_Z),
1136                                 SRC2_NEG(0),
1137                                 ALU_INST(SQ_OP3_INST_MULADD),
1138                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1139                                 DST_GPR(2),
1140                                 DST_REL(ABSOLUTE),
1141                                 DST_ELEM(ELEM_Z),
1142                                 CLAMP(1));
1143    /* r2.w = MAD(0, 0, 1) */
1144    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
1145                             SRC0_REL(ABSOLUTE),
1146                             SRC0_ELEM(ELEM_X),
1147                             SRC0_NEG(0),
1148                             SRC1_SEL(SQ_ALU_SRC_0),
1149                             SRC1_REL(ABSOLUTE),
1150                             SRC1_ELEM(ELEM_X),
1151                             SRC1_NEG(0),
1152                             INDEX_MODE(SQ_INDEX_LOOP),
1153                             PRED_SEL(SQ_PRED_SEL_OFF),
1154                             LAST(1));
1155    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
1156                                 SRC2_REL(ABSOLUTE),
1157                                 SRC2_ELEM(ELEM_X),
1158                                 SRC2_NEG(0),
1159                                 ALU_INST(SQ_OP3_INST_MULADD),
1160                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1161                                 DST_GPR(2),
1162                                 DST_REL(ABSOLUTE),
1163                                 DST_ELEM(ELEM_W),
1164                                 CLAMP(1));
1165
1166    /* 21 */
1167    shader[i++] = CF_DWORD0(ADDR(24),
1168			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1169    shader[i++] = CF_DWORD1(POP_COUNT(0),
1170                            CF_CONST(0),
1171                            COND(SQ_CF_COND_ACTIVE),
1172                            I_COUNT(3),
1173                            VALID_PIXEL_MODE(0),
1174                            END_OF_PROGRAM(0),
1175                            CF_INST(SQ_CF_INST_TC),
1176                            WHOLE_QUAD_MODE(0),
1177                            BARRIER(1));
1178    /* 22 */
1179    shader[i++] = CF_DWORD0(ADDR(0),
1180			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1181    shader[i++] = CF_DWORD1(POP_COUNT(0),
1182			    CF_CONST(0),
1183			    COND(SQ_CF_COND_ACTIVE),
1184			    I_COUNT(0),
1185			    VALID_PIXEL_MODE(0),
1186			    END_OF_PROGRAM(0),
1187			    CF_INST(SQ_CF_INST_RETURN),
1188			    WHOLE_QUAD_MODE(0),
1189			    BARRIER(1));
1190    /* 23 */
1191    shader[i++] = 0x00000000;
1192    shader[i++] = 0x00000000;
1193    /* 24/25 */
1194    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1195                             INST_MOD(0),
1196                             FETCH_WHOLE_QUAD(0),
1197                             RESOURCE_ID(0),
1198                             SRC_GPR(0),
1199                             SRC_REL(ABSOLUTE),
1200                             ALT_CONST(0),
1201			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1202			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1203    shader[i++] = TEX_DWORD1(DST_GPR(1),
1204                             DST_REL(ABSOLUTE),
1205                             DST_SEL_X(SQ_SEL_X),
1206                             DST_SEL_Y(SQ_SEL_MASK),
1207                             DST_SEL_Z(SQ_SEL_MASK),
1208                             DST_SEL_W(SQ_SEL_1),
1209                             LOD_BIAS(0),
1210                             COORD_TYPE_X(TEX_NORMALIZED),
1211                             COORD_TYPE_Y(TEX_NORMALIZED),
1212                             COORD_TYPE_Z(TEX_NORMALIZED),
1213                             COORD_TYPE_W(TEX_NORMALIZED));
1214    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1215                             OFFSET_Y(0),
1216                             OFFSET_Z(0),
1217                             SAMPLER_ID(0),
1218                             SRC_SEL_X(SQ_SEL_X),
1219                             SRC_SEL_Y(SQ_SEL_Y),
1220                             SRC_SEL_Z(SQ_SEL_0),
1221                             SRC_SEL_W(SQ_SEL_1));
1222    shader[i++] = TEX_DWORD_PAD;
1223    /* 26/27 */
1224    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1225                             INST_MOD(0),
1226                             FETCH_WHOLE_QUAD(0),
1227                             RESOURCE_ID(1),
1228                             SRC_GPR(0),
1229                             SRC_REL(ABSOLUTE),
1230                             ALT_CONST(0),
1231			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1232			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1233    shader[i++] = TEX_DWORD1(DST_GPR(1),
1234                             DST_REL(ABSOLUTE),
1235                             DST_SEL_X(SQ_SEL_MASK),
1236                             DST_SEL_Y(SQ_SEL_MASK),
1237                             DST_SEL_Z(SQ_SEL_X),
1238                             DST_SEL_W(SQ_SEL_MASK),
1239                             LOD_BIAS(0),
1240                             COORD_TYPE_X(TEX_NORMALIZED),
1241                             COORD_TYPE_Y(TEX_NORMALIZED),
1242                             COORD_TYPE_Z(TEX_NORMALIZED),
1243                             COORD_TYPE_W(TEX_NORMALIZED));
1244    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1245                             OFFSET_Y(0),
1246                             OFFSET_Z(0),
1247                             SAMPLER_ID(1),
1248                             SRC_SEL_X(SQ_SEL_X),
1249                             SRC_SEL_Y(SQ_SEL_Y),
1250                             SRC_SEL_Z(SQ_SEL_0),
1251                             SRC_SEL_W(SQ_SEL_1));
1252    shader[i++] = TEX_DWORD_PAD;
1253    /* 28/29 */
1254    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1255                             INST_MOD(0),
1256                             FETCH_WHOLE_QUAD(0),
1257                             RESOURCE_ID(2),
1258                             SRC_GPR(0),
1259                             SRC_REL(ABSOLUTE),
1260                             ALT_CONST(0),
1261			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1262			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1263    shader[i++] = TEX_DWORD1(DST_GPR(1),
1264                             DST_REL(ABSOLUTE),
1265                             DST_SEL_X(SQ_SEL_MASK),
1266                             DST_SEL_Y(SQ_SEL_X),
1267                             DST_SEL_Z(SQ_SEL_MASK),
1268                             DST_SEL_W(SQ_SEL_MASK),
1269                             LOD_BIAS(0),
1270                             COORD_TYPE_X(TEX_NORMALIZED),
1271                             COORD_TYPE_Y(TEX_NORMALIZED),
1272                             COORD_TYPE_Z(TEX_NORMALIZED),
1273                             COORD_TYPE_W(TEX_NORMALIZED));
1274    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1275                             OFFSET_Y(0),
1276                             OFFSET_Z(0),
1277                             SAMPLER_ID(2),
1278                             SRC_SEL_X(SQ_SEL_X),
1279                             SRC_SEL_Y(SQ_SEL_Y),
1280                             SRC_SEL_Z(SQ_SEL_0),
1281                             SRC_SEL_W(SQ_SEL_1));
1282    shader[i++] = TEX_DWORD_PAD;
1283    /* 30 */
1284    shader[i++] = CF_DWORD0(ADDR(32),
1285			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1286    shader[i++] = CF_DWORD1(POP_COUNT(0),
1287                            CF_CONST(0),
1288                            COND(SQ_CF_COND_ACTIVE),
1289                            I_COUNT(2),
1290                            VALID_PIXEL_MODE(0),
1291                            END_OF_PROGRAM(0),
1292                            CF_INST(SQ_CF_INST_TC),
1293                            WHOLE_QUAD_MODE(0),
1294                            BARRIER(1));
1295    /* 31 */
1296    shader[i++] = CF_DWORD0(ADDR(0),
1297			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1298    shader[i++] = CF_DWORD1(POP_COUNT(0),
1299			    CF_CONST(0),
1300			    COND(SQ_CF_COND_ACTIVE),
1301			    I_COUNT(0),
1302			    VALID_PIXEL_MODE(0),
1303			    END_OF_PROGRAM(0),
1304			    CF_INST(SQ_CF_INST_RETURN),
1305			    WHOLE_QUAD_MODE(0),
1306			    BARRIER(1));
1307    /* 32/33 */
1308    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1309                             INST_MOD(0),
1310                             FETCH_WHOLE_QUAD(0),
1311                             RESOURCE_ID(0),
1312                             SRC_GPR(0),
1313                             SRC_REL(ABSOLUTE),
1314                             ALT_CONST(0),
1315                             RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1316                             SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1317    shader[i++] = TEX_DWORD1(DST_GPR(1),
1318                             DST_REL(ABSOLUTE),
1319                             DST_SEL_X(SQ_SEL_X),
1320                             DST_SEL_Y(SQ_SEL_MASK),
1321                             DST_SEL_Z(SQ_SEL_MASK),
1322                             DST_SEL_W(SQ_SEL_1),
1323                             LOD_BIAS(0),
1324                             COORD_TYPE_X(TEX_NORMALIZED),
1325                             COORD_TYPE_Y(TEX_NORMALIZED),
1326                             COORD_TYPE_Z(TEX_NORMALIZED),
1327                             COORD_TYPE_W(TEX_NORMALIZED));
1328    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1329                             OFFSET_Y(0),
1330                             OFFSET_Z(0),
1331                             SAMPLER_ID(0),
1332                             SRC_SEL_X(SQ_SEL_X),
1333                             SRC_SEL_Y(SQ_SEL_Y),
1334                             SRC_SEL_Z(SQ_SEL_0),
1335                             SRC_SEL_W(SQ_SEL_1));
1336    shader[i++] = TEX_DWORD_PAD;
1337    /* 34/35 */
1338    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1339                             INST_MOD(0),
1340                             FETCH_WHOLE_QUAD(0),
1341                             RESOURCE_ID(1),
1342                             SRC_GPR(0),
1343                             SRC_REL(ABSOLUTE),
1344                             ALT_CONST(0),
1345                             RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1346                             SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1347    shader[i++] = TEX_DWORD1(DST_GPR(1),
1348                             DST_REL(ABSOLUTE),
1349                             DST_SEL_X(SQ_SEL_MASK),
1350                             DST_SEL_Y(SQ_SEL_X),
1351                             DST_SEL_Z(SQ_SEL_Y),
1352                             DST_SEL_W(SQ_SEL_MASK),
1353                             LOD_BIAS(0),
1354                             COORD_TYPE_X(TEX_NORMALIZED),
1355                             COORD_TYPE_Y(TEX_NORMALIZED),
1356                             COORD_TYPE_Z(TEX_NORMALIZED),
1357                             COORD_TYPE_W(TEX_NORMALIZED));
1358    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1359                             OFFSET_Y(0),
1360                             OFFSET_Z(0),
1361                             SAMPLER_ID(1),
1362                             SRC_SEL_X(SQ_SEL_X),
1363                             SRC_SEL_Y(SQ_SEL_Y),
1364                             SRC_SEL_Z(SQ_SEL_0),
1365                             SRC_SEL_W(SQ_SEL_1));
1366    shader[i++] = TEX_DWORD_PAD;
1367
1368    return i;
1369}
1370
1371/* comp vs --------------------------------------- */
1372int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
1373{
1374    int i = 0;
1375
1376    /* 0 */
1377    shader[i++] = CF_DWORD0(ADDR(3),
1378			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1379    shader[i++] = CF_DWORD1(POP_COUNT(0),
1380                            CF_CONST(0),
1381                            COND(SQ_CF_COND_BOOL),
1382                            I_COUNT(0),
1383                            VALID_PIXEL_MODE(0),
1384                            END_OF_PROGRAM(0),
1385                            CF_INST(SQ_CF_INST_CALL),
1386                            WHOLE_QUAD_MODE(0),
1387                            BARRIER(0));
1388    /* 1 */
1389    shader[i++] = CF_DWORD0(ADDR(9),
1390			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1391    shader[i++] = CF_DWORD1(POP_COUNT(0),
1392                            CF_CONST(0),
1393                            COND(SQ_CF_COND_NOT_BOOL),
1394                            I_COUNT(0),
1395                            VALID_PIXEL_MODE(0),
1396                            END_OF_PROGRAM(0),
1397                            CF_INST(SQ_CF_INST_CALL),
1398                            WHOLE_QUAD_MODE(0),
1399                            BARRIER(0));
1400    /* 2 */
1401    shader[i++] = CF_DWORD0(ADDR(0),
1402                            JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1403    shader[i++] = CF_DWORD1(POP_COUNT(0),
1404                            CF_CONST(0),
1405                            COND(SQ_CF_COND_ACTIVE),
1406                            I_COUNT(0),
1407                            VALID_PIXEL_MODE(0),
1408                            END_OF_PROGRAM(1),
1409                            CF_INST(SQ_CF_INST_NOP),
1410                            WHOLE_QUAD_MODE(0),
1411                            BARRIER(1));
1412    /* 3 - mask sub */
1413    shader[i++] = CF_DWORD0(ADDR(44),
1414			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1415    shader[i++] = CF_DWORD1(POP_COUNT(0),
1416			    CF_CONST(0),
1417			    COND(SQ_CF_COND_ACTIVE),
1418			    I_COUNT(3),
1419			    VALID_PIXEL_MODE(0),
1420			    END_OF_PROGRAM(0),
1421			    CF_INST(SQ_CF_INST_VC),
1422			    WHOLE_QUAD_MODE(0),
1423			    BARRIER(1));
1424
1425    /* 4 - ALU */
1426    shader[i++] = CF_ALU_DWORD0(ADDR(14),
1427				KCACHE_BANK0(0),
1428				KCACHE_BANK1(0),
1429				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
1430    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1431				KCACHE_ADDR0(0),
1432				KCACHE_ADDR1(0),
1433				I_COUNT(20),
1434				ALT_CONST(0),
1435				CF_INST(SQ_CF_INST_ALU),
1436				WHOLE_QUAD_MODE(0),
1437				BARRIER(1));
1438
1439    /* 5 - dst */
1440    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1441					  TYPE(SQ_EXPORT_POS),
1442					  RW_GPR(2),
1443					  RW_REL(ABSOLUTE),
1444					  INDEX_GPR(0),
1445					  ELEM_SIZE(0));
1446    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1447					       SRC_SEL_Y(SQ_SEL_Y),
1448					       SRC_SEL_Z(SQ_SEL_0),
1449					       SRC_SEL_W(SQ_SEL_1),
1450					       BURST_COUNT(1),
1451					       VALID_PIXEL_MODE(0),
1452					       END_OF_PROGRAM(0),
1453					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1454					       MARK(0),
1455					       BARRIER(1));
1456    /* 6 - src */
1457    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1458					  TYPE(SQ_EXPORT_PARAM),
1459					  RW_GPR(1),
1460					  RW_REL(ABSOLUTE),
1461					  INDEX_GPR(0),
1462					  ELEM_SIZE(0));
1463    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1464					       SRC_SEL_Y(SQ_SEL_Y),
1465					       SRC_SEL_Z(SQ_SEL_0),
1466					       SRC_SEL_W(SQ_SEL_1),
1467					       BURST_COUNT(1),
1468					       VALID_PIXEL_MODE(0),
1469					       END_OF_PROGRAM(0),
1470					       CF_INST(SQ_CF_INST_EXPORT),
1471					       MARK(0),
1472					       BARRIER(0));
1473    /* 7 - mask */
1474    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
1475					  TYPE(SQ_EXPORT_PARAM),
1476					  RW_GPR(0),
1477					  RW_REL(ABSOLUTE),
1478					  INDEX_GPR(0),
1479					  ELEM_SIZE(0));
1480    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1481					       SRC_SEL_Y(SQ_SEL_Y),
1482					       SRC_SEL_Z(SQ_SEL_0),
1483					       SRC_SEL_W(SQ_SEL_1),
1484					       BURST_COUNT(1),
1485					       VALID_PIXEL_MODE(0),
1486					       END_OF_PROGRAM(0),
1487					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1488					       WHOLE_QUAD_MODE(0),
1489					       BARRIER(0));
1490    /* 8 */
1491    shader[i++] = CF_DWORD0(ADDR(0),
1492			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1493    shader[i++] = CF_DWORD1(POP_COUNT(0),
1494			    CF_CONST(0),
1495			    COND(SQ_CF_COND_ACTIVE),
1496			    I_COUNT(0),
1497			    VALID_PIXEL_MODE(0),
1498			    END_OF_PROGRAM(0),
1499			    CF_INST(SQ_CF_INST_RETURN),
1500			    WHOLE_QUAD_MODE(0),
1501			    BARRIER(1));
1502    /* 9 - non-mask sub */
1503    shader[i++] = CF_DWORD0(ADDR(50),
1504			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1505    shader[i++] = CF_DWORD1(POP_COUNT(0),
1506			    CF_CONST(0),
1507			    COND(SQ_CF_COND_ACTIVE),
1508			    I_COUNT(2),
1509			    VALID_PIXEL_MODE(0),
1510			    END_OF_PROGRAM(0),
1511			    CF_INST(SQ_CF_INST_VC),
1512			    WHOLE_QUAD_MODE(0),
1513			    BARRIER(1));
1514
1515    /* 10 - ALU */
1516    shader[i++] = CF_ALU_DWORD0(ADDR(34),
1517				KCACHE_BANK0(0),
1518				KCACHE_BANK1(0),
1519				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
1520    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1521				KCACHE_ADDR0(0),
1522				KCACHE_ADDR1(0),
1523				I_COUNT(10),
1524				ALT_CONST(0),
1525				CF_INST(SQ_CF_INST_ALU),
1526				WHOLE_QUAD_MODE(0),
1527				BARRIER(1));
1528
1529    /* 11 - dst */
1530    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1531					  TYPE(SQ_EXPORT_POS),
1532					  RW_GPR(1),
1533					  RW_REL(ABSOLUTE),
1534					  INDEX_GPR(0),
1535					  ELEM_SIZE(0));
1536    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1537					       SRC_SEL_Y(SQ_SEL_Y),
1538					       SRC_SEL_Z(SQ_SEL_0),
1539					       SRC_SEL_W(SQ_SEL_1),
1540					       BURST_COUNT(0),
1541					       VALID_PIXEL_MODE(0),
1542					       END_OF_PROGRAM(0),
1543					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1544					       MARK(0),
1545					       BARRIER(1));
1546    /* 12 - src */
1547    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1548					  TYPE(SQ_EXPORT_PARAM),
1549					  RW_GPR(0),
1550					  RW_REL(ABSOLUTE),
1551					  INDEX_GPR(0),
1552					  ELEM_SIZE(0));
1553    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1554					       SRC_SEL_Y(SQ_SEL_Y),
1555					       SRC_SEL_Z(SQ_SEL_0),
1556					       SRC_SEL_W(SQ_SEL_1),
1557					       BURST_COUNT(0),
1558					       VALID_PIXEL_MODE(0),
1559					       END_OF_PROGRAM(0),
1560					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1561					       MARK(0),
1562					       BARRIER(0));
1563    /* 13 */
1564    shader[i++] = CF_DWORD0(ADDR(0),
1565			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1566    shader[i++] = CF_DWORD1(POP_COUNT(0),
1567			    CF_CONST(0),
1568			    COND(SQ_CF_COND_ACTIVE),
1569			    I_COUNT(0),
1570			    VALID_PIXEL_MODE(0),
1571			    END_OF_PROGRAM(0),
1572			    CF_INST(SQ_CF_INST_RETURN),
1573			    WHOLE_QUAD_MODE(0),
1574			    BARRIER(1));
1575
1576    /* 14 srcX.x DOT4 - mask */
1577    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1578                             SRC0_REL(ABSOLUTE),
1579                             SRC0_ELEM(ELEM_X),
1580                             SRC0_NEG(0),
1581                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1582                             SRC1_REL(ABSOLUTE),
1583                             SRC1_ELEM(ELEM_X),
1584                             SRC1_NEG(0),
1585                             INDEX_MODE(SQ_INDEX_LOOP),
1586                             PRED_SEL(SQ_PRED_SEL_OFF),
1587                             LAST(0));
1588    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1589                                 SRC1_ABS(0),
1590                                 UPDATE_EXECUTE_MASK(0),
1591                                 UPDATE_PRED(0),
1592                                 WRITE_MASK(1),
1593                                 OMOD(SQ_ALU_OMOD_OFF),
1594                                 ALU_INST(SQ_OP2_INST_DOT4),
1595                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1596                                 DST_GPR(3),
1597                                 DST_REL(ABSOLUTE),
1598                                 DST_ELEM(ELEM_X),
1599                                 CLAMP(0));
1600
1601    /* 15 srcX.y DOT4 - mask */
1602    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1603                             SRC0_REL(ABSOLUTE),
1604                             SRC0_ELEM(ELEM_Y),
1605                             SRC0_NEG(0),
1606                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1607                             SRC1_REL(ABSOLUTE),
1608                             SRC1_ELEM(ELEM_Y),
1609                             SRC1_NEG(0),
1610                             INDEX_MODE(SQ_INDEX_LOOP),
1611                             PRED_SEL(SQ_PRED_SEL_OFF),
1612                             LAST(0));
1613    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1614                                 SRC1_ABS(0),
1615                                 UPDATE_EXECUTE_MASK(0),
1616                                 UPDATE_PRED(0),
1617                                 WRITE_MASK(0),
1618                                 OMOD(SQ_ALU_OMOD_OFF),
1619                                 ALU_INST(SQ_OP2_INST_DOT4),
1620                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1621                                 DST_GPR(3),
1622                                 DST_REL(ABSOLUTE),
1623                                 DST_ELEM(ELEM_Y),
1624                                 CLAMP(0));
1625
1626    /* 16 srcX.z DOT4 - mask */
1627    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1628                             SRC0_REL(ABSOLUTE),
1629                             SRC0_ELEM(ELEM_Z),
1630                             SRC0_NEG(0),
1631                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1632                             SRC1_REL(ABSOLUTE),
1633                             SRC1_ELEM(ELEM_Z),
1634                             SRC1_NEG(0),
1635                             INDEX_MODE(SQ_INDEX_LOOP),
1636                             PRED_SEL(SQ_PRED_SEL_OFF),
1637                             LAST(0));
1638    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1639                                 SRC1_ABS(0),
1640                                 UPDATE_EXECUTE_MASK(0),
1641                                 UPDATE_PRED(0),
1642                                 WRITE_MASK(0),
1643                                 OMOD(SQ_ALU_OMOD_OFF),
1644                                 ALU_INST(SQ_OP2_INST_DOT4),
1645                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1646                                 DST_GPR(3),
1647                                 DST_REL(ABSOLUTE),
1648                                 DST_ELEM(ELEM_Z),
1649                                 CLAMP(0));
1650
1651    /* 17 srcX.w DOT4 - mask */
1652    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1653                             SRC0_REL(ABSOLUTE),
1654                             SRC0_ELEM(ELEM_W),
1655                             SRC0_NEG(0),
1656                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1657                             SRC1_REL(ABSOLUTE),
1658                             SRC1_ELEM(ELEM_W),
1659                             SRC1_NEG(0),
1660                             INDEX_MODE(SQ_INDEX_LOOP),
1661                             PRED_SEL(SQ_PRED_SEL_OFF),
1662                             LAST(1));
1663    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1664                                 SRC1_ABS(0),
1665                                 UPDATE_EXECUTE_MASK(0),
1666                                 UPDATE_PRED(0),
1667                                 WRITE_MASK(0),
1668                                 OMOD(SQ_ALU_OMOD_OFF),
1669                                 ALU_INST(SQ_OP2_INST_DOT4),
1670                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1671                                 DST_GPR(3),
1672                                 DST_REL(ABSOLUTE),
1673                                 DST_ELEM(ELEM_W),
1674                                 CLAMP(0));
1675
1676    /* 18 srcY.x DOT4 - mask */
1677    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1678                             SRC0_REL(ABSOLUTE),
1679                             SRC0_ELEM(ELEM_X),
1680                             SRC0_NEG(0),
1681                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1682                             SRC1_REL(ABSOLUTE),
1683                             SRC1_ELEM(ELEM_X),
1684                             SRC1_NEG(0),
1685                             INDEX_MODE(SQ_INDEX_LOOP),
1686                             PRED_SEL(SQ_PRED_SEL_OFF),
1687                             LAST(0));
1688    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1689                                 SRC1_ABS(0),
1690                                 UPDATE_EXECUTE_MASK(0),
1691                                 UPDATE_PRED(0),
1692                                 WRITE_MASK(0),
1693                                 OMOD(SQ_ALU_OMOD_OFF),
1694                                 ALU_INST(SQ_OP2_INST_DOT4),
1695                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1696                                 DST_GPR(3),
1697                                 DST_REL(ABSOLUTE),
1698                                 DST_ELEM(ELEM_X),
1699                                 CLAMP(0));
1700
1701    /* 19 srcY.y DOT4 - mask */
1702    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1703                             SRC0_REL(ABSOLUTE),
1704                             SRC0_ELEM(ELEM_Y),
1705                             SRC0_NEG(0),
1706                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1707                             SRC1_REL(ABSOLUTE),
1708                             SRC1_ELEM(ELEM_Y),
1709                             SRC1_NEG(0),
1710                             INDEX_MODE(SQ_INDEX_LOOP),
1711                             PRED_SEL(SQ_PRED_SEL_OFF),
1712                             LAST(0));
1713    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1714                                 SRC1_ABS(0),
1715                                 UPDATE_EXECUTE_MASK(0),
1716                                 UPDATE_PRED(0),
1717                                 WRITE_MASK(1),
1718                                 OMOD(SQ_ALU_OMOD_OFF),
1719                                 ALU_INST(SQ_OP2_INST_DOT4),
1720                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1721                                 DST_GPR(3),
1722                                 DST_REL(ABSOLUTE),
1723                                 DST_ELEM(ELEM_Y),
1724                                 CLAMP(0));
1725
1726    /* 20 srcY.z DOT4 - mask */
1727    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1728                             SRC0_REL(ABSOLUTE),
1729                             SRC0_ELEM(ELEM_Z),
1730                             SRC0_NEG(0),
1731                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1732                             SRC1_REL(ABSOLUTE),
1733                             SRC1_ELEM(ELEM_Z),
1734                             SRC1_NEG(0),
1735                             INDEX_MODE(SQ_INDEX_LOOP),
1736                             PRED_SEL(SQ_PRED_SEL_OFF),
1737                             LAST(0));
1738    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1739                                 SRC1_ABS(0),
1740                                 UPDATE_EXECUTE_MASK(0),
1741                                 UPDATE_PRED(0),
1742                                 WRITE_MASK(0),
1743                                 OMOD(SQ_ALU_OMOD_OFF),
1744                                 ALU_INST(SQ_OP2_INST_DOT4),
1745                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1746                                 DST_GPR(3),
1747                                 DST_REL(ABSOLUTE),
1748                                 DST_ELEM(ELEM_Z),
1749                                 CLAMP(0));
1750
1751    /* 21 srcY.w DOT4 - mask */
1752    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1753                             SRC0_REL(ABSOLUTE),
1754                             SRC0_ELEM(ELEM_W),
1755                             SRC0_NEG(0),
1756                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1757                             SRC1_REL(ABSOLUTE),
1758                             SRC1_ELEM(ELEM_W),
1759                             SRC1_NEG(0),
1760                             INDEX_MODE(SQ_INDEX_LOOP),
1761                             PRED_SEL(SQ_PRED_SEL_OFF),
1762                             LAST(1));
1763    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1764                                 SRC1_ABS(0),
1765                                 UPDATE_EXECUTE_MASK(0),
1766                                 UPDATE_PRED(0),
1767                                 WRITE_MASK(0),
1768                                 OMOD(SQ_ALU_OMOD_OFF),
1769                                 ALU_INST(SQ_OP2_INST_DOT4),
1770                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1771                                 DST_GPR(3),
1772                                 DST_REL(ABSOLUTE),
1773                                 DST_ELEM(ELEM_W),
1774                                 CLAMP(0));
1775
1776    /* 22 maskX.x DOT4 - mask */
1777    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1778                             SRC0_REL(ABSOLUTE),
1779                             SRC0_ELEM(ELEM_X),
1780                             SRC0_NEG(0),
1781                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1782                             SRC1_REL(ABSOLUTE),
1783                             SRC1_ELEM(ELEM_X),
1784                             SRC1_NEG(0),
1785                             INDEX_MODE(SQ_INDEX_LOOP),
1786                             PRED_SEL(SQ_PRED_SEL_OFF),
1787                             LAST(0));
1788    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1789                                 SRC1_ABS(0),
1790                                 UPDATE_EXECUTE_MASK(0),
1791                                 UPDATE_PRED(0),
1792                                 WRITE_MASK(1),
1793                                 OMOD(SQ_ALU_OMOD_OFF),
1794                                 ALU_INST(SQ_OP2_INST_DOT4),
1795                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1796                                 DST_GPR(4),
1797                                 DST_REL(ABSOLUTE),
1798                                 DST_ELEM(ELEM_X),
1799                                 CLAMP(0));
1800
1801    /* 23 maskX.y DOT4 - mask */
1802    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1803                             SRC0_REL(ABSOLUTE),
1804                             SRC0_ELEM(ELEM_Y),
1805                             SRC0_NEG(0),
1806                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1807                             SRC1_REL(ABSOLUTE),
1808                             SRC1_ELEM(ELEM_Y),
1809                             SRC1_NEG(0),
1810                             INDEX_MODE(SQ_INDEX_LOOP),
1811                             PRED_SEL(SQ_PRED_SEL_OFF),
1812                             LAST(0));
1813    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1814                                 SRC1_ABS(0),
1815                                 UPDATE_EXECUTE_MASK(0),
1816                                 UPDATE_PRED(0),
1817                                 WRITE_MASK(0),
1818                                 OMOD(SQ_ALU_OMOD_OFF),
1819                                 ALU_INST(SQ_OP2_INST_DOT4),
1820                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1821                                 DST_GPR(4),
1822                                 DST_REL(ABSOLUTE),
1823                                 DST_ELEM(ELEM_Y),
1824                                 CLAMP(0));
1825
1826    /* 24 maskX.z DOT4 - mask */
1827    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1828                             SRC0_REL(ABSOLUTE),
1829                             SRC0_ELEM(ELEM_Z),
1830                             SRC0_NEG(0),
1831                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1832                             SRC1_REL(ABSOLUTE),
1833                             SRC1_ELEM(ELEM_Z),
1834                             SRC1_NEG(0),
1835                             INDEX_MODE(SQ_INDEX_LOOP),
1836                             PRED_SEL(SQ_PRED_SEL_OFF),
1837                             LAST(0));
1838    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1839                                 SRC1_ABS(0),
1840                                 UPDATE_EXECUTE_MASK(0),
1841                                 UPDATE_PRED(0),
1842                                 WRITE_MASK(0),
1843                                 OMOD(SQ_ALU_OMOD_OFF),
1844                                 ALU_INST(SQ_OP2_INST_DOT4),
1845                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1846                                 DST_GPR(4),
1847                                 DST_REL(ABSOLUTE),
1848                                 DST_ELEM(ELEM_Z),
1849                                 CLAMP(0));
1850
1851    /* 25 maskX.w DOT4 - mask */
1852    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1853                             SRC0_REL(ABSOLUTE),
1854                             SRC0_ELEM(ELEM_W),
1855                             SRC0_NEG(0),
1856                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1857                             SRC1_REL(ABSOLUTE),
1858                             SRC1_ELEM(ELEM_W),
1859                             SRC1_NEG(0),
1860                             INDEX_MODE(SQ_INDEX_LOOP),
1861                             PRED_SEL(SQ_PRED_SEL_OFF),
1862                             LAST(1));
1863    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1864                                 SRC1_ABS(0),
1865                                 UPDATE_EXECUTE_MASK(0),
1866                                 UPDATE_PRED(0),
1867                                 WRITE_MASK(0),
1868                                 OMOD(SQ_ALU_OMOD_OFF),
1869                                 ALU_INST(SQ_OP2_INST_DOT4),
1870                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1871                                 DST_GPR(4),
1872                                 DST_REL(ABSOLUTE),
1873                                 DST_ELEM(ELEM_W),
1874                                 CLAMP(0));
1875
1876    /* 26 maskY.x DOT4 - mask */
1877    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1878                             SRC0_REL(ABSOLUTE),
1879                             SRC0_ELEM(ELEM_X),
1880                             SRC0_NEG(0),
1881                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1882                             SRC1_REL(ABSOLUTE),
1883                             SRC1_ELEM(ELEM_X),
1884                             SRC1_NEG(0),
1885                             INDEX_MODE(SQ_INDEX_LOOP),
1886                             PRED_SEL(SQ_PRED_SEL_OFF),
1887                             LAST(0));
1888    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1889                                 SRC1_ABS(0),
1890                                 UPDATE_EXECUTE_MASK(0),
1891                                 UPDATE_PRED(0),
1892                                 WRITE_MASK(0),
1893                                 OMOD(SQ_ALU_OMOD_OFF),
1894                                 ALU_INST(SQ_OP2_INST_DOT4),
1895                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1896                                 DST_GPR(4),
1897                                 DST_REL(ABSOLUTE),
1898                                 DST_ELEM(ELEM_X),
1899                                 CLAMP(0));
1900
1901    /* 27 maskY.y DOT4 - mask */
1902    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1903                             SRC0_REL(ABSOLUTE),
1904                             SRC0_ELEM(ELEM_Y),
1905                             SRC0_NEG(0),
1906                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1907                             SRC1_REL(ABSOLUTE),
1908                             SRC1_ELEM(ELEM_Y),
1909                             SRC1_NEG(0),
1910                             INDEX_MODE(SQ_INDEX_LOOP),
1911                             PRED_SEL(SQ_PRED_SEL_OFF),
1912                             LAST(0));
1913    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1914                                 SRC1_ABS(0),
1915                                 UPDATE_EXECUTE_MASK(0),
1916                                 UPDATE_PRED(0),
1917                                 WRITE_MASK(1),
1918                                 OMOD(SQ_ALU_OMOD_OFF),
1919                                 ALU_INST(SQ_OP2_INST_DOT4),
1920                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1921                                 DST_GPR(4),
1922                                 DST_REL(ABSOLUTE),
1923                                 DST_ELEM(ELEM_Y),
1924                                 CLAMP(0));
1925
1926    /* 28 maskY.z DOT4 - mask */
1927    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1928                             SRC0_REL(ABSOLUTE),
1929                             SRC0_ELEM(ELEM_Z),
1930                             SRC0_NEG(0),
1931                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1932                             SRC1_REL(ABSOLUTE),
1933                             SRC1_ELEM(ELEM_Z),
1934                             SRC1_NEG(0),
1935                             INDEX_MODE(SQ_INDEX_LOOP),
1936                             PRED_SEL(SQ_PRED_SEL_OFF),
1937                             LAST(0));
1938    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1939                                 SRC1_ABS(0),
1940                                 UPDATE_EXECUTE_MASK(0),
1941                                 UPDATE_PRED(0),
1942                                 WRITE_MASK(0),
1943                                 OMOD(SQ_ALU_OMOD_OFF),
1944                                 ALU_INST(SQ_OP2_INST_DOT4),
1945                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1946                                 DST_GPR(4),
1947                                 DST_REL(ABSOLUTE),
1948                                 DST_ELEM(ELEM_Z),
1949                                 CLAMP(0));
1950
1951    /* 29 maskY.w DOT4 - mask */
1952    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1953                             SRC0_REL(ABSOLUTE),
1954                             SRC0_ELEM(ELEM_W),
1955                             SRC0_NEG(0),
1956                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1957                             SRC1_REL(ABSOLUTE),
1958                             SRC1_ELEM(ELEM_W),
1959                             SRC1_NEG(0),
1960                             INDEX_MODE(SQ_INDEX_LOOP),
1961                             PRED_SEL(SQ_PRED_SEL_OFF),
1962                             LAST(1));
1963    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1964                                 SRC1_ABS(0),
1965                                 UPDATE_EXECUTE_MASK(0),
1966                                 UPDATE_PRED(0),
1967                                 WRITE_MASK(0),
1968                                 OMOD(SQ_ALU_OMOD_OFF),
1969                                 ALU_INST(SQ_OP2_INST_DOT4),
1970                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1971                                 DST_GPR(4),
1972                                 DST_REL(ABSOLUTE),
1973                                 DST_ELEM(ELEM_W),
1974                                 CLAMP(0));
1975
1976    /* 30 srcX / w */
1977    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
1978                             SRC0_REL(ABSOLUTE),
1979                             SRC0_ELEM(ELEM_X),
1980                             SRC0_NEG(0),
1981                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1982                             SRC1_REL(ABSOLUTE),
1983                             SRC1_ELEM(ELEM_W),
1984                             SRC1_NEG(0),
1985                             INDEX_MODE(SQ_INDEX_AR_X),
1986                             PRED_SEL(SQ_PRED_SEL_OFF),
1987                             LAST(1));
1988    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1989                                 SRC1_ABS(0),
1990                                 UPDATE_EXECUTE_MASK(0),
1991                                 UPDATE_PRED(0),
1992                                 WRITE_MASK(1),
1993                                 OMOD(SQ_ALU_OMOD_OFF),
1994                                 ALU_INST(SQ_OP2_INST_MUL),
1995                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1996                                 DST_GPR(1),
1997                                 DST_REL(ABSOLUTE),
1998                                 DST_ELEM(ELEM_X),
1999                                 CLAMP(0));
2000
2001    /* 31 srcY / h */
2002    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
2003                             SRC0_REL(ABSOLUTE),
2004                             SRC0_ELEM(ELEM_Y),
2005                             SRC0_NEG(0),
2006                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2007                             SRC1_REL(ABSOLUTE),
2008                             SRC1_ELEM(ELEM_W),
2009                             SRC1_NEG(0),
2010                             INDEX_MODE(SQ_INDEX_AR_X),
2011                             PRED_SEL(SQ_PRED_SEL_OFF),
2012                             LAST(1));
2013    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2014                                 SRC1_ABS(0),
2015                                 UPDATE_EXECUTE_MASK(0),
2016                                 UPDATE_PRED(0),
2017                                 WRITE_MASK(1),
2018                                 OMOD(SQ_ALU_OMOD_OFF),
2019                                 ALU_INST(SQ_OP2_INST_MUL),
2020                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2021                                 DST_GPR(1),
2022                                 DST_REL(ABSOLUTE),
2023                                 DST_ELEM(ELEM_Y),
2024                                 CLAMP(0));
2025
2026    /* 32 maskX / w */
2027    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
2028                             SRC0_REL(ABSOLUTE),
2029                             SRC0_ELEM(ELEM_X),
2030                             SRC0_NEG(0),
2031                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
2032                             SRC1_REL(ABSOLUTE),
2033                             SRC1_ELEM(ELEM_W),
2034                             SRC1_NEG(0),
2035                             INDEX_MODE(SQ_INDEX_AR_X),
2036                             PRED_SEL(SQ_PRED_SEL_OFF),
2037                             LAST(1));
2038    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2039                                 SRC1_ABS(0),
2040                                 UPDATE_EXECUTE_MASK(0),
2041                                 UPDATE_PRED(0),
2042                                 WRITE_MASK(1),
2043                                 OMOD(SQ_ALU_OMOD_OFF),
2044                                 ALU_INST(SQ_OP2_INST_MUL),
2045                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2046                                 DST_GPR(0),
2047                                 DST_REL(ABSOLUTE),
2048                                 DST_ELEM(ELEM_X),
2049                                 CLAMP(0));
2050
2051    /* 33 maskY / h */
2052    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
2053                             SRC0_REL(ABSOLUTE),
2054                             SRC0_ELEM(ELEM_Y),
2055                             SRC0_NEG(0),
2056                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
2057                             SRC1_REL(ABSOLUTE),
2058                             SRC1_ELEM(ELEM_W),
2059                             SRC1_NEG(0),
2060                             INDEX_MODE(SQ_INDEX_AR_X),
2061                             PRED_SEL(SQ_PRED_SEL_OFF),
2062                             LAST(1));
2063    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2064                                 SRC1_ABS(0),
2065                                 UPDATE_EXECUTE_MASK(0),
2066                                 UPDATE_PRED(0),
2067                                 WRITE_MASK(1),
2068                                 OMOD(SQ_ALU_OMOD_OFF),
2069                                 ALU_INST(SQ_OP2_INST_MUL),
2070                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2071                                 DST_GPR(0),
2072                                 DST_REL(ABSOLUTE),
2073                                 DST_ELEM(ELEM_Y),
2074                                 CLAMP(0));
2075
2076    /* 34 srcX.x DOT4 - non-mask */
2077    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2078                             SRC0_REL(ABSOLUTE),
2079                             SRC0_ELEM(ELEM_X),
2080                             SRC0_NEG(0),
2081                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2082                             SRC1_REL(ABSOLUTE),
2083                             SRC1_ELEM(ELEM_X),
2084                             SRC1_NEG(0),
2085                             INDEX_MODE(SQ_INDEX_LOOP),
2086                             PRED_SEL(SQ_PRED_SEL_OFF),
2087                             LAST(0));
2088    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2089                                 SRC1_ABS(0),
2090                                 UPDATE_EXECUTE_MASK(0),
2091                                 UPDATE_PRED(0),
2092                                 WRITE_MASK(1),
2093                                 OMOD(SQ_ALU_OMOD_OFF),
2094                                 ALU_INST(SQ_OP2_INST_DOT4),
2095                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2096                                 DST_GPR(2),
2097                                 DST_REL(ABSOLUTE),
2098                                 DST_ELEM(ELEM_X),
2099                                 CLAMP(0));
2100
2101    /* 35 srcX.y DOT4 - non-mask */
2102    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2103                             SRC0_REL(ABSOLUTE),
2104                             SRC0_ELEM(ELEM_Y),
2105                             SRC0_NEG(0),
2106                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2107                             SRC1_REL(ABSOLUTE),
2108                             SRC1_ELEM(ELEM_Y),
2109                             SRC1_NEG(0),
2110                             INDEX_MODE(SQ_INDEX_LOOP),
2111                             PRED_SEL(SQ_PRED_SEL_OFF),
2112                             LAST(0));
2113    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2114                                 SRC1_ABS(0),
2115                                 UPDATE_EXECUTE_MASK(0),
2116                                 UPDATE_PRED(0),
2117                                 WRITE_MASK(0),
2118                                 OMOD(SQ_ALU_OMOD_OFF),
2119                                 ALU_INST(SQ_OP2_INST_DOT4),
2120                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2121                                 DST_GPR(2),
2122                                 DST_REL(ABSOLUTE),
2123                                 DST_ELEM(ELEM_Y),
2124                                 CLAMP(0));
2125
2126    /* 36 srcX.z DOT4 - non-mask */
2127    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2128                             SRC0_REL(ABSOLUTE),
2129                             SRC0_ELEM(ELEM_Z),
2130                             SRC0_NEG(0),
2131                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2132                             SRC1_REL(ABSOLUTE),
2133                             SRC1_ELEM(ELEM_Z),
2134                             SRC1_NEG(0),
2135                             INDEX_MODE(SQ_INDEX_LOOP),
2136                             PRED_SEL(SQ_PRED_SEL_OFF),
2137                             LAST(0));
2138    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2139                                 SRC1_ABS(0),
2140                                 UPDATE_EXECUTE_MASK(0),
2141                                 UPDATE_PRED(0),
2142                                 WRITE_MASK(0),
2143                                 OMOD(SQ_ALU_OMOD_OFF),
2144                                 ALU_INST(SQ_OP2_INST_DOT4),
2145                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2146                                 DST_GPR(2),
2147                                 DST_REL(ABSOLUTE),
2148                                 DST_ELEM(ELEM_Z),
2149                                 CLAMP(0));
2150
2151    /* 37 srcX.w DOT4 - non-mask */
2152    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2153                             SRC0_REL(ABSOLUTE),
2154                             SRC0_ELEM(ELEM_W),
2155                             SRC0_NEG(0),
2156                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2157                             SRC1_REL(ABSOLUTE),
2158                             SRC1_ELEM(ELEM_W),
2159                             SRC1_NEG(0),
2160                             INDEX_MODE(SQ_INDEX_LOOP),
2161                             PRED_SEL(SQ_PRED_SEL_OFF),
2162                             LAST(1));
2163    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2164                                 SRC1_ABS(0),
2165                                 UPDATE_EXECUTE_MASK(0),
2166                                 UPDATE_PRED(0),
2167                                 WRITE_MASK(0),
2168                                 OMOD(SQ_ALU_OMOD_OFF),
2169                                 ALU_INST(SQ_OP2_INST_DOT4),
2170                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2171                                 DST_GPR(2),
2172                                 DST_REL(ABSOLUTE),
2173                                 DST_ELEM(ELEM_W),
2174                                 CLAMP(0));
2175
2176    /* 38 srcY.x DOT4 - non-mask */
2177    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2178                             SRC0_REL(ABSOLUTE),
2179                             SRC0_ELEM(ELEM_X),
2180                             SRC0_NEG(0),
2181                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2182                             SRC1_REL(ABSOLUTE),
2183                             SRC1_ELEM(ELEM_X),
2184                             SRC1_NEG(0),
2185                             INDEX_MODE(SQ_INDEX_LOOP),
2186                             PRED_SEL(SQ_PRED_SEL_OFF),
2187                             LAST(0));
2188    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2189                                 SRC1_ABS(0),
2190                                 UPDATE_EXECUTE_MASK(0),
2191                                 UPDATE_PRED(0),
2192                                 WRITE_MASK(0),
2193                                 OMOD(SQ_ALU_OMOD_OFF),
2194                                 ALU_INST(SQ_OP2_INST_DOT4),
2195                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2196                                 DST_GPR(2),
2197                                 DST_REL(ABSOLUTE),
2198                                 DST_ELEM(ELEM_X),
2199                                 CLAMP(0));
2200
2201    /* 39 srcY.y DOT4 - non-mask */
2202    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2203                             SRC0_REL(ABSOLUTE),
2204                             SRC0_ELEM(ELEM_Y),
2205                             SRC0_NEG(0),
2206                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2207                             SRC1_REL(ABSOLUTE),
2208                             SRC1_ELEM(ELEM_Y),
2209                             SRC1_NEG(0),
2210                             INDEX_MODE(SQ_INDEX_LOOP),
2211                             PRED_SEL(SQ_PRED_SEL_OFF),
2212                             LAST(0));
2213    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2214                                 SRC1_ABS(0),
2215                                 UPDATE_EXECUTE_MASK(0),
2216                                 UPDATE_PRED(0),
2217                                 WRITE_MASK(1),
2218                                 OMOD(SQ_ALU_OMOD_OFF),
2219                                 ALU_INST(SQ_OP2_INST_DOT4),
2220                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2221                                 DST_GPR(2),
2222                                 DST_REL(ABSOLUTE),
2223                                 DST_ELEM(ELEM_Y),
2224                                 CLAMP(0));
2225
2226    /* 40 srcY.z DOT4 - non-mask */
2227    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2228                             SRC0_REL(ABSOLUTE),
2229                             SRC0_ELEM(ELEM_Z),
2230                             SRC0_NEG(0),
2231                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2232                             SRC1_REL(ABSOLUTE),
2233                             SRC1_ELEM(ELEM_Z),
2234                             SRC1_NEG(0),
2235                             INDEX_MODE(SQ_INDEX_LOOP),
2236                             PRED_SEL(SQ_PRED_SEL_OFF),
2237                             LAST(0));
2238    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2239                                 SRC1_ABS(0),
2240                                 UPDATE_EXECUTE_MASK(0),
2241                                 UPDATE_PRED(0),
2242                                 WRITE_MASK(0),
2243                                 OMOD(SQ_ALU_OMOD_OFF),
2244                                 ALU_INST(SQ_OP2_INST_DOT4),
2245                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2246                                 DST_GPR(2),
2247                                 DST_REL(ABSOLUTE),
2248                                 DST_ELEM(ELEM_Z),
2249                                 CLAMP(0));
2250
2251    /* 41 srcY.w DOT4 - non-mask */
2252    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2253                             SRC0_REL(ABSOLUTE),
2254                             SRC0_ELEM(ELEM_W),
2255                             SRC0_NEG(0),
2256                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2257                             SRC1_REL(ABSOLUTE),
2258                             SRC1_ELEM(ELEM_W),
2259                             SRC1_NEG(0),
2260                             INDEX_MODE(SQ_INDEX_LOOP),
2261                             PRED_SEL(SQ_PRED_SEL_OFF),
2262                             LAST(1));
2263    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2264                                 SRC1_ABS(0),
2265                                 UPDATE_EXECUTE_MASK(0),
2266                                 UPDATE_PRED(0),
2267                                 WRITE_MASK(0),
2268                                 OMOD(SQ_ALU_OMOD_OFF),
2269                                 ALU_INST(SQ_OP2_INST_DOT4),
2270                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2271                                 DST_GPR(2),
2272                                 DST_REL(ABSOLUTE),
2273                                 DST_ELEM(ELEM_W),
2274                                 CLAMP(0));
2275
2276    /* 42 srcX / w */
2277    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
2278                             SRC0_REL(ABSOLUTE),
2279                             SRC0_ELEM(ELEM_X),
2280                             SRC0_NEG(0),
2281                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2282                             SRC1_REL(ABSOLUTE),
2283                             SRC1_ELEM(ELEM_W),
2284                             SRC1_NEG(0),
2285                             INDEX_MODE(SQ_INDEX_AR_X),
2286                             PRED_SEL(SQ_PRED_SEL_OFF),
2287                             LAST(1));
2288    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2289                                 SRC1_ABS(0),
2290                                 UPDATE_EXECUTE_MASK(0),
2291                                 UPDATE_PRED(0),
2292                                 WRITE_MASK(1),
2293                                 OMOD(SQ_ALU_OMOD_OFF),
2294                                 ALU_INST(SQ_OP2_INST_MUL),
2295                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2296                                 DST_GPR(0),
2297                                 DST_REL(ABSOLUTE),
2298                                 DST_ELEM(ELEM_X),
2299                                 CLAMP(0));
2300
2301    /* 43 srcY / h */
2302    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
2303                             SRC0_REL(ABSOLUTE),
2304                             SRC0_ELEM(ELEM_Y),
2305                             SRC0_NEG(0),
2306                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2307                             SRC1_REL(ABSOLUTE),
2308                             SRC1_ELEM(ELEM_W),
2309                             SRC1_NEG(0),
2310                             INDEX_MODE(SQ_INDEX_AR_X),
2311                             PRED_SEL(SQ_PRED_SEL_OFF),
2312                             LAST(1));
2313    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2314                                 SRC1_ABS(0),
2315                                 UPDATE_EXECUTE_MASK(0),
2316                                 UPDATE_PRED(0),
2317                                 WRITE_MASK(1),
2318                                 OMOD(SQ_ALU_OMOD_OFF),
2319                                 ALU_INST(SQ_OP2_INST_MUL),
2320                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2321                                 DST_GPR(0),
2322                                 DST_REL(ABSOLUTE),
2323                                 DST_ELEM(ELEM_Y),
2324                                 CLAMP(0));
2325
2326    /* mask vfetch - 44/45 - dst */
2327    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2328			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2329			     FETCH_WHOLE_QUAD(0),
2330			     BUFFER_ID(0),
2331			     SRC_GPR(0),
2332			     SRC_REL(ABSOLUTE),
2333			     SRC_SEL_X(SQ_SEL_X),
2334			     MEGA_FETCH_COUNT(24));
2335    shader[i++] = VTX_DWORD1_GPR(DST_GPR(2),
2336				 DST_REL(0),
2337				 DST_SEL_X(SQ_SEL_X),
2338				 DST_SEL_Y(SQ_SEL_Y),
2339				 DST_SEL_Z(SQ_SEL_0),
2340				 DST_SEL_W(SQ_SEL_1),
2341				 USE_CONST_FIELDS(0),
2342				 DATA_FORMAT(FMT_32_32_FLOAT),
2343				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2344				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2345				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2346    shader[i++] = VTX_DWORD2(OFFSET(0),
2347			     ENDIAN_SWAP(ENDIAN_NONE),
2348			     CONST_BUF_NO_STRIDE(0),
2349			     MEGA_FETCH(1),
2350			     ALT_CONST(0),
2351			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2352    shader[i++] = VTX_DWORD_PAD;
2353    /* 46/47 - src */
2354    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2355			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2356			     FETCH_WHOLE_QUAD(0),
2357			     BUFFER_ID(0),
2358			     SRC_GPR(0),
2359			     SRC_REL(ABSOLUTE),
2360			     SRC_SEL_X(SQ_SEL_X),
2361			     MEGA_FETCH_COUNT(8));
2362    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
2363				 DST_REL(0),
2364				 DST_SEL_X(SQ_SEL_X),
2365				 DST_SEL_Y(SQ_SEL_Y),
2366				 DST_SEL_Z(SQ_SEL_1),
2367				 DST_SEL_W(SQ_SEL_0),
2368				 USE_CONST_FIELDS(0),
2369				 DATA_FORMAT(FMT_32_32_FLOAT),
2370				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2371				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2372				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2373    shader[i++] = VTX_DWORD2(OFFSET(8),
2374			     ENDIAN_SWAP(ENDIAN_NONE),
2375			     CONST_BUF_NO_STRIDE(0),
2376			     MEGA_FETCH(0),
2377			     ALT_CONST(0),
2378			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2379    shader[i++] = VTX_DWORD_PAD;
2380    /* 48/49 - mask */
2381    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2382			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2383			     FETCH_WHOLE_QUAD(0),
2384			     BUFFER_ID(0),
2385			     SRC_GPR(0),
2386			     SRC_REL(ABSOLUTE),
2387			     SRC_SEL_X(SQ_SEL_X),
2388			     MEGA_FETCH_COUNT(8));
2389    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
2390				 DST_REL(0),
2391				 DST_SEL_X(SQ_SEL_X),
2392				 DST_SEL_Y(SQ_SEL_Y),
2393				 DST_SEL_Z(SQ_SEL_1),
2394				 DST_SEL_W(SQ_SEL_0),
2395				 USE_CONST_FIELDS(0),
2396				 DATA_FORMAT(FMT_32_32_FLOAT),
2397				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2398				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2399				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2400    shader[i++] = VTX_DWORD2(OFFSET(16),
2401			     ENDIAN_SWAP(ENDIAN_NONE),
2402			     CONST_BUF_NO_STRIDE(0),
2403			     MEGA_FETCH(0),
2404			     ALT_CONST(0),
2405			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2406    shader[i++] = VTX_DWORD_PAD;
2407
2408    /* no mask vfetch - 50/51 - dst */
2409    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2410			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2411			     FETCH_WHOLE_QUAD(0),
2412			     BUFFER_ID(0),
2413			     SRC_GPR(0),
2414			     SRC_REL(ABSOLUTE),
2415			     SRC_SEL_X(SQ_SEL_X),
2416			     MEGA_FETCH_COUNT(16));
2417    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
2418				 DST_REL(0),
2419				 DST_SEL_X(SQ_SEL_X),
2420				 DST_SEL_Y(SQ_SEL_Y),
2421				 DST_SEL_Z(SQ_SEL_0),
2422				 DST_SEL_W(SQ_SEL_1),
2423				 USE_CONST_FIELDS(0),
2424				 DATA_FORMAT(FMT_32_32_FLOAT),
2425				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2426				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2427				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2428    shader[i++] = VTX_DWORD2(OFFSET(0),
2429			     ENDIAN_SWAP(ENDIAN_NONE),
2430			     CONST_BUF_NO_STRIDE(0),
2431			     MEGA_FETCH(1),
2432			     ALT_CONST(0),
2433			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2434    shader[i++] = VTX_DWORD_PAD;
2435    /* 52/53 - src */
2436    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2437			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2438			     FETCH_WHOLE_QUAD(0),
2439			     BUFFER_ID(0),
2440			     SRC_GPR(0),
2441			     SRC_REL(ABSOLUTE),
2442			     SRC_SEL_X(SQ_SEL_X),
2443			     MEGA_FETCH_COUNT(8));
2444    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
2445				 DST_REL(0),
2446				 DST_SEL_X(SQ_SEL_X),
2447				 DST_SEL_Y(SQ_SEL_Y),
2448				 DST_SEL_Z(SQ_SEL_1),
2449				 DST_SEL_W(SQ_SEL_0),
2450				 USE_CONST_FIELDS(0),
2451				 DATA_FORMAT(FMT_32_32_FLOAT),
2452				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2453				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2454				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2455    shader[i++] = VTX_DWORD2(OFFSET(8),
2456			     ENDIAN_SWAP(ENDIAN_NONE),
2457			     CONST_BUF_NO_STRIDE(0),
2458			     MEGA_FETCH(0),
2459                             ALT_CONST(0),
2460                             BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2461    shader[i++] = VTX_DWORD_PAD;
2462
2463    return i;
2464}
2465
2466/* comp ps --------------------------------------- */
2467int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
2468{
2469    int i = 0;
2470
2471    /* 0 */
2472    shader[i++] = CF_DWORD0(ADDR(3),
2473			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2474    shader[i++] = CF_DWORD1(POP_COUNT(0),
2475                            CF_CONST(0),
2476                            COND(SQ_CF_COND_BOOL),
2477                            I_COUNT(0),
2478                            VALID_PIXEL_MODE(0),
2479                            END_OF_PROGRAM(0),
2480                            CF_INST(SQ_CF_INST_CALL),
2481                            WHOLE_QUAD_MODE(0),
2482                            BARRIER(0));
2483    /* 1 */
2484    shader[i++] = CF_DWORD0(ADDR(8),
2485			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2486    shader[i++] = CF_DWORD1(POP_COUNT(0),
2487                            CF_CONST(0),
2488                            COND(SQ_CF_COND_NOT_BOOL),
2489                            I_COUNT(0),
2490                            VALID_PIXEL_MODE(0),
2491                            END_OF_PROGRAM(0),
2492                            CF_INST(SQ_CF_INST_CALL),
2493                            WHOLE_QUAD_MODE(0),
2494                            BARRIER(0));
2495    /* 2 */
2496    shader[i++] = CF_DWORD0(ADDR(0),
2497                            JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2498    shader[i++] = CF_DWORD1(POP_COUNT(0),
2499                            CF_CONST(0),
2500                            COND(SQ_CF_COND_ACTIVE),
2501                            I_COUNT(0),
2502                            VALID_PIXEL_MODE(0),
2503                            END_OF_PROGRAM(1),
2504                            CF_INST(SQ_CF_INST_NOP),
2505                            WHOLE_QUAD_MODE(0),
2506                            BARRIER(1));
2507
2508    /* 3 - mask sub */
2509    shader[i++] = CF_ALU_DWORD0(ADDR(12),
2510				KCACHE_BANK0(0),
2511				KCACHE_BANK1(0),
2512				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
2513    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2514				KCACHE_ADDR0(0),
2515				KCACHE_ADDR1(0),
2516				I_COUNT(8),
2517				ALT_CONST(0),
2518				CF_INST(SQ_CF_INST_ALU),
2519				WHOLE_QUAD_MODE(0),
2520				BARRIER(1));
2521
2522    /* 4 */
2523    shader[i++] = CF_DWORD0(ADDR(28),
2524			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2525    shader[i++] = CF_DWORD1(POP_COUNT(0),
2526			    CF_CONST(0),
2527			    COND(SQ_CF_COND_ACTIVE),
2528			    I_COUNT(2),
2529			    VALID_PIXEL_MODE(0),
2530			    END_OF_PROGRAM(0),
2531			    CF_INST(SQ_CF_INST_TC),
2532			    WHOLE_QUAD_MODE(0),
2533			    BARRIER(1));
2534
2535    /* 5 */
2536    shader[i++] = CF_ALU_DWORD0(ADDR(20),
2537				KCACHE_BANK0(0),
2538				KCACHE_BANK1(0),
2539				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
2540    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2541				KCACHE_ADDR0(0),
2542				KCACHE_ADDR1(0),
2543				I_COUNT(4),
2544				ALT_CONST(0),
2545				CF_INST(SQ_CF_INST_ALU),
2546				WHOLE_QUAD_MODE(0),
2547				BARRIER(1));
2548
2549    /* 6 */
2550    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
2551					  TYPE(SQ_EXPORT_PIXEL),
2552					  RW_GPR(2),
2553					  RW_REL(ABSOLUTE),
2554					  INDEX_GPR(0),
2555					  ELEM_SIZE(1));
2556
2557    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
2558					       SRC_SEL_Y(SQ_SEL_Y),
2559					       SRC_SEL_Z(SQ_SEL_Z),
2560					       SRC_SEL_W(SQ_SEL_W),
2561					       BURST_COUNT(1),
2562					       VALID_PIXEL_MODE(0),
2563					       END_OF_PROGRAM(0),
2564					       CF_INST(SQ_CF_INST_EXPORT_DONE),
2565					       MARK(0),
2566					       BARRIER(1));
2567    /* 7 */
2568    shader[i++] = CF_DWORD0(ADDR(0),
2569			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2570    shader[i++] = CF_DWORD1(POP_COUNT(0),
2571			    CF_CONST(0),
2572			    COND(SQ_CF_COND_ACTIVE),
2573			    I_COUNT(0),
2574			    VALID_PIXEL_MODE(0),
2575			    END_OF_PROGRAM(0),
2576			    CF_INST(SQ_CF_INST_RETURN),
2577			    WHOLE_QUAD_MODE(0),
2578			    BARRIER(1));
2579
2580    /* 8 - non-mask sub */
2581    shader[i++] = CF_ALU_DWORD0(ADDR(24),
2582				KCACHE_BANK0(0),
2583				KCACHE_BANK1(0),
2584				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
2585    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2586				KCACHE_ADDR0(0),
2587				KCACHE_ADDR1(0),
2588				I_COUNT(4),
2589				ALT_CONST(0),
2590				CF_INST(SQ_CF_INST_ALU),
2591				WHOLE_QUAD_MODE(0),
2592				BARRIER(1));
2593    /* 9 */
2594    shader[i++] = CF_DWORD0(ADDR(32),
2595			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2596    shader[i++] = CF_DWORD1(POP_COUNT(0),
2597			    CF_CONST(0),
2598			    COND(SQ_CF_COND_ACTIVE),
2599			    I_COUNT(1),
2600			    VALID_PIXEL_MODE(0),
2601			    END_OF_PROGRAM(0),
2602			    CF_INST(SQ_CF_INST_TC),
2603			    WHOLE_QUAD_MODE(0),
2604			    BARRIER(1));
2605
2606    /* 10 */
2607    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
2608					  TYPE(SQ_EXPORT_PIXEL),
2609					  RW_GPR(0),
2610					  RW_REL(ABSOLUTE),
2611					  INDEX_GPR(0),
2612					  ELEM_SIZE(1));
2613    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
2614					       SRC_SEL_Y(SQ_SEL_Y),
2615					       SRC_SEL_Z(SQ_SEL_Z),
2616					       SRC_SEL_W(SQ_SEL_W),
2617					       BURST_COUNT(1),
2618					       VALID_PIXEL_MODE(0),
2619					       END_OF_PROGRAM(0),
2620					       CF_INST(SQ_CF_INST_EXPORT_DONE),
2621					       MARK(0),
2622					       BARRIER(1));
2623
2624    /* 11 */
2625    shader[i++] = CF_DWORD0(ADDR(0),
2626			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2627    shader[i++] = CF_DWORD1(POP_COUNT(0),
2628			    CF_CONST(0),
2629			    COND(SQ_CF_COND_ACTIVE),
2630			    I_COUNT(0),
2631			    VALID_PIXEL_MODE(0),
2632			    END_OF_PROGRAM(0),
2633			    CF_INST(SQ_CF_INST_RETURN),
2634			    WHOLE_QUAD_MODE(0),
2635			    BARRIER(1));
2636
2637    /* 12 interpolate src tex coords - mask */
2638    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2639			     SRC0_REL(ABSOLUTE),
2640			     SRC0_ELEM(ELEM_Y),
2641			     SRC0_NEG(0),
2642			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2643			     SRC1_REL(ABSOLUTE),
2644			     SRC1_ELEM(ELEM_X),
2645			     SRC1_NEG(0),
2646			     INDEX_MODE(SQ_INDEX_AR_X),
2647			     PRED_SEL(SQ_PRED_SEL_OFF),
2648			     LAST(0));
2649    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2650				 SRC1_ABS(0),
2651				 UPDATE_EXECUTE_MASK(0),
2652				 UPDATE_PRED(0),
2653				 WRITE_MASK(1),
2654				 OMOD(SQ_ALU_OMOD_OFF),
2655				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2656				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2657				 DST_GPR(1),
2658				 DST_REL(ABSOLUTE),
2659				 DST_ELEM(ELEM_X),
2660				 CLAMP(0));
2661    /* 13 */
2662    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2663			     SRC0_REL(ABSOLUTE),
2664			     SRC0_ELEM(ELEM_X),
2665			     SRC0_NEG(0),
2666			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2667			     SRC1_REL(ABSOLUTE),
2668			     SRC1_ELEM(ELEM_X),
2669			     SRC1_NEG(0),
2670			     INDEX_MODE(SQ_INDEX_AR_X),
2671			     PRED_SEL(SQ_PRED_SEL_OFF),
2672			     LAST(0));
2673    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2674				 SRC1_ABS(0),
2675				 UPDATE_EXECUTE_MASK(0),
2676				 UPDATE_PRED(0),
2677				 WRITE_MASK(1),
2678				 OMOD(SQ_ALU_OMOD_OFF),
2679				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2680				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2681				 DST_GPR(1),
2682				 DST_REL(ABSOLUTE),
2683				 DST_ELEM(ELEM_Y),
2684				 CLAMP(0));
2685    /* 14 */
2686    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2687			     SRC0_REL(ABSOLUTE),
2688			     SRC0_ELEM(ELEM_Y),
2689			     SRC0_NEG(0),
2690			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2691			     SRC1_REL(ABSOLUTE),
2692			     SRC1_ELEM(ELEM_X),
2693			     SRC1_NEG(0),
2694			     INDEX_MODE(SQ_INDEX_AR_X),
2695			     PRED_SEL(SQ_PRED_SEL_OFF),
2696			     LAST(0));
2697    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2698				 SRC1_ABS(0),
2699				 UPDATE_EXECUTE_MASK(0),
2700				 UPDATE_PRED(0),
2701				 WRITE_MASK(0),
2702				 OMOD(SQ_ALU_OMOD_OFF),
2703				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2704				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2705				 DST_GPR(1),
2706				 DST_REL(ABSOLUTE),
2707				 DST_ELEM(ELEM_Z),
2708				 CLAMP(0));
2709    /* 15 */
2710    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2711			     SRC0_REL(ABSOLUTE),
2712			     SRC0_ELEM(ELEM_X),
2713			     SRC0_NEG(0),
2714			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2715			     SRC1_REL(ABSOLUTE),
2716			     SRC1_ELEM(ELEM_X),
2717			     SRC1_NEG(0),
2718			     INDEX_MODE(SQ_INDEX_AR_X),
2719			     PRED_SEL(SQ_PRED_SEL_OFF),
2720			     LAST(1));
2721    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2722				 SRC1_ABS(0),
2723				 UPDATE_EXECUTE_MASK(0),
2724				 UPDATE_PRED(0),
2725				 WRITE_MASK(0),
2726				 OMOD(SQ_ALU_OMOD_OFF),
2727				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2728				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2729				 DST_GPR(1),
2730				 DST_REL(ABSOLUTE),
2731				 DST_ELEM(ELEM_W),
2732				 CLAMP(0));
2733
2734    /* 16 interpolate mask tex coords */
2735    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2736			     SRC0_REL(ABSOLUTE),
2737			     SRC0_ELEM(ELEM_Y),
2738			     SRC0_NEG(0),
2739			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
2740			     SRC1_REL(ABSOLUTE),
2741			     SRC1_ELEM(ELEM_X),
2742			     SRC1_NEG(0),
2743			     INDEX_MODE(SQ_INDEX_AR_X),
2744			     PRED_SEL(SQ_PRED_SEL_OFF),
2745			     LAST(0));
2746    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2747				 SRC1_ABS(0),
2748				 UPDATE_EXECUTE_MASK(0),
2749				 UPDATE_PRED(0),
2750				 WRITE_MASK(1),
2751				 OMOD(SQ_ALU_OMOD_OFF),
2752				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2753				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2754				 DST_GPR(0),
2755				 DST_REL(ABSOLUTE),
2756				 DST_ELEM(ELEM_X),
2757				 CLAMP(0));
2758    /* 17 */
2759    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2760			     SRC0_REL(ABSOLUTE),
2761			     SRC0_ELEM(ELEM_X),
2762			     SRC0_NEG(0),
2763			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
2764			     SRC1_REL(ABSOLUTE),
2765			     SRC1_ELEM(ELEM_X),
2766			     SRC1_NEG(0),
2767			     INDEX_MODE(SQ_INDEX_AR_X),
2768			     PRED_SEL(SQ_PRED_SEL_OFF),
2769			     LAST(0));
2770    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2771				 SRC1_ABS(0),
2772				 UPDATE_EXECUTE_MASK(0),
2773				 UPDATE_PRED(0),
2774				 WRITE_MASK(1),
2775				 OMOD(SQ_ALU_OMOD_OFF),
2776				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2777				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2778				 DST_GPR(0),
2779				 DST_REL(ABSOLUTE),
2780				 DST_ELEM(ELEM_Y),
2781				 CLAMP(0));
2782    /* 18 */
2783    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2784			     SRC0_REL(ABSOLUTE),
2785			     SRC0_ELEM(ELEM_Y),
2786			     SRC0_NEG(0),
2787			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
2788			     SRC1_REL(ABSOLUTE),
2789			     SRC1_ELEM(ELEM_X),
2790			     SRC1_NEG(0),
2791			     INDEX_MODE(SQ_INDEX_AR_X),
2792			     PRED_SEL(SQ_PRED_SEL_OFF),
2793			     LAST(0));
2794    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2795				 SRC1_ABS(0),
2796				 UPDATE_EXECUTE_MASK(0),
2797				 UPDATE_PRED(0),
2798				 WRITE_MASK(0),
2799				 OMOD(SQ_ALU_OMOD_OFF),
2800				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2801				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2802				 DST_GPR(0),
2803				 DST_REL(ABSOLUTE),
2804				 DST_ELEM(ELEM_Z),
2805				 CLAMP(0));
2806    /* 19 */
2807    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2808			     SRC0_REL(ABSOLUTE),
2809			     SRC0_ELEM(ELEM_X),
2810			     SRC0_NEG(0),
2811			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
2812			     SRC1_REL(ABSOLUTE),
2813			     SRC1_ELEM(ELEM_X),
2814			     SRC1_NEG(0),
2815			     INDEX_MODE(SQ_INDEX_AR_X),
2816			     PRED_SEL(SQ_PRED_SEL_OFF),
2817			     LAST(1));
2818    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2819				 SRC1_ABS(0),
2820				 UPDATE_EXECUTE_MASK(0),
2821				 UPDATE_PRED(0),
2822				 WRITE_MASK(0),
2823				 OMOD(SQ_ALU_OMOD_OFF),
2824				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2825				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2826				 DST_GPR(0),
2827				 DST_REL(ABSOLUTE),
2828				 DST_ELEM(ELEM_W),
2829				 CLAMP(0));
2830
2831    /* 20 - alu 0 */
2832    /* MUL gpr[2].x gpr[0].x gpr[1].x */
2833    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2834			     SRC0_REL(ABSOLUTE),
2835			     SRC0_ELEM(ELEM_X),
2836			     SRC0_NEG(0),
2837			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
2838			     SRC1_REL(ABSOLUTE),
2839			     SRC1_ELEM(ELEM_X),
2840			     SRC1_NEG(0),
2841			     INDEX_MODE(SQ_INDEX_LOOP),
2842			     PRED_SEL(SQ_PRED_SEL_OFF),
2843			     LAST(0));
2844    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2845				 SRC1_ABS(0),
2846				 UPDATE_EXECUTE_MASK(0),
2847				 UPDATE_PRED(0),
2848				 WRITE_MASK(1),
2849				 OMOD(SQ_ALU_OMOD_OFF),
2850				 ALU_INST(SQ_OP2_INST_MUL),
2851				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2852				 DST_GPR(2),
2853				 DST_REL(ABSOLUTE),
2854				 DST_ELEM(ELEM_X),
2855				 CLAMP(1));
2856    /* 21 - alu 1 */
2857    /* MUL gpr[2].y gpr[0].y gpr[1].y */
2858    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2859			     SRC0_REL(ABSOLUTE),
2860			     SRC0_ELEM(ELEM_Y),
2861			     SRC0_NEG(0),
2862			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
2863			     SRC1_REL(ABSOLUTE),
2864			     SRC1_ELEM(ELEM_Y),
2865			     SRC1_NEG(0),
2866			     INDEX_MODE(SQ_INDEX_LOOP),
2867			     PRED_SEL(SQ_PRED_SEL_OFF),
2868			     LAST(0));
2869    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2870				 SRC1_ABS(0),
2871				 UPDATE_EXECUTE_MASK(0),
2872				 UPDATE_PRED(0),
2873				 WRITE_MASK(1),
2874				 OMOD(SQ_ALU_OMOD_OFF),
2875				 ALU_INST(SQ_OP2_INST_MUL),
2876				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2877				 DST_GPR(2),
2878				 DST_REL(ABSOLUTE),
2879				 DST_ELEM(ELEM_Y),
2880				 CLAMP(1));
2881    /* 22 - alu 2 */
2882    /* MUL gpr[2].z gpr[0].z gpr[1].z */
2883    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2884			     SRC0_REL(ABSOLUTE),
2885			     SRC0_ELEM(ELEM_Z),
2886			     SRC0_NEG(0),
2887			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
2888			     SRC1_REL(ABSOLUTE),
2889			     SRC1_ELEM(ELEM_Z),
2890			     SRC1_NEG(0),
2891			     INDEX_MODE(SQ_INDEX_LOOP),
2892			     PRED_SEL(SQ_PRED_SEL_OFF),
2893			     LAST(0));
2894    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2895				 SRC1_ABS(0),
2896				 UPDATE_EXECUTE_MASK(0),
2897				 UPDATE_PRED(0),
2898				 WRITE_MASK(1),
2899				 OMOD(SQ_ALU_OMOD_OFF),
2900				 ALU_INST(SQ_OP2_INST_MUL),
2901				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2902				 DST_GPR(2),
2903				 DST_REL(ABSOLUTE),
2904				 DST_ELEM(ELEM_Z),
2905				 CLAMP(1));
2906    /* 23 - alu 3 */
2907    /* MUL gpr[2].w gpr[0].w gpr[1].w */
2908    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2909			     SRC0_REL(ABSOLUTE),
2910			     SRC0_ELEM(ELEM_W),
2911			     SRC0_NEG(0),
2912			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
2913			     SRC1_REL(ABSOLUTE),
2914			     SRC1_ELEM(ELEM_W),
2915			     SRC1_NEG(0),
2916			     INDEX_MODE(SQ_INDEX_LOOP),
2917			     PRED_SEL(SQ_PRED_SEL_OFF),
2918			     LAST(1));
2919    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2920				 SRC1_ABS(0),
2921				 UPDATE_EXECUTE_MASK(0),
2922				 UPDATE_PRED(0),
2923				 WRITE_MASK(1),
2924				 OMOD(SQ_ALU_OMOD_OFF),
2925				 ALU_INST(SQ_OP2_INST_MUL),
2926				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2927				 DST_GPR(2),
2928				 DST_REL(ABSOLUTE),
2929				 DST_ELEM(ELEM_W),
2930				 CLAMP(1));
2931
2932    /* 24 - interpolate tex coords - non-mask */
2933    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2934			     SRC0_REL(ABSOLUTE),
2935			     SRC0_ELEM(ELEM_Y),
2936			     SRC0_NEG(0),
2937			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2938			     SRC1_REL(ABSOLUTE),
2939			     SRC1_ELEM(ELEM_X),
2940			     SRC1_NEG(0),
2941			     INDEX_MODE(SQ_INDEX_AR_X),
2942			     PRED_SEL(SQ_PRED_SEL_OFF),
2943			     LAST(0));
2944    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2945				 SRC1_ABS(0),
2946				 UPDATE_EXECUTE_MASK(0),
2947				 UPDATE_PRED(0),
2948				 WRITE_MASK(1),
2949				 OMOD(SQ_ALU_OMOD_OFF),
2950				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2951				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2952				 DST_GPR(0),
2953				 DST_REL(ABSOLUTE),
2954				 DST_ELEM(ELEM_X),
2955				 CLAMP(0));
2956    /* 25 */
2957    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2958			     SRC0_REL(ABSOLUTE),
2959			     SRC0_ELEM(ELEM_X),
2960			     SRC0_NEG(0),
2961			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2962			     SRC1_REL(ABSOLUTE),
2963			     SRC1_ELEM(ELEM_X),
2964			     SRC1_NEG(0),
2965			     INDEX_MODE(SQ_INDEX_AR_X),
2966			     PRED_SEL(SQ_PRED_SEL_OFF),
2967			     LAST(0));
2968    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2969				 SRC1_ABS(0),
2970				 UPDATE_EXECUTE_MASK(0),
2971				 UPDATE_PRED(0),
2972				 WRITE_MASK(1),
2973				 OMOD(SQ_ALU_OMOD_OFF),
2974				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2975				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2976				 DST_GPR(0),
2977				 DST_REL(ABSOLUTE),
2978				 DST_ELEM(ELEM_Y),
2979				 CLAMP(0));
2980    /* 26 */
2981    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2982			     SRC0_REL(ABSOLUTE),
2983			     SRC0_ELEM(ELEM_Y),
2984			     SRC0_NEG(0),
2985			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2986			     SRC1_REL(ABSOLUTE),
2987			     SRC1_ELEM(ELEM_X),
2988			     SRC1_NEG(0),
2989			     INDEX_MODE(SQ_INDEX_AR_X),
2990			     PRED_SEL(SQ_PRED_SEL_OFF),
2991			     LAST(0));
2992    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2993				 SRC1_ABS(0),
2994				 UPDATE_EXECUTE_MASK(0),
2995				 UPDATE_PRED(0),
2996				 WRITE_MASK(0),
2997				 OMOD(SQ_ALU_OMOD_OFF),
2998				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2999				 BANK_SWIZZLE(SQ_ALU_VEC_210),
3000				 DST_GPR(0),
3001				 DST_REL(ABSOLUTE),
3002				 DST_ELEM(ELEM_Z),
3003				 CLAMP(0));
3004    /* 27 */
3005    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
3006			     SRC0_REL(ABSOLUTE),
3007			     SRC0_ELEM(ELEM_X),
3008			     SRC0_NEG(0),
3009			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
3010			     SRC1_REL(ABSOLUTE),
3011			     SRC1_ELEM(ELEM_X),
3012			     SRC1_NEG(0),
3013			     INDEX_MODE(SQ_INDEX_AR_X),
3014			     PRED_SEL(SQ_PRED_SEL_OFF),
3015			     LAST(1));
3016    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3017				 SRC1_ABS(0),
3018				 UPDATE_EXECUTE_MASK(0),
3019				 UPDATE_PRED(0),
3020				 WRITE_MASK(0),
3021				 OMOD(SQ_ALU_OMOD_OFF),
3022				 ALU_INST(SQ_OP2_INST_INTERP_XY),
3023				 BANK_SWIZZLE(SQ_ALU_VEC_210),
3024				 DST_GPR(0),
3025				 DST_REL(ABSOLUTE),
3026				 DST_ELEM(ELEM_W),
3027				 CLAMP(0));
3028
3029    /* 28/29 - src - mask */
3030    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
3031			     INST_MOD(0),
3032			     FETCH_WHOLE_QUAD(0),
3033			     RESOURCE_ID(0),
3034			     SRC_GPR(1),
3035			     SRC_REL(ABSOLUTE),
3036			     ALT_CONST(0),
3037			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
3038			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
3039    shader[i++] = TEX_DWORD1(DST_GPR(1),
3040			     DST_REL(ABSOLUTE),
3041			     DST_SEL_X(SQ_SEL_X),
3042			     DST_SEL_Y(SQ_SEL_Y),
3043			     DST_SEL_Z(SQ_SEL_Z),
3044			     DST_SEL_W(SQ_SEL_W),
3045			     LOD_BIAS(0),
3046			     COORD_TYPE_X(TEX_NORMALIZED),
3047			     COORD_TYPE_Y(TEX_NORMALIZED),
3048			     COORD_TYPE_Z(TEX_NORMALIZED),
3049			     COORD_TYPE_W(TEX_NORMALIZED));
3050    shader[i++] = TEX_DWORD2(OFFSET_X(0),
3051			     OFFSET_Y(0),
3052			     OFFSET_Z(0),
3053			     SAMPLER_ID(0),
3054			     SRC_SEL_X(SQ_SEL_X),
3055			     SRC_SEL_Y(SQ_SEL_Y),
3056			     SRC_SEL_Z(SQ_SEL_0),
3057			     SRC_SEL_W(SQ_SEL_1));
3058    shader[i++] = TEX_DWORD_PAD;
3059    /* 30/31 - mask */
3060    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
3061			     INST_MOD(0),
3062			     FETCH_WHOLE_QUAD(0),
3063			     RESOURCE_ID(1),
3064			     SRC_GPR(0),
3065			     SRC_REL(ABSOLUTE),
3066                             ALT_CONST(0),
3067                             RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
3068                             SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
3069    shader[i++] = TEX_DWORD1(DST_GPR(0),
3070			     DST_REL(ABSOLUTE),
3071			     DST_SEL_X(SQ_SEL_X),
3072			     DST_SEL_Y(SQ_SEL_Y),
3073			     DST_SEL_Z(SQ_SEL_Z),
3074			     DST_SEL_W(SQ_SEL_W),
3075			     LOD_BIAS(0),
3076			     COORD_TYPE_X(TEX_NORMALIZED),
3077			     COORD_TYPE_Y(TEX_NORMALIZED),
3078			     COORD_TYPE_Z(TEX_NORMALIZED),
3079			     COORD_TYPE_W(TEX_NORMALIZED));
3080    shader[i++] = TEX_DWORD2(OFFSET_X(0),
3081			     OFFSET_Y(0),
3082			     OFFSET_Z(0),
3083			     SAMPLER_ID(1),
3084			     SRC_SEL_X(SQ_SEL_X),
3085			     SRC_SEL_Y(SQ_SEL_Y),
3086			     SRC_SEL_Z(SQ_SEL_0),
3087			     SRC_SEL_W(SQ_SEL_1));
3088    shader[i++] = TEX_DWORD_PAD;
3089
3090    /* 32/33 - src - non-mask */
3091    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
3092			     INST_MOD(0),
3093			     FETCH_WHOLE_QUAD(0),
3094			     RESOURCE_ID(0),
3095			     SRC_GPR(0),
3096			     SRC_REL(ABSOLUTE),
3097			     ALT_CONST(0),
3098			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
3099			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
3100    shader[i++] = TEX_DWORD1(DST_GPR(0),
3101			     DST_REL(ABSOLUTE),
3102			     DST_SEL_X(SQ_SEL_X),
3103			     DST_SEL_Y(SQ_SEL_Y),
3104			     DST_SEL_Z(SQ_SEL_Z),
3105			     DST_SEL_W(SQ_SEL_W),
3106			     LOD_BIAS(0),
3107			     COORD_TYPE_X(TEX_NORMALIZED),
3108			     COORD_TYPE_Y(TEX_NORMALIZED),
3109			     COORD_TYPE_Z(TEX_NORMALIZED),
3110			     COORD_TYPE_W(TEX_NORMALIZED));
3111    shader[i++] = TEX_DWORD2(OFFSET_X(0),
3112			     OFFSET_Y(0),
3113			     OFFSET_Z(0),
3114			     SAMPLER_ID(0),
3115			     SRC_SEL_X(SQ_SEL_X),
3116			     SRC_SEL_Y(SQ_SEL_Y),
3117			     SRC_SEL_Z(SQ_SEL_0),
3118			     SRC_SEL_W(SQ_SEL_1));
3119    shader[i++] = TEX_DWORD_PAD;
3120
3121    return i;
3122}
3123
3124#endif
3125