r600_shader.c revision 921a55d8
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Author: Alex Deucher <alexander.deucher@amd.com>
24 *
25 */
26
27#ifdef HAVE_CONFIG_H
28#include "config.h"
29#endif
30
31#include "xf86.h"
32
33#include "radeon.h"
34#include "r600_shader.h"
35#include "r600_reg.h"
36
37/* solid vs --------------------------------------- */
38int R600_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader)
39{
40    int i = 0;
41
42    /* 0 */
43    shader[i++] = CF_DWORD0(ADDR(4));
44    shader[i++] = CF_DWORD1(POP_COUNT(0),
45			    CF_CONST(0),
46			    COND(SQ_CF_COND_ACTIVE),
47			    I_COUNT(1),
48			    CALL_COUNT(0),
49			    END_OF_PROGRAM(0),
50			    VALID_PIXEL_MODE(0),
51			    CF_INST(SQ_CF_INST_VTX),
52			    WHOLE_QUAD_MODE(0),
53			    BARRIER(1));
54    /* 1 */
55    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
56					  TYPE(SQ_EXPORT_POS),
57					  RW_GPR(1),
58					  RW_REL(ABSOLUTE),
59					  INDEX_GPR(0),
60					  ELEM_SIZE(0));
61    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
62					       SRC_SEL_Y(SQ_SEL_Y),
63					       SRC_SEL_Z(SQ_SEL_Z),
64					       SRC_SEL_W(SQ_SEL_W),
65					       R6xx_ELEM_LOOP(0),
66					       BURST_COUNT(1),
67					       END_OF_PROGRAM(0),
68					       VALID_PIXEL_MODE(0),
69					       CF_INST(SQ_CF_INST_EXPORT_DONE),
70					       WHOLE_QUAD_MODE(0),
71					       BARRIER(1));
72    /* 2 - always export a param whether it's used or not */
73    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
74					  TYPE(SQ_EXPORT_PARAM),
75					  RW_GPR(0),
76					  RW_REL(ABSOLUTE),
77					  INDEX_GPR(0),
78					  ELEM_SIZE(0));
79    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
80					       SRC_SEL_Y(SQ_SEL_Y),
81					       SRC_SEL_Z(SQ_SEL_Z),
82					       SRC_SEL_W(SQ_SEL_W),
83					       R6xx_ELEM_LOOP(0),
84					       BURST_COUNT(0),
85					       END_OF_PROGRAM(1),
86					       VALID_PIXEL_MODE(0),
87					       CF_INST(SQ_CF_INST_EXPORT_DONE),
88					       WHOLE_QUAD_MODE(0),
89					       BARRIER(0));
90    /* 3 - padding */
91    shader[i++] = 0x00000000;
92    shader[i++] = 0x00000000;
93    /* 4/5 */
94    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
95			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
96			     FETCH_WHOLE_QUAD(0),
97			     BUFFER_ID(0),
98			     SRC_GPR(0),
99			     SRC_REL(ABSOLUTE),
100			     SRC_SEL_X(SQ_SEL_X),
101			     MEGA_FETCH_COUNT(8));
102    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
103				 DST_REL(0),
104				 DST_SEL_X(SQ_SEL_X),
105				 DST_SEL_Y(SQ_SEL_Y),
106				 DST_SEL_Z(SQ_SEL_0),
107				 DST_SEL_W(SQ_SEL_1),
108				 USE_CONST_FIELDS(0),
109				 DATA_FORMAT(FMT_32_32_FLOAT),
110				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
111				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
112				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
113    shader[i++] = VTX_DWORD2(OFFSET(0),
114			     ENDIAN_SWAP(ENDIAN_NONE),
115			     CONST_BUF_NO_STRIDE(0),
116			     MEGA_FETCH(1));
117    shader[i++] = VTX_DWORD_PAD;
118
119    return i;
120}
121
122/* solid ps --------------------------------------- */
123int R600_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader)
124{
125    int i = 0;
126
127    /* 0 */
128    shader[i++] = CF_ALU_DWORD0(ADDR(2),
129				KCACHE_BANK0(0),
130				KCACHE_BANK1(0),
131				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
132    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
133				KCACHE_ADDR0(0),
134				KCACHE_ADDR1(0),
135				I_COUNT(4),
136				USES_WATERFALL(0),
137				CF_INST(SQ_CF_INST_ALU),
138				WHOLE_QUAD_MODE(0),
139				BARRIER(1));
140    /* 1 */
141    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
142					  TYPE(SQ_EXPORT_PIXEL),
143					  RW_GPR(0),
144					  RW_REL(ABSOLUTE),
145					  INDEX_GPR(0),
146					  ELEM_SIZE(1));
147    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
148					       SRC_SEL_Y(SQ_SEL_Y),
149					       SRC_SEL_Z(SQ_SEL_Z),
150					       SRC_SEL_W(SQ_SEL_W),
151					       R6xx_ELEM_LOOP(0),
152					       BURST_COUNT(1),
153					       END_OF_PROGRAM(1),
154					       VALID_PIXEL_MODE(0),
155					       CF_INST(SQ_CF_INST_EXPORT_DONE),
156					       WHOLE_QUAD_MODE(0),
157					       BARRIER(1));
158
159    /* 2 */
160    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
161			     SRC0_REL(ABSOLUTE),
162			     SRC0_ELEM(ELEM_X),
163			     SRC0_NEG(0),
164			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
165			     SRC1_REL(ABSOLUTE),
166			     SRC1_ELEM(ELEM_X),
167			     SRC1_NEG(0),
168			     INDEX_MODE(SQ_INDEX_AR_X),
169			     PRED_SEL(SQ_PRED_SEL_OFF),
170			     LAST(0));
171    shader[i++] = ALU_DWORD1_OP2(ChipSet,
172				 SRC0_ABS(0),
173				 SRC1_ABS(0),
174				 UPDATE_EXECUTE_MASK(0),
175				 UPDATE_PRED(0),
176				 WRITE_MASK(1),
177				 FOG_MERGE(0),
178				 OMOD(SQ_ALU_OMOD_OFF),
179				 ALU_INST(SQ_OP2_INST_MOV),
180				 BANK_SWIZZLE(SQ_ALU_VEC_012),
181				 DST_GPR(0),
182				 DST_REL(ABSOLUTE),
183				 DST_ELEM(ELEM_X),
184				 CLAMP(1));
185    /* 3 */
186    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
187			     SRC0_REL(ABSOLUTE),
188			     SRC0_ELEM(ELEM_Y),
189			     SRC0_NEG(0),
190			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
191			     SRC1_REL(ABSOLUTE),
192			     SRC1_ELEM(ELEM_Y),
193			     SRC1_NEG(0),
194			     INDEX_MODE(SQ_INDEX_AR_X),
195			     PRED_SEL(SQ_PRED_SEL_OFF),
196			     LAST(0));
197    shader[i++] = ALU_DWORD1_OP2(ChipSet,
198				 SRC0_ABS(0),
199				 SRC1_ABS(0),
200				 UPDATE_EXECUTE_MASK(0),
201				 UPDATE_PRED(0),
202				 WRITE_MASK(1),
203				 FOG_MERGE(0),
204				 OMOD(SQ_ALU_OMOD_OFF),
205				 ALU_INST(SQ_OP2_INST_MOV),
206				 BANK_SWIZZLE(SQ_ALU_VEC_012),
207				 DST_GPR(0),
208				 DST_REL(ABSOLUTE),
209				 DST_ELEM(ELEM_Y),
210				 CLAMP(1));
211    /* 4 */
212    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
213			     SRC0_REL(ABSOLUTE),
214			     SRC0_ELEM(ELEM_Z),
215			     SRC0_NEG(0),
216			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
217			     SRC1_REL(ABSOLUTE),
218			     SRC1_ELEM(ELEM_Z),
219			     SRC1_NEG(0),
220			     INDEX_MODE(SQ_INDEX_AR_X),
221			     PRED_SEL(SQ_PRED_SEL_OFF),
222			     LAST(0));
223    shader[i++] = ALU_DWORD1_OP2(ChipSet,
224				 SRC0_ABS(0),
225				 SRC1_ABS(0),
226				 UPDATE_EXECUTE_MASK(0),
227				 UPDATE_PRED(0),
228				 WRITE_MASK(1),
229				 FOG_MERGE(0),
230				 OMOD(SQ_ALU_OMOD_OFF),
231				 ALU_INST(SQ_OP2_INST_MOV),
232				 BANK_SWIZZLE(SQ_ALU_VEC_012),
233				 DST_GPR(0),
234				 DST_REL(ABSOLUTE),
235				 DST_ELEM(ELEM_Z),
236				 CLAMP(1));
237    /* 5 */
238    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
239			     SRC0_REL(ABSOLUTE),
240			     SRC0_ELEM(ELEM_W),
241			     SRC0_NEG(0),
242			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
243			     SRC1_REL(ABSOLUTE),
244			     SRC1_ELEM(ELEM_W),
245			     SRC1_NEG(0),
246			     INDEX_MODE(SQ_INDEX_AR_X),
247			     PRED_SEL(SQ_PRED_SEL_OFF),
248			     LAST(1));
249    shader[i++] = ALU_DWORD1_OP2(ChipSet,
250				 SRC0_ABS(0),
251				 SRC1_ABS(0),
252				 UPDATE_EXECUTE_MASK(0),
253				 UPDATE_PRED(0),
254				 WRITE_MASK(1),
255				 FOG_MERGE(0),
256				 OMOD(SQ_ALU_OMOD_OFF),
257				 ALU_INST(SQ_OP2_INST_MOV),
258				 BANK_SWIZZLE(SQ_ALU_VEC_012),
259				 DST_GPR(0),
260				 DST_REL(ABSOLUTE),
261				 DST_ELEM(ELEM_W),
262				 CLAMP(1));
263
264    return i;
265}
266
267/* copy vs --------------------------------------- */
268int R600_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader)
269{
270    int i = 0;
271
272    /* 0 */
273    shader[i++] = CF_DWORD0(ADDR(4));
274    shader[i++] = CF_DWORD1(POP_COUNT(0),
275			    CF_CONST(0),
276			    COND(SQ_CF_COND_ACTIVE),
277			    I_COUNT(2),
278			    CALL_COUNT(0),
279			    END_OF_PROGRAM(0),
280			    VALID_PIXEL_MODE(0),
281			    CF_INST(SQ_CF_INST_VTX),
282			    WHOLE_QUAD_MODE(0),
283			    BARRIER(1));
284    /* 1 */
285    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
286					  TYPE(SQ_EXPORT_POS),
287					  RW_GPR(1),
288					  RW_REL(ABSOLUTE),
289					  INDEX_GPR(0),
290					  ELEM_SIZE(0));
291    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
292					       SRC_SEL_Y(SQ_SEL_Y),
293					       SRC_SEL_Z(SQ_SEL_Z),
294					       SRC_SEL_W(SQ_SEL_W),
295					       R6xx_ELEM_LOOP(0),
296					       BURST_COUNT(0),
297					       END_OF_PROGRAM(0),
298					       VALID_PIXEL_MODE(0),
299					       CF_INST(SQ_CF_INST_EXPORT_DONE),
300					       WHOLE_QUAD_MODE(0),
301					       BARRIER(1));
302    /* 2 */
303    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
304					  TYPE(SQ_EXPORT_PARAM),
305					  RW_GPR(0),
306					  RW_REL(ABSOLUTE),
307					  INDEX_GPR(0),
308					  ELEM_SIZE(0));
309    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
310					       SRC_SEL_Y(SQ_SEL_Y),
311					       SRC_SEL_Z(SQ_SEL_Z),
312					       SRC_SEL_W(SQ_SEL_W),
313					       R6xx_ELEM_LOOP(0),
314					       BURST_COUNT(0),
315					       END_OF_PROGRAM(1),
316					       VALID_PIXEL_MODE(0),
317					       CF_INST(SQ_CF_INST_EXPORT_DONE),
318					       WHOLE_QUAD_MODE(0),
319					       BARRIER(0));
320    /* 3 */
321    shader[i++] = 0x00000000;
322    shader[i++] = 0x00000000;
323    /* 4/5 */
324    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
325			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
326			     FETCH_WHOLE_QUAD(0),
327			     BUFFER_ID(0),
328			     SRC_GPR(0),
329			     SRC_REL(ABSOLUTE),
330			     SRC_SEL_X(SQ_SEL_X),
331			     MEGA_FETCH_COUNT(16));
332    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
333				 DST_REL(0),
334				 DST_SEL_X(SQ_SEL_X),
335				 DST_SEL_Y(SQ_SEL_Y),
336				 DST_SEL_Z(SQ_SEL_0),
337				 DST_SEL_W(SQ_SEL_1),
338				 USE_CONST_FIELDS(0),
339				 DATA_FORMAT(FMT_32_32_FLOAT),
340				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
341				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
342				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
343    shader[i++] = VTX_DWORD2(OFFSET(0),
344			     ENDIAN_SWAP(ENDIAN_NONE),
345			     CONST_BUF_NO_STRIDE(0),
346			     MEGA_FETCH(1));
347    shader[i++] = VTX_DWORD_PAD;
348    /* 6/7 */
349    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
350			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
351			     FETCH_WHOLE_QUAD(0),
352			     BUFFER_ID(0),
353			     SRC_GPR(0),
354			     SRC_REL(ABSOLUTE),
355			     SRC_SEL_X(SQ_SEL_X),
356			     MEGA_FETCH_COUNT(8));
357    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
358				 DST_REL(0),
359				 DST_SEL_X(SQ_SEL_X),
360				 DST_SEL_Y(SQ_SEL_Y),
361				 DST_SEL_Z(SQ_SEL_0),
362				 DST_SEL_W(SQ_SEL_1),
363				 USE_CONST_FIELDS(0),
364				 DATA_FORMAT(FMT_32_32_FLOAT),
365				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
366				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
367				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
368    shader[i++] = VTX_DWORD2(OFFSET(8),
369			     ENDIAN_SWAP(ENDIAN_NONE),
370			     CONST_BUF_NO_STRIDE(0),
371			     MEGA_FETCH(0));
372    shader[i++] = VTX_DWORD_PAD;
373
374    return i;
375}
376
377/* copy ps --------------------------------------- */
378int R600_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader)
379{
380    int i=0;
381
382    /* CF INST 0 */
383    shader[i++] = CF_DWORD0(ADDR(2));
384    shader[i++] = CF_DWORD1(POP_COUNT(0),
385			    CF_CONST(0),
386			    COND(SQ_CF_COND_ACTIVE),
387			    I_COUNT(1),
388			    CALL_COUNT(0),
389			    END_OF_PROGRAM(0),
390			    VALID_PIXEL_MODE(0),
391			    CF_INST(SQ_CF_INST_TEX),
392			    WHOLE_QUAD_MODE(0),
393			    BARRIER(1));
394    /* CF INST 1 */
395    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
396					  TYPE(SQ_EXPORT_PIXEL),
397					  RW_GPR(0),
398					  RW_REL(ABSOLUTE),
399					  INDEX_GPR(0),
400					  ELEM_SIZE(1));
401    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
402					       SRC_SEL_Y(SQ_SEL_Y),
403					       SRC_SEL_Z(SQ_SEL_Z),
404					       SRC_SEL_W(SQ_SEL_W),
405					       R6xx_ELEM_LOOP(0),
406					       BURST_COUNT(1),
407					       END_OF_PROGRAM(1),
408					       VALID_PIXEL_MODE(0),
409					       CF_INST(SQ_CF_INST_EXPORT_DONE),
410					       WHOLE_QUAD_MODE(0),
411					       BARRIER(1));
412    /* TEX INST 0 */
413    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
414			     BC_FRAC_MODE(0),
415			     FETCH_WHOLE_QUAD(0),
416			     RESOURCE_ID(0),
417			     SRC_GPR(0),
418			     SRC_REL(ABSOLUTE),
419			     R7xx_ALT_CONST(0));
420    shader[i++] = TEX_DWORD1(DST_GPR(0),
421			     DST_REL(ABSOLUTE),
422			     DST_SEL_X(SQ_SEL_X), /* R */
423			     DST_SEL_Y(SQ_SEL_Y), /* G */
424			     DST_SEL_Z(SQ_SEL_Z), /* B */
425			     DST_SEL_W(SQ_SEL_W), /* A */
426			     LOD_BIAS(0),
427			     COORD_TYPE_X(TEX_UNNORMALIZED),
428			     COORD_TYPE_Y(TEX_UNNORMALIZED),
429			     COORD_TYPE_Z(TEX_UNNORMALIZED),
430			     COORD_TYPE_W(TEX_UNNORMALIZED));
431    shader[i++] = TEX_DWORD2(OFFSET_X(0),
432			     OFFSET_Y(0),
433			     OFFSET_Z(0),
434			     SAMPLER_ID(0),
435			     SRC_SEL_X(SQ_SEL_X),
436			     SRC_SEL_Y(SQ_SEL_Y),
437			     SRC_SEL_Z(SQ_SEL_0),
438			     SRC_SEL_W(SQ_SEL_1));
439    shader[i++] = TEX_DWORD_PAD;
440
441    return i;
442}
443
444/*
445 * ; xv vertex shader
446 * 00 VTX: ADDR(4) CNT(2)
447 *       0  VFETCH R1.xy01, R0.x, fc0  MEGA(16) FORMAT(32_32_FLOAT)
448 *          FORMAT_COMP(SIGNED)
449 *       1  VFETCH R0.xy01, R0.x, fc0  MINI(8) OFFSET(8) FORMAT(32_32_FLOAT)
450 *          FORMAT_COMP(SIGNED)
451 * 01 EXP_DONE: POS0, R1
452 * 02 EXP_DONE: PARAM0, R0  NO_BARRIER
453 * END_OF_PROGRAM
454 */
455int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
456{
457    int i = 0;
458
459    /* 0 */
460    shader[i++] = CF_DWORD0(ADDR(6));
461    shader[i++] = CF_DWORD1(POP_COUNT(0),
462                            CF_CONST(0),
463                            COND(SQ_CF_COND_ACTIVE),
464                            I_COUNT(2),
465                            CALL_COUNT(0),
466                            END_OF_PROGRAM(0),
467                            VALID_PIXEL_MODE(0),
468                            CF_INST(SQ_CF_INST_VTX),
469                            WHOLE_QUAD_MODE(0),
470                            BARRIER(1));
471
472    /* 1 - ALU */
473    shader[i++] = CF_ALU_DWORD0(ADDR(4),
474				KCACHE_BANK0(0),
475				KCACHE_BANK1(0),
476				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
477    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
478				KCACHE_ADDR0(0),
479				KCACHE_ADDR1(0),
480				I_COUNT(2),
481				USES_WATERFALL(0),
482				CF_INST(SQ_CF_INST_ALU),
483				WHOLE_QUAD_MODE(0),
484				BARRIER(1));
485
486    /* 2 */
487    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
488                                          TYPE(SQ_EXPORT_POS),
489                                          RW_GPR(1),
490                                          RW_REL(ABSOLUTE),
491                                          INDEX_GPR(0),
492                                          ELEM_SIZE(3));
493    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
494                                               SRC_SEL_Y(SQ_SEL_Y),
495                                               SRC_SEL_Z(SQ_SEL_Z),
496                                               SRC_SEL_W(SQ_SEL_W),
497                                               R6xx_ELEM_LOOP(0),
498                                               BURST_COUNT(1),
499                                               END_OF_PROGRAM(0),
500                                               VALID_PIXEL_MODE(0),
501                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
502                                               WHOLE_QUAD_MODE(0),
503                                               BARRIER(1));
504    /* 3 */
505    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
506                                          TYPE(SQ_EXPORT_PARAM),
507                                          RW_GPR(0),
508                                          RW_REL(ABSOLUTE),
509                                          INDEX_GPR(0),
510                                          ELEM_SIZE(3));
511    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
512                                               SRC_SEL_Y(SQ_SEL_Y),
513                                               SRC_SEL_Z(SQ_SEL_Z),
514                                               SRC_SEL_W(SQ_SEL_W),
515                                               R6xx_ELEM_LOOP(0),
516                                               BURST_COUNT(1),
517                                               END_OF_PROGRAM(1),
518                                               VALID_PIXEL_MODE(0),
519                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
520                                               WHOLE_QUAD_MODE(0),
521                                               BARRIER(0));
522
523
524    /* 4 texX / w */
525    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
526                             SRC0_REL(ABSOLUTE),
527                             SRC0_ELEM(ELEM_X),
528                             SRC0_NEG(0),
529                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
530                             SRC1_REL(ABSOLUTE),
531                             SRC1_ELEM(ELEM_X),
532                             SRC1_NEG(0),
533                             INDEX_MODE(SQ_INDEX_AR_X),
534                             PRED_SEL(SQ_PRED_SEL_OFF),
535                             LAST(0));
536    shader[i++] = ALU_DWORD1_OP2(ChipSet,
537                                 SRC0_ABS(0),
538                                 SRC1_ABS(0),
539                                 UPDATE_EXECUTE_MASK(0),
540                                 UPDATE_PRED(0),
541                                 WRITE_MASK(1),
542                                 FOG_MERGE(0),
543                                 OMOD(SQ_ALU_OMOD_OFF),
544                                 ALU_INST(SQ_OP2_INST_MUL),
545                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
546                                 DST_GPR(0),
547                                 DST_REL(ABSOLUTE),
548                                 DST_ELEM(ELEM_X),
549                                 CLAMP(0));
550
551    /* 5 texY / h */
552    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
553                             SRC0_REL(ABSOLUTE),
554                             SRC0_ELEM(ELEM_Y),
555                             SRC0_NEG(0),
556                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
557                             SRC1_REL(ABSOLUTE),
558                             SRC1_ELEM(ELEM_Y),
559                             SRC1_NEG(0),
560                             INDEX_MODE(SQ_INDEX_AR_X),
561                             PRED_SEL(SQ_PRED_SEL_OFF),
562                             LAST(1));
563    shader[i++] = ALU_DWORD1_OP2(ChipSet,
564                                 SRC0_ABS(0),
565                                 SRC1_ABS(0),
566                                 UPDATE_EXECUTE_MASK(0),
567                                 UPDATE_PRED(0),
568                                 WRITE_MASK(1),
569                                 FOG_MERGE(0),
570                                 OMOD(SQ_ALU_OMOD_OFF),
571                                 ALU_INST(SQ_OP2_INST_MUL),
572                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
573                                 DST_GPR(0),
574                                 DST_REL(ABSOLUTE),
575                                 DST_ELEM(ELEM_Y),
576                                 CLAMP(0));
577
578    /* 6/7 */
579    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
580                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
581                             FETCH_WHOLE_QUAD(0),
582                             BUFFER_ID(0),
583                             SRC_GPR(0),
584                             SRC_REL(ABSOLUTE),
585                             SRC_SEL_X(SQ_SEL_X),
586                             MEGA_FETCH_COUNT(16));
587    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
588                                 DST_REL(ABSOLUTE),
589                                 DST_SEL_X(SQ_SEL_X),
590                                 DST_SEL_Y(SQ_SEL_Y),
591                                 DST_SEL_Z(SQ_SEL_0),
592                                 DST_SEL_W(SQ_SEL_1),
593                                 USE_CONST_FIELDS(0),
594                                 DATA_FORMAT(FMT_32_32_FLOAT),
595                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
596                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
597                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
598    shader[i++] = VTX_DWORD2(OFFSET(0),
599                             ENDIAN_SWAP(ENDIAN_NONE),
600                             CONST_BUF_NO_STRIDE(0),
601                             MEGA_FETCH(1));
602    shader[i++] = VTX_DWORD_PAD;
603    /* 8/9 */
604    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
605                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
606                             FETCH_WHOLE_QUAD(0),
607                             BUFFER_ID(0),
608                             SRC_GPR(0),
609                             SRC_REL(ABSOLUTE),
610                             SRC_SEL_X(SQ_SEL_X),
611                             MEGA_FETCH_COUNT(8));
612    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
613                                 DST_REL(ABSOLUTE),
614                                 DST_SEL_X(SQ_SEL_X),
615                                 DST_SEL_Y(SQ_SEL_Y),
616                                 DST_SEL_Z(SQ_SEL_0),
617                                 DST_SEL_W(SQ_SEL_1),
618                                 USE_CONST_FIELDS(0),
619                                 DATA_FORMAT(FMT_32_32_FLOAT),
620                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
621                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
622                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
623    shader[i++] = VTX_DWORD2(OFFSET(8),
624                             ENDIAN_SWAP(ENDIAN_NONE),
625                             CONST_BUF_NO_STRIDE(0),
626                             MEGA_FETCH(0));
627    shader[i++] = VTX_DWORD_PAD;
628
629    return i;
630}
631
632int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
633{
634    int i = 0;
635
636    /* 0 */
637    shader[i++] = CF_DWORD0(ADDR(16));
638    shader[i++] = CF_DWORD1(POP_COUNT(0),
639                            CF_CONST(0),
640                            COND(SQ_CF_COND_BOOL),
641                            I_COUNT(0),
642                            CALL_COUNT(0),
643                            END_OF_PROGRAM(0),
644                            VALID_PIXEL_MODE(0),
645                            CF_INST(SQ_CF_INST_CALL),
646                            WHOLE_QUAD_MODE(0),
647                            BARRIER(0));
648    /* 1 */
649    shader[i++] = CF_DWORD0(ADDR(24));
650    shader[i++] = CF_DWORD1(POP_COUNT(0),
651                            CF_CONST(0),
652                            COND(SQ_CF_COND_NOT_BOOL),
653                            I_COUNT(0),
654                            CALL_COUNT(0),
655                            END_OF_PROGRAM(0),
656                            VALID_PIXEL_MODE(0),
657                            CF_INST(SQ_CF_INST_CALL),
658                            WHOLE_QUAD_MODE(0),
659                            BARRIER(0));
660    /* 2 */
661    shader[i++] = CF_ALU_DWORD0(ADDR(4),
662                                KCACHE_BANK0(0),
663                                KCACHE_BANK1(0),
664                                KCACHE_MODE0(SQ_CF_KCACHE_NOP));
665    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
666                                KCACHE_ADDR0(0),
667                                KCACHE_ADDR1(0),
668                                I_COUNT(12),
669                                USES_WATERFALL(0),
670                                CF_INST(SQ_CF_INST_ALU),
671                                WHOLE_QUAD_MODE(0),
672                                BARRIER(1));
673    /* 3 */
674    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
675                                          TYPE(SQ_EXPORT_PIXEL),
676                                          RW_GPR(2),
677                                          RW_REL(ABSOLUTE),
678                                          INDEX_GPR(0),
679                                          ELEM_SIZE(3));
680    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
681                                               SRC_SEL_Y(SQ_SEL_Y),
682                                               SRC_SEL_Z(SQ_SEL_Z),
683                                               SRC_SEL_W(SQ_SEL_W),
684                                               R6xx_ELEM_LOOP(0),
685                                               BURST_COUNT(1),
686                                               END_OF_PROGRAM(1),
687                                               VALID_PIXEL_MODE(0),
688                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
689                                               WHOLE_QUAD_MODE(0),
690                                               BARRIER(1));
691    /* 4,5,6,7 */
692    /* r2.x = MAD(c0.w, r1.x, c0.x) */
693    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
694                             SRC0_REL(ABSOLUTE),
695                             SRC0_ELEM(ELEM_W),
696                             SRC0_NEG(0),
697                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
698                             SRC1_REL(ABSOLUTE),
699                             SRC1_ELEM(ELEM_X),
700                             SRC1_NEG(0),
701                             INDEX_MODE(SQ_INDEX_LOOP),
702                             PRED_SEL(SQ_PRED_SEL_OFF),
703                             LAST(0));
704    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0),
705                                 SRC2_REL(ABSOLUTE),
706                                 SRC2_ELEM(ELEM_X),
707                                 SRC2_NEG(0),
708                                 ALU_INST(SQ_OP3_INST_MULADD),
709                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
710                                 DST_GPR(2),
711                                 DST_REL(ABSOLUTE),
712                                 DST_ELEM(ELEM_X),
713                                 CLAMP(0));
714    /* r2.y = MAD(c0.w, r1.x, c0.y) */
715    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
716                             SRC0_REL(ABSOLUTE),
717                             SRC0_ELEM(ELEM_W),
718                             SRC0_NEG(0),
719                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
720                             SRC1_REL(ABSOLUTE),
721                             SRC1_ELEM(ELEM_X),
722                             SRC1_NEG(0),
723                             INDEX_MODE(SQ_INDEX_LOOP),
724                             PRED_SEL(SQ_PRED_SEL_OFF),
725                             LAST(0));
726    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0),
727                                 SRC2_REL(ABSOLUTE),
728                                 SRC2_ELEM(ELEM_Y),
729                                 SRC2_NEG(0),
730                                 ALU_INST(SQ_OP3_INST_MULADD),
731                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
732                                 DST_GPR(2),
733                                 DST_REL(ABSOLUTE),
734                                 DST_ELEM(ELEM_Y),
735                                 CLAMP(0));
736    /* r2.z = MAD(c0.w, r1.x, c0.z) */
737    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
738                             SRC0_REL(ABSOLUTE),
739                             SRC0_ELEM(ELEM_W),
740                             SRC0_NEG(0),
741                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
742                             SRC1_REL(ABSOLUTE),
743                             SRC1_ELEM(ELEM_X),
744                             SRC1_NEG(0),
745                             INDEX_MODE(SQ_INDEX_LOOP),
746                             PRED_SEL(SQ_PRED_SEL_OFF),
747                             LAST(0));
748    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0),
749                                 SRC2_REL(ABSOLUTE),
750                                 SRC2_ELEM(ELEM_Z),
751                                 SRC2_NEG(0),
752                                 ALU_INST(SQ_OP3_INST_MULADD),
753                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
754                                 DST_GPR(2),
755                                 DST_REL(ABSOLUTE),
756                                 DST_ELEM(ELEM_Z),
757                                 CLAMP(0));
758    /* r2.w = MAD(0, 0, 1) */
759    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
760                             SRC0_REL(ABSOLUTE),
761                             SRC0_ELEM(ELEM_X),
762                             SRC0_NEG(0),
763                             SRC1_SEL(SQ_ALU_SRC_0),
764                             SRC1_REL(ABSOLUTE),
765                             SRC1_ELEM(ELEM_X),
766                             SRC1_NEG(0),
767                             INDEX_MODE(SQ_INDEX_LOOP),
768                             PRED_SEL(SQ_PRED_SEL_OFF),
769                             LAST(1));
770    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
771                                 SRC2_REL(ABSOLUTE),
772                                 SRC2_ELEM(ELEM_X),
773                                 SRC2_NEG(0),
774                                 ALU_INST(SQ_OP3_INST_MULADD),
775                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
776                                 DST_GPR(2),
777                                 DST_REL(ABSOLUTE),
778                                 DST_ELEM(ELEM_W),
779                                 CLAMP(0));
780
781    /* 8,9,10,11 */
782    /* r2.x = MAD(c1.x, r1.y, pv.x) */
783    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
784                             SRC0_REL(ABSOLUTE),
785                             SRC0_ELEM(ELEM_X),
786                             SRC0_NEG(0),
787                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
788                             SRC1_REL(ABSOLUTE),
789                             SRC1_ELEM(ELEM_Y),
790                             SRC1_NEG(0),
791                             INDEX_MODE(SQ_INDEX_LOOP),
792                             PRED_SEL(SQ_PRED_SEL_OFF),
793                             LAST(0));
794    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
795                                 SRC2_REL(ABSOLUTE),
796                                 SRC2_ELEM(ELEM_X),
797                                 SRC2_NEG(0),
798                                 ALU_INST(SQ_OP3_INST_MULADD),
799                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
800                                 DST_GPR(2),
801                                 DST_REL(ABSOLUTE),
802                                 DST_ELEM(ELEM_X),
803                                 CLAMP(0));
804    /* r2.y = MAD(c1.y, r1.y, pv.y) */
805    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
806                             SRC0_REL(ABSOLUTE),
807                             SRC0_ELEM(ELEM_Y),
808                             SRC0_NEG(0),
809                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
810                             SRC1_REL(ABSOLUTE),
811                             SRC1_ELEM(ELEM_Y),
812                             SRC1_NEG(0),
813                             INDEX_MODE(SQ_INDEX_LOOP),
814                             PRED_SEL(SQ_PRED_SEL_OFF),
815                             LAST(0));
816    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
817                                 SRC2_REL(ABSOLUTE),
818                                 SRC2_ELEM(ELEM_Y),
819                                 SRC2_NEG(0),
820                                 ALU_INST(SQ_OP3_INST_MULADD),
821                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
822                                 DST_GPR(2),
823                                 DST_REL(ABSOLUTE),
824                                 DST_ELEM(ELEM_Y),
825                                 CLAMP(0));
826    /* r2.z = MAD(c1.z, r1.y, pv.z) */
827    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
828                             SRC0_REL(ABSOLUTE),
829                             SRC0_ELEM(ELEM_Z),
830                             SRC0_NEG(0),
831                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
832                             SRC1_REL(ABSOLUTE),
833                             SRC1_ELEM(ELEM_Y),
834                             SRC1_NEG(0),
835                             INDEX_MODE(SQ_INDEX_LOOP),
836                             PRED_SEL(SQ_PRED_SEL_OFF),
837                             LAST(0));
838    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
839                                 SRC2_REL(ABSOLUTE),
840                                 SRC2_ELEM(ELEM_Z),
841                                 SRC2_NEG(0),
842                                 ALU_INST(SQ_OP3_INST_MULADD),
843                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
844                                 DST_GPR(2),
845                                 DST_REL(ABSOLUTE),
846                                 DST_ELEM(ELEM_Z),
847                                 CLAMP(0));
848    /* r2.w = MAD(0, 0, 1) */
849    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
850                             SRC0_REL(ABSOLUTE),
851                             SRC0_ELEM(ELEM_X),
852                             SRC0_NEG(0),
853                             SRC1_SEL(SQ_ALU_SRC_0),
854                             SRC1_REL(ABSOLUTE),
855                             SRC1_ELEM(ELEM_X),
856                             SRC1_NEG(0),
857                             INDEX_MODE(SQ_INDEX_LOOP),
858                             PRED_SEL(SQ_PRED_SEL_OFF),
859                             LAST(1));
860    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
861                                 SRC2_REL(ABSOLUTE),
862                                 SRC2_ELEM(ELEM_W),
863                                 SRC2_NEG(0),
864                                 ALU_INST(SQ_OP3_INST_MULADD),
865                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
866                                 DST_GPR(2),
867                                 DST_REL(ABSOLUTE),
868                                 DST_ELEM(ELEM_W),
869                                 CLAMP(0));
870    /* 12,13,14,15 */
871    /* r2.x = MAD(c2.x, r1.z, pv.x) */
872    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2),
873                             SRC0_REL(ABSOLUTE),
874                             SRC0_ELEM(ELEM_X),
875                             SRC0_NEG(0),
876                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
877                             SRC1_REL(ABSOLUTE),
878                             SRC1_ELEM(ELEM_Z),
879                             SRC1_NEG(0),
880                             INDEX_MODE(SQ_INDEX_LOOP),
881                             PRED_SEL(SQ_PRED_SEL_OFF),
882                             LAST(0));
883    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
884                                 SRC2_REL(ABSOLUTE),
885                                 SRC2_ELEM(ELEM_X),
886                                 SRC2_NEG(0),
887                                 ALU_INST(SQ_OP3_INST_MULADD),
888                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
889                                 DST_GPR(2),
890                                 DST_REL(ABSOLUTE),
891                                 DST_ELEM(ELEM_X),
892                                 CLAMP(1));
893    /* r2.y = MAD(c2.y, r1.z, pv.y) */
894    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2),
895                             SRC0_REL(ABSOLUTE),
896                             SRC0_ELEM(ELEM_Y),
897                             SRC0_NEG(0),
898                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
899                             SRC1_REL(ABSOLUTE),
900                             SRC1_ELEM(ELEM_Z),
901                             SRC1_NEG(0),
902                             INDEX_MODE(SQ_INDEX_LOOP),
903                             PRED_SEL(SQ_PRED_SEL_OFF),
904                             LAST(0));
905    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
906                                 SRC2_REL(ABSOLUTE),
907                                 SRC2_ELEM(ELEM_Y),
908                                 SRC2_NEG(0),
909                                 ALU_INST(SQ_OP3_INST_MULADD),
910                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
911                                 DST_GPR(2),
912                                 DST_REL(ABSOLUTE),
913                                 DST_ELEM(ELEM_Y),
914                                 CLAMP(1));
915    /* r2.z = MAD(c2.z, r1.z, pv.z) */
916    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2),
917                             SRC0_REL(ABSOLUTE),
918                             SRC0_ELEM(ELEM_Z),
919                             SRC0_NEG(0),
920                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
921                             SRC1_REL(ABSOLUTE),
922                             SRC1_ELEM(ELEM_Z),
923                             SRC1_NEG(0),
924                             INDEX_MODE(SQ_INDEX_LOOP),
925                             PRED_SEL(SQ_PRED_SEL_OFF),
926                             LAST(0));
927    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
928                                 SRC2_REL(ABSOLUTE),
929                                 SRC2_ELEM(ELEM_Z),
930                                 SRC2_NEG(0),
931                                 ALU_INST(SQ_OP3_INST_MULADD),
932                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
933                                 DST_GPR(2),
934                                 DST_REL(ABSOLUTE),
935                                 DST_ELEM(ELEM_Z),
936                                 CLAMP(1));
937    /* r2.w = MAD(0, 0, 1) */
938    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
939                             SRC0_REL(ABSOLUTE),
940                             SRC0_ELEM(ELEM_X),
941                             SRC0_NEG(0),
942                             SRC1_SEL(SQ_ALU_SRC_0),
943                             SRC1_REL(ABSOLUTE),
944                             SRC1_ELEM(ELEM_X),
945                             SRC1_NEG(0),
946                             INDEX_MODE(SQ_INDEX_LOOP),
947                             PRED_SEL(SQ_PRED_SEL_OFF),
948                             LAST(1));
949    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
950                                 SRC2_REL(ABSOLUTE),
951                                 SRC2_ELEM(ELEM_X),
952                                 SRC2_NEG(0),
953                                 ALU_INST(SQ_OP3_INST_MULADD),
954                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
955                                 DST_GPR(2),
956                                 DST_REL(ABSOLUTE),
957                                 DST_ELEM(ELEM_W),
958                                 CLAMP(1));
959
960    /* 16 */
961    shader[i++] = CF_DWORD0(ADDR(18));
962    shader[i++] = CF_DWORD1(POP_COUNT(0),
963                            CF_CONST(0),
964                            COND(SQ_CF_COND_ACTIVE),
965                            I_COUNT(3),
966                            CALL_COUNT(0),
967                            END_OF_PROGRAM(0),
968                            VALID_PIXEL_MODE(0),
969                            CF_INST(SQ_CF_INST_TEX),
970                            WHOLE_QUAD_MODE(0),
971                            BARRIER(1));
972    /* 17 */
973    shader[i++] = CF_DWORD0(ADDR(0));
974    shader[i++] = CF_DWORD1(POP_COUNT(0),
975			    CF_CONST(0),
976			    COND(SQ_CF_COND_ACTIVE),
977			    I_COUNT(0),
978			    CALL_COUNT(0),
979			    END_OF_PROGRAM(0),
980			    VALID_PIXEL_MODE(0),
981			    CF_INST(SQ_CF_INST_RETURN),
982			    WHOLE_QUAD_MODE(0),
983			    BARRIER(1));
984    /* 18/19 */
985    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
986                             BC_FRAC_MODE(0),
987                             FETCH_WHOLE_QUAD(0),
988                             RESOURCE_ID(0),
989                             SRC_GPR(0),
990                             SRC_REL(ABSOLUTE),
991                             R7xx_ALT_CONST(0));
992    shader[i++] = TEX_DWORD1(DST_GPR(1),
993                             DST_REL(ABSOLUTE),
994                             DST_SEL_X(SQ_SEL_X),
995                             DST_SEL_Y(SQ_SEL_MASK),
996                             DST_SEL_Z(SQ_SEL_MASK),
997                             DST_SEL_W(SQ_SEL_1),
998                             LOD_BIAS(0),
999                             COORD_TYPE_X(TEX_NORMALIZED),
1000                             COORD_TYPE_Y(TEX_NORMALIZED),
1001                             COORD_TYPE_Z(TEX_NORMALIZED),
1002                             COORD_TYPE_W(TEX_NORMALIZED));
1003    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1004                             OFFSET_Y(0),
1005                             OFFSET_Z(0),
1006                             SAMPLER_ID(0),
1007                             SRC_SEL_X(SQ_SEL_X),
1008                             SRC_SEL_Y(SQ_SEL_Y),
1009                             SRC_SEL_Z(SQ_SEL_0),
1010                             SRC_SEL_W(SQ_SEL_1));
1011    shader[i++] = TEX_DWORD_PAD;
1012    /* 20/21 */
1013    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1014                             BC_FRAC_MODE(0),
1015                             FETCH_WHOLE_QUAD(0),
1016                             RESOURCE_ID(1),
1017                             SRC_GPR(0),
1018                             SRC_REL(ABSOLUTE),
1019                             R7xx_ALT_CONST(0));
1020    shader[i++] = TEX_DWORD1(DST_GPR(1),
1021                             DST_REL(ABSOLUTE),
1022                             DST_SEL_X(SQ_SEL_MASK),
1023                             DST_SEL_Y(SQ_SEL_MASK),
1024                             DST_SEL_Z(SQ_SEL_X),
1025                             DST_SEL_W(SQ_SEL_MASK),
1026                             LOD_BIAS(0),
1027                             COORD_TYPE_X(TEX_NORMALIZED),
1028                             COORD_TYPE_Y(TEX_NORMALIZED),
1029                             COORD_TYPE_Z(TEX_NORMALIZED),
1030                             COORD_TYPE_W(TEX_NORMALIZED));
1031    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1032                             OFFSET_Y(0),
1033                             OFFSET_Z(0),
1034                             SAMPLER_ID(1),
1035                             SRC_SEL_X(SQ_SEL_X),
1036                             SRC_SEL_Y(SQ_SEL_Y),
1037                             SRC_SEL_Z(SQ_SEL_0),
1038                             SRC_SEL_W(SQ_SEL_1));
1039    shader[i++] = TEX_DWORD_PAD;
1040    /* 22/23 */
1041    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1042                             BC_FRAC_MODE(0),
1043                             FETCH_WHOLE_QUAD(0),
1044                             RESOURCE_ID(2),
1045                             SRC_GPR(0),
1046                             SRC_REL(ABSOLUTE),
1047                             R7xx_ALT_CONST(0));
1048    shader[i++] = TEX_DWORD1(DST_GPR(1),
1049                             DST_REL(ABSOLUTE),
1050                             DST_SEL_X(SQ_SEL_MASK),
1051                             DST_SEL_Y(SQ_SEL_X),
1052                             DST_SEL_Z(SQ_SEL_MASK),
1053                             DST_SEL_W(SQ_SEL_MASK),
1054                             LOD_BIAS(0),
1055                             COORD_TYPE_X(TEX_NORMALIZED),
1056                             COORD_TYPE_Y(TEX_NORMALIZED),
1057                             COORD_TYPE_Z(TEX_NORMALIZED),
1058                             COORD_TYPE_W(TEX_NORMALIZED));
1059    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1060                             OFFSET_Y(0),
1061                             OFFSET_Z(0),
1062                             SAMPLER_ID(2),
1063                             SRC_SEL_X(SQ_SEL_X),
1064                             SRC_SEL_Y(SQ_SEL_Y),
1065                             SRC_SEL_Z(SQ_SEL_0),
1066                             SRC_SEL_W(SQ_SEL_1));
1067    shader[i++] = TEX_DWORD_PAD;
1068    /* 24 */
1069    shader[i++] = CF_DWORD0(ADDR(26));
1070    shader[i++] = CF_DWORD1(POP_COUNT(0),
1071                            CF_CONST(0),
1072                            COND(SQ_CF_COND_ACTIVE),
1073                            I_COUNT(2),
1074                            CALL_COUNT(0),
1075                            END_OF_PROGRAM(0),
1076                            VALID_PIXEL_MODE(0),
1077                            CF_INST(SQ_CF_INST_TEX),
1078                            WHOLE_QUAD_MODE(0),
1079                            BARRIER(1));
1080    /* 25 */
1081    shader[i++] = CF_DWORD0(ADDR(0));
1082    shader[i++] = CF_DWORD1(POP_COUNT(0),
1083			    CF_CONST(0),
1084			    COND(SQ_CF_COND_ACTIVE),
1085			    I_COUNT(0),
1086			    CALL_COUNT(0),
1087			    END_OF_PROGRAM(0),
1088			    VALID_PIXEL_MODE(0),
1089			    CF_INST(SQ_CF_INST_RETURN),
1090			    WHOLE_QUAD_MODE(0),
1091			    BARRIER(1));
1092    /* 26/27 */
1093    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1094                             BC_FRAC_MODE(0),
1095                             FETCH_WHOLE_QUAD(0),
1096                             RESOURCE_ID(0),
1097                             SRC_GPR(0),
1098                             SRC_REL(ABSOLUTE),
1099                             R7xx_ALT_CONST(0));
1100    shader[i++] = TEX_DWORD1(DST_GPR(1),
1101                             DST_REL(ABSOLUTE),
1102                             DST_SEL_X(SQ_SEL_X),
1103                             DST_SEL_Y(SQ_SEL_MASK),
1104                             DST_SEL_Z(SQ_SEL_MASK),
1105                             DST_SEL_W(SQ_SEL_1),
1106                             LOD_BIAS(0),
1107                             COORD_TYPE_X(TEX_NORMALIZED),
1108                             COORD_TYPE_Y(TEX_NORMALIZED),
1109                             COORD_TYPE_Z(TEX_NORMALIZED),
1110                             COORD_TYPE_W(TEX_NORMALIZED));
1111    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1112                             OFFSET_Y(0),
1113                             OFFSET_Z(0),
1114                             SAMPLER_ID(0),
1115                             SRC_SEL_X(SQ_SEL_X),
1116                             SRC_SEL_Y(SQ_SEL_Y),
1117                             SRC_SEL_Z(SQ_SEL_0),
1118                             SRC_SEL_W(SQ_SEL_1));
1119    shader[i++] = TEX_DWORD_PAD;
1120    /* 28/29 */
1121    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1122                             BC_FRAC_MODE(0),
1123                             FETCH_WHOLE_QUAD(0),
1124                             RESOURCE_ID(1),
1125                             SRC_GPR(0),
1126                             SRC_REL(ABSOLUTE),
1127                             R7xx_ALT_CONST(0));
1128    shader[i++] = TEX_DWORD1(DST_GPR(1),
1129                             DST_REL(ABSOLUTE),
1130                             DST_SEL_X(SQ_SEL_MASK),
1131                             DST_SEL_Y(SQ_SEL_X),
1132                             DST_SEL_Z(SQ_SEL_Y),
1133                             DST_SEL_W(SQ_SEL_MASK),
1134                             LOD_BIAS(0),
1135                             COORD_TYPE_X(TEX_NORMALIZED),
1136                             COORD_TYPE_Y(TEX_NORMALIZED),
1137                             COORD_TYPE_Z(TEX_NORMALIZED),
1138                             COORD_TYPE_W(TEX_NORMALIZED));
1139    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1140                             OFFSET_Y(0),
1141                             OFFSET_Z(0),
1142                             SAMPLER_ID(1),
1143                             SRC_SEL_X(SQ_SEL_X),
1144                             SRC_SEL_Y(SQ_SEL_Y),
1145                             SRC_SEL_Z(SQ_SEL_0),
1146                             SRC_SEL_W(SQ_SEL_1));
1147    shader[i++] = TEX_DWORD_PAD;
1148
1149    return i;
1150}
1151
1152/* comp vs --------------------------------------- */
1153int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
1154{
1155    int i = 0;
1156
1157    /* 0 */
1158    shader[i++] = CF_DWORD0(ADDR(3));
1159    shader[i++] = CF_DWORD1(POP_COUNT(0),
1160                            CF_CONST(0),
1161                            COND(SQ_CF_COND_BOOL),
1162                            I_COUNT(0),
1163                            CALL_COUNT(0),
1164                            END_OF_PROGRAM(0),
1165                            VALID_PIXEL_MODE(0),
1166                            CF_INST(SQ_CF_INST_CALL),
1167                            WHOLE_QUAD_MODE(0),
1168                            BARRIER(0));
1169    /* 1 */
1170    shader[i++] = CF_DWORD0(ADDR(9));
1171    shader[i++] = CF_DWORD1(POP_COUNT(0),
1172                            CF_CONST(0),
1173                            COND(SQ_CF_COND_NOT_BOOL),
1174                            I_COUNT(0),
1175                            CALL_COUNT(0),
1176                            END_OF_PROGRAM(0),
1177                            VALID_PIXEL_MODE(0),
1178                            CF_INST(SQ_CF_INST_CALL),
1179                            WHOLE_QUAD_MODE(0),
1180                            BARRIER(0));
1181    /* 2 */
1182    shader[i++] = CF_DWORD0(ADDR(0));
1183    shader[i++] = CF_DWORD1(POP_COUNT(0),
1184                            CF_CONST(0),
1185                            COND(SQ_CF_COND_ACTIVE),
1186                            I_COUNT(0),
1187                            CALL_COUNT(0),
1188                            END_OF_PROGRAM(1),
1189                            VALID_PIXEL_MODE(0),
1190                            CF_INST(SQ_CF_INST_NOP),
1191                            WHOLE_QUAD_MODE(0),
1192                            BARRIER(1));
1193    /* 3 - mask sub */
1194    shader[i++] = CF_DWORD0(ADDR(44));
1195    shader[i++] = CF_DWORD1(POP_COUNT(0),
1196			    CF_CONST(0),
1197			    COND(SQ_CF_COND_ACTIVE),
1198			    I_COUNT(3),
1199			    CALL_COUNT(0),
1200			    END_OF_PROGRAM(0),
1201			    VALID_PIXEL_MODE(0),
1202			    CF_INST(SQ_CF_INST_VTX),
1203			    WHOLE_QUAD_MODE(0),
1204			    BARRIER(1));
1205
1206    /* 4 - ALU */
1207    shader[i++] = CF_ALU_DWORD0(ADDR(14),
1208				KCACHE_BANK0(0),
1209				KCACHE_BANK1(0),
1210				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
1211    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1212				KCACHE_ADDR0(0),
1213				KCACHE_ADDR1(0),
1214				I_COUNT(20),
1215				USES_WATERFALL(0),
1216				CF_INST(SQ_CF_INST_ALU),
1217				WHOLE_QUAD_MODE(0),
1218				BARRIER(1));
1219
1220    /* 5 - dst */
1221    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1222					  TYPE(SQ_EXPORT_POS),
1223					  RW_GPR(2),
1224					  RW_REL(ABSOLUTE),
1225					  INDEX_GPR(0),
1226					  ELEM_SIZE(0));
1227    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1228					       SRC_SEL_Y(SQ_SEL_Y),
1229					       SRC_SEL_Z(SQ_SEL_0),
1230					       SRC_SEL_W(SQ_SEL_1),
1231					       R6xx_ELEM_LOOP(0),
1232					       BURST_COUNT(1),
1233					       END_OF_PROGRAM(0),
1234					       VALID_PIXEL_MODE(0),
1235					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1236					       WHOLE_QUAD_MODE(0),
1237					       BARRIER(1));
1238    /* 6 - src */
1239    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1240					  TYPE(SQ_EXPORT_PARAM),
1241					  RW_GPR(1),
1242					  RW_REL(ABSOLUTE),
1243					  INDEX_GPR(0),
1244					  ELEM_SIZE(0));
1245    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1246					       SRC_SEL_Y(SQ_SEL_Y),
1247					       SRC_SEL_Z(SQ_SEL_0),
1248					       SRC_SEL_W(SQ_SEL_1),
1249					       R6xx_ELEM_LOOP(0),
1250					       BURST_COUNT(1),
1251					       END_OF_PROGRAM(0),
1252					       VALID_PIXEL_MODE(0),
1253					       CF_INST(SQ_CF_INST_EXPORT),
1254					       WHOLE_QUAD_MODE(0),
1255					       BARRIER(0));
1256    /* 7 - mask */
1257    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
1258					  TYPE(SQ_EXPORT_PARAM),
1259					  RW_GPR(0),
1260					  RW_REL(ABSOLUTE),
1261					  INDEX_GPR(0),
1262					  ELEM_SIZE(0));
1263    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1264					       SRC_SEL_Y(SQ_SEL_Y),
1265					       SRC_SEL_Z(SQ_SEL_0),
1266					       SRC_SEL_W(SQ_SEL_1),
1267					       R6xx_ELEM_LOOP(0),
1268					       BURST_COUNT(1),
1269					       END_OF_PROGRAM(0),
1270					       VALID_PIXEL_MODE(0),
1271					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1272					       WHOLE_QUAD_MODE(0),
1273					       BARRIER(0));
1274    /* 8 */
1275    shader[i++] = CF_DWORD0(ADDR(0));
1276    shader[i++] = CF_DWORD1(POP_COUNT(0),
1277			    CF_CONST(0),
1278			    COND(SQ_CF_COND_ACTIVE),
1279			    I_COUNT(0),
1280			    CALL_COUNT(0),
1281			    END_OF_PROGRAM(0),
1282			    VALID_PIXEL_MODE(0),
1283			    CF_INST(SQ_CF_INST_RETURN),
1284			    WHOLE_QUAD_MODE(0),
1285			    BARRIER(1));
1286    /* 9 - non-mask sub */
1287    shader[i++] = CF_DWORD0(ADDR(50));
1288    shader[i++] = CF_DWORD1(POP_COUNT(0),
1289			    CF_CONST(0),
1290			    COND(SQ_CF_COND_ACTIVE),
1291			    I_COUNT(2),
1292			    CALL_COUNT(0),
1293			    END_OF_PROGRAM(0),
1294			    VALID_PIXEL_MODE(0),
1295			    CF_INST(SQ_CF_INST_VTX),
1296			    WHOLE_QUAD_MODE(0),
1297			    BARRIER(1));
1298
1299    /* 10 - ALU */
1300    shader[i++] = CF_ALU_DWORD0(ADDR(34),
1301				KCACHE_BANK0(0),
1302				KCACHE_BANK1(0),
1303				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
1304    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1305				KCACHE_ADDR0(0),
1306				KCACHE_ADDR1(0),
1307				I_COUNT(10),
1308				USES_WATERFALL(0),
1309				CF_INST(SQ_CF_INST_ALU),
1310				WHOLE_QUAD_MODE(0),
1311				BARRIER(1));
1312
1313    /* 11 - dst */
1314    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1315					  TYPE(SQ_EXPORT_POS),
1316					  RW_GPR(1),
1317					  RW_REL(ABSOLUTE),
1318					  INDEX_GPR(0),
1319					  ELEM_SIZE(0));
1320    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1321					       SRC_SEL_Y(SQ_SEL_Y),
1322					       SRC_SEL_Z(SQ_SEL_0),
1323					       SRC_SEL_W(SQ_SEL_1),
1324					       R6xx_ELEM_LOOP(0),
1325					       BURST_COUNT(0),
1326					       END_OF_PROGRAM(0),
1327					       VALID_PIXEL_MODE(0),
1328					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1329					       WHOLE_QUAD_MODE(0),
1330					       BARRIER(1));
1331    /* 12 - src */
1332    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1333					  TYPE(SQ_EXPORT_PARAM),
1334					  RW_GPR(0),
1335					  RW_REL(ABSOLUTE),
1336					  INDEX_GPR(0),
1337					  ELEM_SIZE(0));
1338    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1339					       SRC_SEL_Y(SQ_SEL_Y),
1340					       SRC_SEL_Z(SQ_SEL_0),
1341					       SRC_SEL_W(SQ_SEL_1),
1342					       R6xx_ELEM_LOOP(0),
1343					       BURST_COUNT(0),
1344					       END_OF_PROGRAM(0),
1345					       VALID_PIXEL_MODE(0),
1346					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1347					       WHOLE_QUAD_MODE(0),
1348					       BARRIER(0));
1349    /* 13 */
1350    shader[i++] = CF_DWORD0(ADDR(0));
1351    shader[i++] = CF_DWORD1(POP_COUNT(0),
1352			    CF_CONST(0),
1353			    COND(SQ_CF_COND_ACTIVE),
1354			    I_COUNT(0),
1355			    CALL_COUNT(0),
1356			    END_OF_PROGRAM(0),
1357			    VALID_PIXEL_MODE(0),
1358			    CF_INST(SQ_CF_INST_RETURN),
1359			    WHOLE_QUAD_MODE(0),
1360			    BARRIER(1));
1361
1362
1363    /* 14 srcX.x DOT4 - mask */
1364    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1365                             SRC0_REL(ABSOLUTE),
1366                             SRC0_ELEM(ELEM_X),
1367                             SRC0_NEG(0),
1368                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1369                             SRC1_REL(ABSOLUTE),
1370                             SRC1_ELEM(ELEM_X),
1371                             SRC1_NEG(0),
1372                             INDEX_MODE(SQ_INDEX_LOOP),
1373                             PRED_SEL(SQ_PRED_SEL_OFF),
1374                             LAST(0));
1375    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1376                                 SRC0_ABS(0),
1377                                 SRC1_ABS(0),
1378                                 UPDATE_EXECUTE_MASK(0),
1379                                 UPDATE_PRED(0),
1380                                 WRITE_MASK(1),
1381                                 FOG_MERGE(0),
1382                                 OMOD(SQ_ALU_OMOD_OFF),
1383                                 ALU_INST(SQ_OP2_INST_DOT4),
1384                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1385                                 DST_GPR(3),
1386                                 DST_REL(ABSOLUTE),
1387                                 DST_ELEM(ELEM_X),
1388                                 CLAMP(0));
1389
1390    /* 15 srcX.y DOT4 - mask */
1391    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1392                             SRC0_REL(ABSOLUTE),
1393                             SRC0_ELEM(ELEM_Y),
1394                             SRC0_NEG(0),
1395                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1396                             SRC1_REL(ABSOLUTE),
1397                             SRC1_ELEM(ELEM_Y),
1398                             SRC1_NEG(0),
1399                             INDEX_MODE(SQ_INDEX_LOOP),
1400                             PRED_SEL(SQ_PRED_SEL_OFF),
1401                             LAST(0));
1402    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1403                                 SRC0_ABS(0),
1404                                 SRC1_ABS(0),
1405                                 UPDATE_EXECUTE_MASK(0),
1406                                 UPDATE_PRED(0),
1407                                 WRITE_MASK(0),
1408                                 FOG_MERGE(0),
1409                                 OMOD(SQ_ALU_OMOD_OFF),
1410                                 ALU_INST(SQ_OP2_INST_DOT4),
1411                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1412                                 DST_GPR(3),
1413                                 DST_REL(ABSOLUTE),
1414                                 DST_ELEM(ELEM_Y),
1415                                 CLAMP(0));
1416
1417    /* 16 srcX.z DOT4 - mask */
1418    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1419                             SRC0_REL(ABSOLUTE),
1420                             SRC0_ELEM(ELEM_Z),
1421                             SRC0_NEG(0),
1422                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1423                             SRC1_REL(ABSOLUTE),
1424                             SRC1_ELEM(ELEM_Z),
1425                             SRC1_NEG(0),
1426                             INDEX_MODE(SQ_INDEX_LOOP),
1427                             PRED_SEL(SQ_PRED_SEL_OFF),
1428                             LAST(0));
1429    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1430                                 SRC0_ABS(0),
1431                                 SRC1_ABS(0),
1432                                 UPDATE_EXECUTE_MASK(0),
1433                                 UPDATE_PRED(0),
1434                                 WRITE_MASK(0),
1435                                 FOG_MERGE(0),
1436                                 OMOD(SQ_ALU_OMOD_OFF),
1437                                 ALU_INST(SQ_OP2_INST_DOT4),
1438                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1439                                 DST_GPR(3),
1440                                 DST_REL(ABSOLUTE),
1441                                 DST_ELEM(ELEM_Z),
1442                                 CLAMP(0));
1443
1444    /* 17 srcX.w DOT4 - mask */
1445    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1446                             SRC0_REL(ABSOLUTE),
1447                             SRC0_ELEM(ELEM_W),
1448                             SRC0_NEG(0),
1449                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1450                             SRC1_REL(ABSOLUTE),
1451                             SRC1_ELEM(ELEM_W),
1452                             SRC1_NEG(0),
1453                             INDEX_MODE(SQ_INDEX_LOOP),
1454                             PRED_SEL(SQ_PRED_SEL_OFF),
1455                             LAST(1));
1456    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1457                                 SRC0_ABS(0),
1458                                 SRC1_ABS(0),
1459                                 UPDATE_EXECUTE_MASK(0),
1460                                 UPDATE_PRED(0),
1461                                 WRITE_MASK(0),
1462                                 FOG_MERGE(0),
1463                                 OMOD(SQ_ALU_OMOD_OFF),
1464                                 ALU_INST(SQ_OP2_INST_DOT4),
1465                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1466                                 DST_GPR(3),
1467                                 DST_REL(ABSOLUTE),
1468                                 DST_ELEM(ELEM_W),
1469                                 CLAMP(0));
1470
1471    /* 18 srcY.x DOT4 - mask */
1472    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1473                             SRC0_REL(ABSOLUTE),
1474                             SRC0_ELEM(ELEM_X),
1475                             SRC0_NEG(0),
1476                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1477                             SRC1_REL(ABSOLUTE),
1478                             SRC1_ELEM(ELEM_X),
1479                             SRC1_NEG(0),
1480                             INDEX_MODE(SQ_INDEX_LOOP),
1481                             PRED_SEL(SQ_PRED_SEL_OFF),
1482                             LAST(0));
1483    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1484                                 SRC0_ABS(0),
1485                                 SRC1_ABS(0),
1486                                 UPDATE_EXECUTE_MASK(0),
1487                                 UPDATE_PRED(0),
1488                                 WRITE_MASK(0),
1489                                 FOG_MERGE(0),
1490                                 OMOD(SQ_ALU_OMOD_OFF),
1491                                 ALU_INST(SQ_OP2_INST_DOT4),
1492                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1493                                 DST_GPR(3),
1494                                 DST_REL(ABSOLUTE),
1495                                 DST_ELEM(ELEM_X),
1496                                 CLAMP(0));
1497
1498    /* 19 srcY.y DOT4 - mask */
1499    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1500                             SRC0_REL(ABSOLUTE),
1501                             SRC0_ELEM(ELEM_Y),
1502                             SRC0_NEG(0),
1503                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1504                             SRC1_REL(ABSOLUTE),
1505                             SRC1_ELEM(ELEM_Y),
1506                             SRC1_NEG(0),
1507                             INDEX_MODE(SQ_INDEX_LOOP),
1508                             PRED_SEL(SQ_PRED_SEL_OFF),
1509                             LAST(0));
1510    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1511                                 SRC0_ABS(0),
1512                                 SRC1_ABS(0),
1513                                 UPDATE_EXECUTE_MASK(0),
1514                                 UPDATE_PRED(0),
1515                                 WRITE_MASK(1),
1516                                 FOG_MERGE(0),
1517                                 OMOD(SQ_ALU_OMOD_OFF),
1518                                 ALU_INST(SQ_OP2_INST_DOT4),
1519                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1520                                 DST_GPR(3),
1521                                 DST_REL(ABSOLUTE),
1522                                 DST_ELEM(ELEM_Y),
1523                                 CLAMP(0));
1524
1525    /* 20 srcY.z DOT4 - mask */
1526    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1527                             SRC0_REL(ABSOLUTE),
1528                             SRC0_ELEM(ELEM_Z),
1529                             SRC0_NEG(0),
1530                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1531                             SRC1_REL(ABSOLUTE),
1532                             SRC1_ELEM(ELEM_Z),
1533                             SRC1_NEG(0),
1534                             INDEX_MODE(SQ_INDEX_LOOP),
1535                             PRED_SEL(SQ_PRED_SEL_OFF),
1536                             LAST(0));
1537    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1538                                 SRC0_ABS(0),
1539                                 SRC1_ABS(0),
1540                                 UPDATE_EXECUTE_MASK(0),
1541                                 UPDATE_PRED(0),
1542                                 WRITE_MASK(0),
1543                                 FOG_MERGE(0),
1544                                 OMOD(SQ_ALU_OMOD_OFF),
1545                                 ALU_INST(SQ_OP2_INST_DOT4),
1546                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1547                                 DST_GPR(3),
1548                                 DST_REL(ABSOLUTE),
1549                                 DST_ELEM(ELEM_Z),
1550                                 CLAMP(0));
1551
1552    /* 21 srcY.w DOT4 - mask */
1553    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1554                             SRC0_REL(ABSOLUTE),
1555                             SRC0_ELEM(ELEM_W),
1556                             SRC0_NEG(0),
1557                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1558                             SRC1_REL(ABSOLUTE),
1559                             SRC1_ELEM(ELEM_W),
1560                             SRC1_NEG(0),
1561                             INDEX_MODE(SQ_INDEX_LOOP),
1562                             PRED_SEL(SQ_PRED_SEL_OFF),
1563                             LAST(1));
1564    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1565                                 SRC0_ABS(0),
1566                                 SRC1_ABS(0),
1567                                 UPDATE_EXECUTE_MASK(0),
1568                                 UPDATE_PRED(0),
1569                                 WRITE_MASK(0),
1570                                 FOG_MERGE(0),
1571                                 OMOD(SQ_ALU_OMOD_OFF),
1572                                 ALU_INST(SQ_OP2_INST_DOT4),
1573                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1574                                 DST_GPR(3),
1575                                 DST_REL(ABSOLUTE),
1576                                 DST_ELEM(ELEM_W),
1577                                 CLAMP(0));
1578
1579    /* 22 maskX.x DOT4 - mask */
1580    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1581                             SRC0_REL(ABSOLUTE),
1582                             SRC0_ELEM(ELEM_X),
1583                             SRC0_NEG(0),
1584                             SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1585                             SRC1_REL(ABSOLUTE),
1586                             SRC1_ELEM(ELEM_X),
1587                             SRC1_NEG(0),
1588                             INDEX_MODE(SQ_INDEX_LOOP),
1589                             PRED_SEL(SQ_PRED_SEL_OFF),
1590                             LAST(0));
1591    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1592                                 SRC0_ABS(0),
1593                                 SRC1_ABS(0),
1594                                 UPDATE_EXECUTE_MASK(0),
1595                                 UPDATE_PRED(0),
1596                                 WRITE_MASK(1),
1597                                 FOG_MERGE(0),
1598                                 OMOD(SQ_ALU_OMOD_OFF),
1599                                 ALU_INST(SQ_OP2_INST_DOT4),
1600                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1601                                 DST_GPR(4),
1602                                 DST_REL(ABSOLUTE),
1603                                 DST_ELEM(ELEM_X),
1604                                 CLAMP(0));
1605
1606    /* 23 maskX.y DOT4 - mask */
1607    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1608                             SRC0_REL(ABSOLUTE),
1609                             SRC0_ELEM(ELEM_Y),
1610                             SRC0_NEG(0),
1611                             SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1612                             SRC1_REL(ABSOLUTE),
1613                             SRC1_ELEM(ELEM_Y),
1614                             SRC1_NEG(0),
1615                             INDEX_MODE(SQ_INDEX_LOOP),
1616                             PRED_SEL(SQ_PRED_SEL_OFF),
1617                             LAST(0));
1618    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1619                                 SRC0_ABS(0),
1620                                 SRC1_ABS(0),
1621                                 UPDATE_EXECUTE_MASK(0),
1622                                 UPDATE_PRED(0),
1623                                 WRITE_MASK(0),
1624                                 FOG_MERGE(0),
1625                                 OMOD(SQ_ALU_OMOD_OFF),
1626                                 ALU_INST(SQ_OP2_INST_DOT4),
1627                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1628                                 DST_GPR(4),
1629                                 DST_REL(ABSOLUTE),
1630                                 DST_ELEM(ELEM_Y),
1631                                 CLAMP(0));
1632
1633    /* 24 maskX.z DOT4 - mask */
1634    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1635                             SRC0_REL(ABSOLUTE),
1636                             SRC0_ELEM(ELEM_Z),
1637                             SRC0_NEG(0),
1638                             SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1639                             SRC1_REL(ABSOLUTE),
1640                             SRC1_ELEM(ELEM_Z),
1641                             SRC1_NEG(0),
1642                             INDEX_MODE(SQ_INDEX_LOOP),
1643                             PRED_SEL(SQ_PRED_SEL_OFF),
1644                             LAST(0));
1645    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1646                                 SRC0_ABS(0),
1647                                 SRC1_ABS(0),
1648                                 UPDATE_EXECUTE_MASK(0),
1649                                 UPDATE_PRED(0),
1650                                 WRITE_MASK(0),
1651                                 FOG_MERGE(0),
1652                                 OMOD(SQ_ALU_OMOD_OFF),
1653                                 ALU_INST(SQ_OP2_INST_DOT4),
1654                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1655                                 DST_GPR(4),
1656                                 DST_REL(ABSOLUTE),
1657                                 DST_ELEM(ELEM_Z),
1658                                 CLAMP(0));
1659
1660    /* 25 maskX.w DOT4 - mask */
1661    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1662                             SRC0_REL(ABSOLUTE),
1663                             SRC0_ELEM(ELEM_W),
1664                             SRC0_NEG(0),
1665                             SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1666                             SRC1_REL(ABSOLUTE),
1667                             SRC1_ELEM(ELEM_W),
1668                             SRC1_NEG(0),
1669                             INDEX_MODE(SQ_INDEX_LOOP),
1670                             PRED_SEL(SQ_PRED_SEL_OFF),
1671                             LAST(1));
1672    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1673                                 SRC0_ABS(0),
1674                                 SRC1_ABS(0),
1675                                 UPDATE_EXECUTE_MASK(0),
1676                                 UPDATE_PRED(0),
1677                                 WRITE_MASK(0),
1678                                 FOG_MERGE(0),
1679                                 OMOD(SQ_ALU_OMOD_OFF),
1680                                 ALU_INST(SQ_OP2_INST_DOT4),
1681                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1682                                 DST_GPR(4),
1683                                 DST_REL(ABSOLUTE),
1684                                 DST_ELEM(ELEM_W),
1685                                 CLAMP(0));
1686
1687    /* 26 maskY.x DOT4 - mask */
1688    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1689                             SRC0_REL(ABSOLUTE),
1690                             SRC0_ELEM(ELEM_X),
1691                             SRC0_NEG(0),
1692                             SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1693                             SRC1_REL(ABSOLUTE),
1694                             SRC1_ELEM(ELEM_X),
1695                             SRC1_NEG(0),
1696                             INDEX_MODE(SQ_INDEX_LOOP),
1697                             PRED_SEL(SQ_PRED_SEL_OFF),
1698                             LAST(0));
1699    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1700                                 SRC0_ABS(0),
1701                                 SRC1_ABS(0),
1702                                 UPDATE_EXECUTE_MASK(0),
1703                                 UPDATE_PRED(0),
1704                                 WRITE_MASK(0),
1705                                 FOG_MERGE(0),
1706                                 OMOD(SQ_ALU_OMOD_OFF),
1707                                 ALU_INST(SQ_OP2_INST_DOT4),
1708                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1709                                 DST_GPR(4),
1710                                 DST_REL(ABSOLUTE),
1711                                 DST_ELEM(ELEM_X),
1712                                 CLAMP(0));
1713
1714    /* 27 maskY.y DOT4 - mask */
1715    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1716                             SRC0_REL(ABSOLUTE),
1717                             SRC0_ELEM(ELEM_Y),
1718                             SRC0_NEG(0),
1719                             SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1720                             SRC1_REL(ABSOLUTE),
1721                             SRC1_ELEM(ELEM_Y),
1722                             SRC1_NEG(0),
1723                             INDEX_MODE(SQ_INDEX_LOOP),
1724                             PRED_SEL(SQ_PRED_SEL_OFF),
1725                             LAST(0));
1726    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1727                                 SRC0_ABS(0),
1728                                 SRC1_ABS(0),
1729                                 UPDATE_EXECUTE_MASK(0),
1730                                 UPDATE_PRED(0),
1731                                 WRITE_MASK(1),
1732                                 FOG_MERGE(0),
1733                                 OMOD(SQ_ALU_OMOD_OFF),
1734                                 ALU_INST(SQ_OP2_INST_DOT4),
1735                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1736                                 DST_GPR(4),
1737                                 DST_REL(ABSOLUTE),
1738                                 DST_ELEM(ELEM_Y),
1739                                 CLAMP(0));
1740
1741    /* 28 maskY.z DOT4 - mask */
1742    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1743                             SRC0_REL(ABSOLUTE),
1744                             SRC0_ELEM(ELEM_Z),
1745                             SRC0_NEG(0),
1746                             SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1747                             SRC1_REL(ABSOLUTE),
1748                             SRC1_ELEM(ELEM_Z),
1749                             SRC1_NEG(0),
1750                             INDEX_MODE(SQ_INDEX_LOOP),
1751                             PRED_SEL(SQ_PRED_SEL_OFF),
1752                             LAST(0));
1753    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1754                                 SRC0_ABS(0),
1755                                 SRC1_ABS(0),
1756                                 UPDATE_EXECUTE_MASK(0),
1757                                 UPDATE_PRED(0),
1758                                 WRITE_MASK(0),
1759                                 FOG_MERGE(0),
1760                                 OMOD(SQ_ALU_OMOD_OFF),
1761                                 ALU_INST(SQ_OP2_INST_DOT4),
1762                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1763                                 DST_GPR(4),
1764                                 DST_REL(ABSOLUTE),
1765                                 DST_ELEM(ELEM_Z),
1766                                 CLAMP(0));
1767
1768    /* 29 maskY.w DOT4 - mask */
1769    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1770                             SRC0_REL(ABSOLUTE),
1771                             SRC0_ELEM(ELEM_W),
1772                             SRC0_NEG(0),
1773                             SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1774                             SRC1_REL(ABSOLUTE),
1775                             SRC1_ELEM(ELEM_W),
1776                             SRC1_NEG(0),
1777                             INDEX_MODE(SQ_INDEX_LOOP),
1778                             PRED_SEL(SQ_PRED_SEL_OFF),
1779                             LAST(1));
1780    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1781                                 SRC0_ABS(0),
1782                                 SRC1_ABS(0),
1783                                 UPDATE_EXECUTE_MASK(0),
1784                                 UPDATE_PRED(0),
1785                                 WRITE_MASK(0),
1786                                 FOG_MERGE(0),
1787                                 OMOD(SQ_ALU_OMOD_OFF),
1788                                 ALU_INST(SQ_OP2_INST_DOT4),
1789                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1790                                 DST_GPR(4),
1791                                 DST_REL(ABSOLUTE),
1792                                 DST_ELEM(ELEM_W),
1793                                 CLAMP(0));
1794
1795    /* 30 srcX / w */
1796    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
1797                             SRC0_REL(ABSOLUTE),
1798                             SRC0_ELEM(ELEM_X),
1799                             SRC0_NEG(0),
1800                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1801                             SRC1_REL(ABSOLUTE),
1802                             SRC1_ELEM(ELEM_W),
1803                             SRC1_NEG(0),
1804                             INDEX_MODE(SQ_INDEX_AR_X),
1805                             PRED_SEL(SQ_PRED_SEL_OFF),
1806                             LAST(1));
1807    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1808                                 SRC0_ABS(0),
1809                                 SRC1_ABS(0),
1810                                 UPDATE_EXECUTE_MASK(0),
1811                                 UPDATE_PRED(0),
1812                                 WRITE_MASK(1),
1813                                 FOG_MERGE(0),
1814                                 OMOD(SQ_ALU_OMOD_OFF),
1815                                 ALU_INST(SQ_OP2_INST_MUL),
1816                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1817                                 DST_GPR(1),
1818                                 DST_REL(ABSOLUTE),
1819                                 DST_ELEM(ELEM_X),
1820                                 CLAMP(0));
1821
1822    /* 31 srcY / h */
1823    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
1824                             SRC0_REL(ABSOLUTE),
1825                             SRC0_ELEM(ELEM_Y),
1826                             SRC0_NEG(0),
1827                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1828                             SRC1_REL(ABSOLUTE),
1829                             SRC1_ELEM(ELEM_W),
1830                             SRC1_NEG(0),
1831                             INDEX_MODE(SQ_INDEX_AR_X),
1832                             PRED_SEL(SQ_PRED_SEL_OFF),
1833                             LAST(1));
1834    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1835                                 SRC0_ABS(0),
1836                                 SRC1_ABS(0),
1837                                 UPDATE_EXECUTE_MASK(0),
1838                                 UPDATE_PRED(0),
1839                                 WRITE_MASK(1),
1840                                 FOG_MERGE(0),
1841                                 OMOD(SQ_ALU_OMOD_OFF),
1842                                 ALU_INST(SQ_OP2_INST_MUL),
1843                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1844                                 DST_GPR(1),
1845                                 DST_REL(ABSOLUTE),
1846                                 DST_ELEM(ELEM_Y),
1847                                 CLAMP(0));
1848
1849    /* 32 maskX / w */
1850    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
1851                             SRC0_REL(ABSOLUTE),
1852                             SRC0_ELEM(ELEM_X),
1853                             SRC0_NEG(0),
1854                             SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1855                             SRC1_REL(ABSOLUTE),
1856                             SRC1_ELEM(ELEM_W),
1857                             SRC1_NEG(0),
1858                             INDEX_MODE(SQ_INDEX_AR_X),
1859                             PRED_SEL(SQ_PRED_SEL_OFF),
1860                             LAST(1));
1861    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1862                                 SRC0_ABS(0),
1863                                 SRC1_ABS(0),
1864                                 UPDATE_EXECUTE_MASK(0),
1865                                 UPDATE_PRED(0),
1866                                 WRITE_MASK(1),
1867                                 FOG_MERGE(0),
1868                                 OMOD(SQ_ALU_OMOD_OFF),
1869                                 ALU_INST(SQ_OP2_INST_MUL),
1870                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1871                                 DST_GPR(0),
1872                                 DST_REL(ABSOLUTE),
1873                                 DST_ELEM(ELEM_X),
1874                                 CLAMP(0));
1875
1876    /* 33 maskY / h */
1877    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
1878                             SRC0_REL(ABSOLUTE),
1879                             SRC0_ELEM(ELEM_Y),
1880                             SRC0_NEG(0),
1881                             SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1882                             SRC1_REL(ABSOLUTE),
1883                             SRC1_ELEM(ELEM_W),
1884                             SRC1_NEG(0),
1885                             INDEX_MODE(SQ_INDEX_AR_X),
1886                             PRED_SEL(SQ_PRED_SEL_OFF),
1887                             LAST(1));
1888    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1889                                 SRC0_ABS(0),
1890                                 SRC1_ABS(0),
1891                                 UPDATE_EXECUTE_MASK(0),
1892                                 UPDATE_PRED(0),
1893                                 WRITE_MASK(1),
1894                                 FOG_MERGE(0),
1895                                 OMOD(SQ_ALU_OMOD_OFF),
1896                                 ALU_INST(SQ_OP2_INST_MUL),
1897                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1898                                 DST_GPR(0),
1899                                 DST_REL(ABSOLUTE),
1900                                 DST_ELEM(ELEM_Y),
1901                                 CLAMP(0));
1902
1903    /* 34 srcX.x DOT4 - non-mask */
1904    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1905                             SRC0_REL(ABSOLUTE),
1906                             SRC0_ELEM(ELEM_X),
1907                             SRC0_NEG(0),
1908                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1909                             SRC1_REL(ABSOLUTE),
1910                             SRC1_ELEM(ELEM_X),
1911                             SRC1_NEG(0),
1912                             INDEX_MODE(SQ_INDEX_LOOP),
1913                             PRED_SEL(SQ_PRED_SEL_OFF),
1914                             LAST(0));
1915    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1916                                 SRC0_ABS(0),
1917                                 SRC1_ABS(0),
1918                                 UPDATE_EXECUTE_MASK(0),
1919                                 UPDATE_PRED(0),
1920                                 WRITE_MASK(1),
1921                                 FOG_MERGE(0),
1922                                 OMOD(SQ_ALU_OMOD_OFF),
1923                                 ALU_INST(SQ_OP2_INST_DOT4),
1924                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1925                                 DST_GPR(2),
1926                                 DST_REL(ABSOLUTE),
1927                                 DST_ELEM(ELEM_X),
1928                                 CLAMP(0));
1929
1930    /* 35 srcX.y DOT4 - non-mask */
1931    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1932                             SRC0_REL(ABSOLUTE),
1933                             SRC0_ELEM(ELEM_Y),
1934                             SRC0_NEG(0),
1935                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1936                             SRC1_REL(ABSOLUTE),
1937                             SRC1_ELEM(ELEM_Y),
1938                             SRC1_NEG(0),
1939                             INDEX_MODE(SQ_INDEX_LOOP),
1940                             PRED_SEL(SQ_PRED_SEL_OFF),
1941                             LAST(0));
1942    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1943                                 SRC0_ABS(0),
1944                                 SRC1_ABS(0),
1945                                 UPDATE_EXECUTE_MASK(0),
1946                                 UPDATE_PRED(0),
1947                                 WRITE_MASK(0),
1948                                 FOG_MERGE(0),
1949                                 OMOD(SQ_ALU_OMOD_OFF),
1950                                 ALU_INST(SQ_OP2_INST_DOT4),
1951                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1952                                 DST_GPR(2),
1953                                 DST_REL(ABSOLUTE),
1954                                 DST_ELEM(ELEM_Y),
1955                                 CLAMP(0));
1956
1957    /* 36 srcX.z DOT4 - non-mask */
1958    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1959                             SRC0_REL(ABSOLUTE),
1960                             SRC0_ELEM(ELEM_Z),
1961                             SRC0_NEG(0),
1962                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1963                             SRC1_REL(ABSOLUTE),
1964                             SRC1_ELEM(ELEM_Z),
1965                             SRC1_NEG(0),
1966                             INDEX_MODE(SQ_INDEX_LOOP),
1967                             PRED_SEL(SQ_PRED_SEL_OFF),
1968                             LAST(0));
1969    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1970                                 SRC0_ABS(0),
1971                                 SRC1_ABS(0),
1972                                 UPDATE_EXECUTE_MASK(0),
1973                                 UPDATE_PRED(0),
1974                                 WRITE_MASK(0),
1975                                 FOG_MERGE(0),
1976                                 OMOD(SQ_ALU_OMOD_OFF),
1977                                 ALU_INST(SQ_OP2_INST_DOT4),
1978                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1979                                 DST_GPR(2),
1980                                 DST_REL(ABSOLUTE),
1981                                 DST_ELEM(ELEM_Z),
1982                                 CLAMP(0));
1983
1984    /* 37 srcX.w DOT4 - non-mask */
1985    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1986                             SRC0_REL(ABSOLUTE),
1987                             SRC0_ELEM(ELEM_W),
1988                             SRC0_NEG(0),
1989                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1990                             SRC1_REL(ABSOLUTE),
1991                             SRC1_ELEM(ELEM_W),
1992                             SRC1_NEG(0),
1993                             INDEX_MODE(SQ_INDEX_LOOP),
1994                             PRED_SEL(SQ_PRED_SEL_OFF),
1995                             LAST(1));
1996    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1997                                 SRC0_ABS(0),
1998                                 SRC1_ABS(0),
1999                                 UPDATE_EXECUTE_MASK(0),
2000                                 UPDATE_PRED(0),
2001                                 WRITE_MASK(0),
2002                                 FOG_MERGE(0),
2003                                 OMOD(SQ_ALU_OMOD_OFF),
2004                                 ALU_INST(SQ_OP2_INST_DOT4),
2005                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2006                                 DST_GPR(2),
2007                                 DST_REL(ABSOLUTE),
2008                                 DST_ELEM(ELEM_W),
2009                                 CLAMP(0));
2010
2011    /* 38 srcY.x DOT4 - non-mask */
2012    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2013                             SRC0_REL(ABSOLUTE),
2014                             SRC0_ELEM(ELEM_X),
2015                             SRC0_NEG(0),
2016                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2017                             SRC1_REL(ABSOLUTE),
2018                             SRC1_ELEM(ELEM_X),
2019                             SRC1_NEG(0),
2020                             INDEX_MODE(SQ_INDEX_LOOP),
2021                             PRED_SEL(SQ_PRED_SEL_OFF),
2022                             LAST(0));
2023    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2024                                 SRC0_ABS(0),
2025                                 SRC1_ABS(0),
2026                                 UPDATE_EXECUTE_MASK(0),
2027                                 UPDATE_PRED(0),
2028                                 WRITE_MASK(0),
2029                                 FOG_MERGE(0),
2030                                 OMOD(SQ_ALU_OMOD_OFF),
2031                                 ALU_INST(SQ_OP2_INST_DOT4),
2032                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2033                                 DST_GPR(2),
2034                                 DST_REL(ABSOLUTE),
2035                                 DST_ELEM(ELEM_X),
2036                                 CLAMP(0));
2037
2038    /* 39 srcY.y DOT4 - non-mask */
2039    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2040                             SRC0_REL(ABSOLUTE),
2041                             SRC0_ELEM(ELEM_Y),
2042                             SRC0_NEG(0),
2043                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2044                             SRC1_REL(ABSOLUTE),
2045                             SRC1_ELEM(ELEM_Y),
2046                             SRC1_NEG(0),
2047                             INDEX_MODE(SQ_INDEX_LOOP),
2048                             PRED_SEL(SQ_PRED_SEL_OFF),
2049                             LAST(0));
2050    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2051                                 SRC0_ABS(0),
2052                                 SRC1_ABS(0),
2053                                 UPDATE_EXECUTE_MASK(0),
2054                                 UPDATE_PRED(0),
2055                                 WRITE_MASK(1),
2056                                 FOG_MERGE(0),
2057                                 OMOD(SQ_ALU_OMOD_OFF),
2058                                 ALU_INST(SQ_OP2_INST_DOT4),
2059                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2060                                 DST_GPR(2),
2061                                 DST_REL(ABSOLUTE),
2062                                 DST_ELEM(ELEM_Y),
2063                                 CLAMP(0));
2064
2065    /* 40 srcY.z DOT4 - non-mask */
2066    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2067                             SRC0_REL(ABSOLUTE),
2068                             SRC0_ELEM(ELEM_Z),
2069                             SRC0_NEG(0),
2070                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2071                             SRC1_REL(ABSOLUTE),
2072                             SRC1_ELEM(ELEM_Z),
2073                             SRC1_NEG(0),
2074                             INDEX_MODE(SQ_INDEX_LOOP),
2075                             PRED_SEL(SQ_PRED_SEL_OFF),
2076                             LAST(0));
2077    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2078                                 SRC0_ABS(0),
2079                                 SRC1_ABS(0),
2080                                 UPDATE_EXECUTE_MASK(0),
2081                                 UPDATE_PRED(0),
2082                                 WRITE_MASK(0),
2083                                 FOG_MERGE(0),
2084                                 OMOD(SQ_ALU_OMOD_OFF),
2085                                 ALU_INST(SQ_OP2_INST_DOT4),
2086                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2087                                 DST_GPR(2),
2088                                 DST_REL(ABSOLUTE),
2089                                 DST_ELEM(ELEM_Z),
2090                                 CLAMP(0));
2091
2092    /* 41 srcY.w DOT4 - non-mask */
2093    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2094                             SRC0_REL(ABSOLUTE),
2095                             SRC0_ELEM(ELEM_W),
2096                             SRC0_NEG(0),
2097                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2098                             SRC1_REL(ABSOLUTE),
2099                             SRC1_ELEM(ELEM_W),
2100                             SRC1_NEG(0),
2101                             INDEX_MODE(SQ_INDEX_LOOP),
2102                             PRED_SEL(SQ_PRED_SEL_OFF),
2103                             LAST(1));
2104    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2105                                 SRC0_ABS(0),
2106                                 SRC1_ABS(0),
2107                                 UPDATE_EXECUTE_MASK(0),
2108                                 UPDATE_PRED(0),
2109                                 WRITE_MASK(0),
2110                                 FOG_MERGE(0),
2111                                 OMOD(SQ_ALU_OMOD_OFF),
2112                                 ALU_INST(SQ_OP2_INST_DOT4),
2113                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2114                                 DST_GPR(2),
2115                                 DST_REL(ABSOLUTE),
2116                                 DST_ELEM(ELEM_W),
2117                                 CLAMP(0));
2118
2119    /* 42 srcX / w */
2120    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
2121                             SRC0_REL(ABSOLUTE),
2122                             SRC0_ELEM(ELEM_X),
2123                             SRC0_NEG(0),
2124                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
2125                             SRC1_REL(ABSOLUTE),
2126                             SRC1_ELEM(ELEM_W),
2127                             SRC1_NEG(0),
2128                             INDEX_MODE(SQ_INDEX_AR_X),
2129                             PRED_SEL(SQ_PRED_SEL_OFF),
2130                             LAST(1));
2131    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2132                                 SRC0_ABS(0),
2133                                 SRC1_ABS(0),
2134                                 UPDATE_EXECUTE_MASK(0),
2135                                 UPDATE_PRED(0),
2136                                 WRITE_MASK(1),
2137                                 FOG_MERGE(0),
2138                                 OMOD(SQ_ALU_OMOD_OFF),
2139                                 ALU_INST(SQ_OP2_INST_MUL),
2140                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2141                                 DST_GPR(0),
2142                                 DST_REL(ABSOLUTE),
2143                                 DST_ELEM(ELEM_X),
2144                                 CLAMP(0));
2145
2146    /* 43 srcY / h */
2147    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
2148                             SRC0_REL(ABSOLUTE),
2149                             SRC0_ELEM(ELEM_Y),
2150                             SRC0_NEG(0),
2151                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2152                             SRC1_REL(ABSOLUTE),
2153                             SRC1_ELEM(ELEM_W),
2154                             SRC1_NEG(0),
2155                             INDEX_MODE(SQ_INDEX_AR_X),
2156                             PRED_SEL(SQ_PRED_SEL_OFF),
2157                             LAST(1));
2158    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2159                                 SRC0_ABS(0),
2160                                 SRC1_ABS(0),
2161                                 UPDATE_EXECUTE_MASK(0),
2162                                 UPDATE_PRED(0),
2163                                 WRITE_MASK(1),
2164                                 FOG_MERGE(0),
2165                                 OMOD(SQ_ALU_OMOD_OFF),
2166                                 ALU_INST(SQ_OP2_INST_MUL),
2167                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2168                                 DST_GPR(0),
2169                                 DST_REL(ABSOLUTE),
2170                                 DST_ELEM(ELEM_Y),
2171                                 CLAMP(0));
2172
2173    /* 44/45 - dst - mask */
2174    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2175			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2176			     FETCH_WHOLE_QUAD(0),
2177			     BUFFER_ID(0),
2178			     SRC_GPR(0),
2179			     SRC_REL(ABSOLUTE),
2180			     SRC_SEL_X(SQ_SEL_X),
2181			     MEGA_FETCH_COUNT(24));
2182    shader[i++] = VTX_DWORD1_GPR(DST_GPR(2),
2183				 DST_REL(0),
2184				 DST_SEL_X(SQ_SEL_X),
2185				 DST_SEL_Y(SQ_SEL_Y),
2186				 DST_SEL_Z(SQ_SEL_0),
2187				 DST_SEL_W(SQ_SEL_1),
2188				 USE_CONST_FIELDS(0),
2189				 DATA_FORMAT(FMT_32_32_FLOAT),
2190				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2191				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2192				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2193    shader[i++] = VTX_DWORD2(OFFSET(0),
2194			     ENDIAN_SWAP(ENDIAN_NONE),
2195			     CONST_BUF_NO_STRIDE(0),
2196			     MEGA_FETCH(1));
2197    shader[i++] = VTX_DWORD_PAD;
2198    /* 46/47 - src */
2199    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2200			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2201			     FETCH_WHOLE_QUAD(0),
2202			     BUFFER_ID(0),
2203			     SRC_GPR(0),
2204			     SRC_REL(ABSOLUTE),
2205			     SRC_SEL_X(SQ_SEL_X),
2206			     MEGA_FETCH_COUNT(8));
2207    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
2208				 DST_REL(0),
2209				 DST_SEL_X(SQ_SEL_X),
2210				 DST_SEL_Y(SQ_SEL_Y),
2211				 DST_SEL_Z(SQ_SEL_1),
2212				 DST_SEL_W(SQ_SEL_0),
2213				 USE_CONST_FIELDS(0),
2214				 DATA_FORMAT(FMT_32_32_FLOAT),
2215				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2216				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2217				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2218    shader[i++] = VTX_DWORD2(OFFSET(8),
2219			     ENDIAN_SWAP(ENDIAN_NONE),
2220			     CONST_BUF_NO_STRIDE(0),
2221			     MEGA_FETCH(0));
2222    shader[i++] = VTX_DWORD_PAD;
2223    /* 48/49 - mask */
2224    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2225			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2226			     FETCH_WHOLE_QUAD(0),
2227			     BUFFER_ID(0),
2228			     SRC_GPR(0),
2229			     SRC_REL(ABSOLUTE),
2230			     SRC_SEL_X(SQ_SEL_X),
2231			     MEGA_FETCH_COUNT(8));
2232    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
2233				 DST_REL(0),
2234				 DST_SEL_X(SQ_SEL_X),
2235				 DST_SEL_Y(SQ_SEL_Y),
2236				 DST_SEL_Z(SQ_SEL_1),
2237				 DST_SEL_W(SQ_SEL_0),
2238				 USE_CONST_FIELDS(0),
2239				 DATA_FORMAT(FMT_32_32_FLOAT),
2240				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2241				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2242				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2243    shader[i++] = VTX_DWORD2(OFFSET(16),
2244			     ENDIAN_SWAP(ENDIAN_NONE),
2245			     CONST_BUF_NO_STRIDE(0),
2246			     MEGA_FETCH(0));
2247    shader[i++] = VTX_DWORD_PAD;
2248
2249    /* 50/51 - dst - non-mask */
2250    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2251			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2252			     FETCH_WHOLE_QUAD(0),
2253			     BUFFER_ID(0),
2254			     SRC_GPR(0),
2255			     SRC_REL(ABSOLUTE),
2256			     SRC_SEL_X(SQ_SEL_X),
2257			     MEGA_FETCH_COUNT(16));
2258    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
2259				 DST_REL(0),
2260				 DST_SEL_X(SQ_SEL_X),
2261				 DST_SEL_Y(SQ_SEL_Y),
2262				 DST_SEL_Z(SQ_SEL_0),
2263				 DST_SEL_W(SQ_SEL_1),
2264				 USE_CONST_FIELDS(0),
2265				 DATA_FORMAT(FMT_32_32_FLOAT),
2266				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2267				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2268				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2269    shader[i++] = VTX_DWORD2(OFFSET(0),
2270			     ENDIAN_SWAP(ENDIAN_NONE),
2271			     CONST_BUF_NO_STRIDE(0),
2272			     MEGA_FETCH(1));
2273    shader[i++] = VTX_DWORD_PAD;
2274    /* 52/53 - src */
2275    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2276			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2277			     FETCH_WHOLE_QUAD(0),
2278			     BUFFER_ID(0),
2279			     SRC_GPR(0),
2280			     SRC_REL(ABSOLUTE),
2281			     SRC_SEL_X(SQ_SEL_X),
2282			     MEGA_FETCH_COUNT(8));
2283    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
2284				 DST_REL(0),
2285				 DST_SEL_X(SQ_SEL_X),
2286				 DST_SEL_Y(SQ_SEL_Y),
2287				 DST_SEL_Z(SQ_SEL_1),
2288				 DST_SEL_W(SQ_SEL_0),
2289				 USE_CONST_FIELDS(0),
2290				 DATA_FORMAT(FMT_32_32_FLOAT),
2291				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2292				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2293				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2294    shader[i++] = VTX_DWORD2(OFFSET(8),
2295			     ENDIAN_SWAP(ENDIAN_NONE),
2296			     CONST_BUF_NO_STRIDE(0),
2297			     MEGA_FETCH(0));
2298    shader[i++] = VTX_DWORD_PAD;
2299
2300    return i;
2301}
2302
2303/* comp ps --------------------------------------- */
2304int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
2305{
2306    int i = 0;
2307
2308    /* 0 */
2309    shader[i++] = CF_DWORD0(ADDR(3));
2310    shader[i++] = CF_DWORD1(POP_COUNT(0),
2311                            CF_CONST(0),
2312                            COND(SQ_CF_COND_BOOL),
2313                            I_COUNT(0),
2314                            CALL_COUNT(0),
2315                            END_OF_PROGRAM(0),
2316                            VALID_PIXEL_MODE(0),
2317                            CF_INST(SQ_CF_INST_CALL),
2318                            WHOLE_QUAD_MODE(0),
2319                            BARRIER(0));
2320    /* 1 */
2321    shader[i++] = CF_DWORD0(ADDR(7));
2322    shader[i++] = CF_DWORD1(POP_COUNT(0),
2323                            CF_CONST(0),
2324                            COND(SQ_CF_COND_NOT_BOOL),
2325                            I_COUNT(0),
2326                            CALL_COUNT(0),
2327                            END_OF_PROGRAM(0),
2328                            VALID_PIXEL_MODE(0),
2329                            CF_INST(SQ_CF_INST_CALL),
2330                            WHOLE_QUAD_MODE(0),
2331                            BARRIER(0));
2332    /* 2 */
2333    shader[i++] = CF_DWORD0(ADDR(0));
2334    shader[i++] = CF_DWORD1(POP_COUNT(0),
2335                            CF_CONST(0),
2336                            COND(SQ_CF_COND_ACTIVE),
2337                            I_COUNT(0),
2338                            CALL_COUNT(0),
2339                            END_OF_PROGRAM(1),
2340                            VALID_PIXEL_MODE(0),
2341                            CF_INST(SQ_CF_INST_NOP),
2342                            WHOLE_QUAD_MODE(0),
2343                            BARRIER(1));
2344
2345    /* 3 - mask sub */
2346    shader[i++] = CF_DWORD0(ADDR(14));
2347    shader[i++] = CF_DWORD1(POP_COUNT(0),
2348			    CF_CONST(0),
2349			    COND(SQ_CF_COND_ACTIVE),
2350			    I_COUNT(2),
2351			    CALL_COUNT(0),
2352			    END_OF_PROGRAM(0),
2353			    VALID_PIXEL_MODE(0),
2354			    CF_INST(SQ_CF_INST_TEX),
2355			    WHOLE_QUAD_MODE(0),
2356			    BARRIER(1));
2357
2358    /* 4 */
2359    shader[i++] = CF_ALU_DWORD0(ADDR(10),
2360				KCACHE_BANK0(0),
2361				KCACHE_BANK1(0),
2362				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
2363    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2364				KCACHE_ADDR0(0),
2365				KCACHE_ADDR1(0),
2366				I_COUNT(4),
2367				USES_WATERFALL(0),
2368				CF_INST(SQ_CF_INST_ALU),
2369				WHOLE_QUAD_MODE(0),
2370				BARRIER(1));
2371
2372    /* 5 */
2373    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
2374					  TYPE(SQ_EXPORT_PIXEL),
2375					  RW_GPR(2),
2376					  RW_REL(ABSOLUTE),
2377					  INDEX_GPR(0),
2378					  ELEM_SIZE(1));
2379    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
2380					       SRC_SEL_Y(SQ_SEL_Y),
2381					       SRC_SEL_Z(SQ_SEL_Z),
2382					       SRC_SEL_W(SQ_SEL_W),
2383					       R6xx_ELEM_LOOP(0),
2384					       BURST_COUNT(1),
2385					       END_OF_PROGRAM(0),
2386					       VALID_PIXEL_MODE(0),
2387					       CF_INST(SQ_CF_INST_EXPORT_DONE),
2388					       WHOLE_QUAD_MODE(0),
2389					       BARRIER(1));
2390    /* 6 */
2391    shader[i++] = CF_DWORD0(ADDR(0));
2392    shader[i++] = CF_DWORD1(POP_COUNT(0),
2393			    CF_CONST(0),
2394			    COND(SQ_CF_COND_ACTIVE),
2395			    I_COUNT(0),
2396			    CALL_COUNT(0),
2397			    END_OF_PROGRAM(0),
2398			    VALID_PIXEL_MODE(0),
2399			    CF_INST(SQ_CF_INST_RETURN),
2400			    WHOLE_QUAD_MODE(0),
2401			    BARRIER(1));
2402
2403    /* 7 non-mask sub */
2404    shader[i++] = CF_DWORD0(ADDR(18));
2405    shader[i++] = CF_DWORD1(POP_COUNT(0),
2406			    CF_CONST(0),
2407			    COND(SQ_CF_COND_ACTIVE),
2408			    I_COUNT(1),
2409			    CALL_COUNT(0),
2410			    END_OF_PROGRAM(0),
2411			    VALID_PIXEL_MODE(0),
2412			    CF_INST(SQ_CF_INST_TEX),
2413			    WHOLE_QUAD_MODE(0),
2414			    BARRIER(1));
2415    /* 8 */
2416    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
2417					  TYPE(SQ_EXPORT_PIXEL),
2418					  RW_GPR(0),
2419					  RW_REL(ABSOLUTE),
2420					  INDEX_GPR(0),
2421					  ELEM_SIZE(1));
2422    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
2423					       SRC_SEL_Y(SQ_SEL_Y),
2424					       SRC_SEL_Z(SQ_SEL_Z),
2425					       SRC_SEL_W(SQ_SEL_W),
2426					       R6xx_ELEM_LOOP(0),
2427					       BURST_COUNT(1),
2428					       END_OF_PROGRAM(0),
2429					       VALID_PIXEL_MODE(0),
2430					       CF_INST(SQ_CF_INST_EXPORT_DONE),
2431					       WHOLE_QUAD_MODE(0),
2432					       BARRIER(1));
2433    /* 9 */
2434    shader[i++] = CF_DWORD0(ADDR(0));
2435    shader[i++] = CF_DWORD1(POP_COUNT(0),
2436			    CF_CONST(0),
2437			    COND(SQ_CF_COND_ACTIVE),
2438			    I_COUNT(0),
2439			    CALL_COUNT(0),
2440			    END_OF_PROGRAM(0),
2441			    VALID_PIXEL_MODE(0),
2442			    CF_INST(SQ_CF_INST_RETURN),
2443			    WHOLE_QUAD_MODE(0),
2444			    BARRIER(1));
2445
2446    /* 10 - alu 0 */
2447    /* MUL gpr[2].x gpr[1].x gpr[0].x */
2448    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
2449			     SRC0_REL(ABSOLUTE),
2450			     SRC0_ELEM(ELEM_X),
2451			     SRC0_NEG(0),
2452			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2453			     SRC1_REL(ABSOLUTE),
2454			     SRC1_ELEM(ELEM_X),
2455			     SRC1_NEG(0),
2456			     INDEX_MODE(SQ_INDEX_LOOP),
2457			     PRED_SEL(SQ_PRED_SEL_OFF),
2458			     LAST(0));
2459    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2460				 SRC0_ABS(0),
2461				 SRC1_ABS(0),
2462				 UPDATE_EXECUTE_MASK(0),
2463				 UPDATE_PRED(0),
2464				 WRITE_MASK(1),
2465				 FOG_MERGE(0),
2466				 OMOD(SQ_ALU_OMOD_OFF),
2467				 ALU_INST(SQ_OP2_INST_MUL),
2468				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2469				 DST_GPR(2),
2470				 DST_REL(ABSOLUTE),
2471				 DST_ELEM(ELEM_X),
2472				 CLAMP(1));
2473    /* 11 - alu 1 */
2474    /* MUL gpr[2].y gpr[1].y gpr[0].y */
2475    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
2476			     SRC0_REL(ABSOLUTE),
2477			     SRC0_ELEM(ELEM_Y),
2478			     SRC0_NEG(0),
2479			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2480			     SRC1_REL(ABSOLUTE),
2481			     SRC1_ELEM(ELEM_Y),
2482			     SRC1_NEG(0),
2483			     INDEX_MODE(SQ_INDEX_LOOP),
2484			     PRED_SEL(SQ_PRED_SEL_OFF),
2485			     LAST(0));
2486    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2487				 SRC0_ABS(0),
2488				 SRC1_ABS(0),
2489				 UPDATE_EXECUTE_MASK(0),
2490				 UPDATE_PRED(0),
2491				 WRITE_MASK(1),
2492				 FOG_MERGE(0),
2493				 OMOD(SQ_ALU_OMOD_OFF),
2494				 ALU_INST(SQ_OP2_INST_MUL),
2495				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2496				 DST_GPR(2),
2497				 DST_REL(ABSOLUTE),
2498				 DST_ELEM(ELEM_Y),
2499				 CLAMP(1));
2500    /* 12 - alu 2 */
2501    /* MUL gpr[2].z gpr[1].z gpr[0].z */
2502    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
2503			     SRC0_REL(ABSOLUTE),
2504			     SRC0_ELEM(ELEM_Z),
2505			     SRC0_NEG(0),
2506			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2507			     SRC1_REL(ABSOLUTE),
2508			     SRC1_ELEM(ELEM_Z),
2509			     SRC1_NEG(0),
2510			     INDEX_MODE(SQ_INDEX_LOOP),
2511			     PRED_SEL(SQ_PRED_SEL_OFF),
2512			     LAST(0));
2513    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2514				 SRC0_ABS(0),
2515				 SRC1_ABS(0),
2516				 UPDATE_EXECUTE_MASK(0),
2517				 UPDATE_PRED(0),
2518				 WRITE_MASK(1),
2519				 FOG_MERGE(0),
2520				 OMOD(SQ_ALU_OMOD_OFF),
2521				 ALU_INST(SQ_OP2_INST_MUL),
2522				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2523				 DST_GPR(2),
2524				 DST_REL(ABSOLUTE),
2525				 DST_ELEM(ELEM_Z),
2526				 CLAMP(1));
2527    /* 13 - alu 3 */
2528    /* MUL gpr[2].w gpr[1].w gpr[0].w */
2529    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
2530			     SRC0_REL(ABSOLUTE),
2531			     SRC0_ELEM(ELEM_W),
2532			     SRC0_NEG(0),
2533			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2534			     SRC1_REL(ABSOLUTE),
2535			     SRC1_ELEM(ELEM_W),
2536			     SRC1_NEG(0),
2537			     INDEX_MODE(SQ_INDEX_LOOP),
2538			     PRED_SEL(SQ_PRED_SEL_OFF),
2539			     LAST(1));
2540    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2541				 SRC0_ABS(0),
2542				 SRC1_ABS(0),
2543				 UPDATE_EXECUTE_MASK(0),
2544				 UPDATE_PRED(0),
2545				 WRITE_MASK(1),
2546				 FOG_MERGE(0),
2547				 OMOD(SQ_ALU_OMOD_OFF),
2548				 ALU_INST(SQ_OP2_INST_MUL),
2549				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2550				 DST_GPR(2),
2551				 DST_REL(ABSOLUTE),
2552				 DST_ELEM(ELEM_W),
2553				 CLAMP(1));
2554
2555    /* 14/15 - src - mask */
2556    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
2557			     BC_FRAC_MODE(0),
2558			     FETCH_WHOLE_QUAD(0),
2559			     RESOURCE_ID(0),
2560			     SRC_GPR(0),
2561			     SRC_REL(ABSOLUTE),
2562			     R7xx_ALT_CONST(0));
2563    shader[i++] = TEX_DWORD1(DST_GPR(0),
2564			     DST_REL(ABSOLUTE),
2565			     DST_SEL_X(SQ_SEL_X),
2566			     DST_SEL_Y(SQ_SEL_Y),
2567			     DST_SEL_Z(SQ_SEL_Z),
2568			     DST_SEL_W(SQ_SEL_W),
2569			     LOD_BIAS(0),
2570			     COORD_TYPE_X(TEX_NORMALIZED),
2571			     COORD_TYPE_Y(TEX_NORMALIZED),
2572			     COORD_TYPE_Z(TEX_NORMALIZED),
2573			     COORD_TYPE_W(TEX_NORMALIZED));
2574    shader[i++] = TEX_DWORD2(OFFSET_X(0),
2575			     OFFSET_Y(0),
2576			     OFFSET_Z(0),
2577			     SAMPLER_ID(0),
2578			     SRC_SEL_X(SQ_SEL_X),
2579			     SRC_SEL_Y(SQ_SEL_Y),
2580			     SRC_SEL_Z(SQ_SEL_0),
2581			     SRC_SEL_W(SQ_SEL_1));
2582    shader[i++] = TEX_DWORD_PAD;
2583    /* 16/17 - mask */
2584    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
2585			     BC_FRAC_MODE(0),
2586			     FETCH_WHOLE_QUAD(0),
2587			     RESOURCE_ID(1),
2588			     SRC_GPR(1),
2589			     SRC_REL(ABSOLUTE),
2590			     R7xx_ALT_CONST(0));
2591    shader[i++] = TEX_DWORD1(DST_GPR(1),
2592			     DST_REL(ABSOLUTE),
2593			     DST_SEL_X(SQ_SEL_X),
2594			     DST_SEL_Y(SQ_SEL_Y),
2595			     DST_SEL_Z(SQ_SEL_Z),
2596			     DST_SEL_W(SQ_SEL_W),
2597			     LOD_BIAS(0),
2598			     COORD_TYPE_X(TEX_NORMALIZED),
2599			     COORD_TYPE_Y(TEX_NORMALIZED),
2600			     COORD_TYPE_Z(TEX_NORMALIZED),
2601			     COORD_TYPE_W(TEX_NORMALIZED));
2602    shader[i++] = TEX_DWORD2(OFFSET_X(0),
2603			     OFFSET_Y(0),
2604			     OFFSET_Z(0),
2605			     SAMPLER_ID(1),
2606			     SRC_SEL_X(SQ_SEL_X),
2607			     SRC_SEL_Y(SQ_SEL_Y),
2608			     SRC_SEL_Z(SQ_SEL_0),
2609			     SRC_SEL_W(SQ_SEL_1));
2610    shader[i++] = TEX_DWORD_PAD;
2611
2612    /* 18/19 - src - non-mask */
2613    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
2614			     BC_FRAC_MODE(0),
2615			     FETCH_WHOLE_QUAD(0),
2616			     RESOURCE_ID(0),
2617			     SRC_GPR(0),
2618			     SRC_REL(ABSOLUTE),
2619			     R7xx_ALT_CONST(0));
2620    shader[i++] = TEX_DWORD1(DST_GPR(0),
2621			     DST_REL(ABSOLUTE),
2622			     DST_SEL_X(SQ_SEL_X),
2623			     DST_SEL_Y(SQ_SEL_Y),
2624			     DST_SEL_Z(SQ_SEL_Z),
2625			     DST_SEL_W(SQ_SEL_W),
2626			     LOD_BIAS(0),
2627			     COORD_TYPE_X(TEX_NORMALIZED),
2628			     COORD_TYPE_Y(TEX_NORMALIZED),
2629			     COORD_TYPE_Z(TEX_NORMALIZED),
2630			     COORD_TYPE_W(TEX_NORMALIZED));
2631    shader[i++] = TEX_DWORD2(OFFSET_X(0),
2632			     OFFSET_Y(0),
2633			     OFFSET_Z(0),
2634			     SAMPLER_ID(0),
2635			     SRC_SEL_X(SQ_SEL_X),
2636			     SRC_SEL_Y(SQ_SEL_Y),
2637			     SRC_SEL_Z(SQ_SEL_0),
2638			     SRC_SEL_W(SQ_SEL_1));
2639    shader[i++] = TEX_DWORD_PAD;
2640
2641    return i;
2642}
2643