1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Author: Alex Deucher <alexander.deucher@amd.com>
24 *
25 */
26
27#ifdef HAVE_CONFIG_H
28#include "config.h"
29#endif
30
31#include "xf86.h"
32
33#include "radeon.h"
34#include "r600_shader.h"
35#include "r600_reg.h"
36
37/* solid vs --------------------------------------- */
38int R600_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader)
39{
40    int i = 0;
41
42    /* 0 */
43    shader[i++] = CF_DWORD0(ADDR(4));
44    shader[i++] = CF_DWORD1(POP_COUNT(0),
45			    CF_CONST(0),
46			    COND(SQ_CF_COND_ACTIVE),
47			    I_COUNT(1),
48			    CALL_COUNT(0),
49			    END_OF_PROGRAM(0),
50			    VALID_PIXEL_MODE(0),
51			    CF_INST(SQ_CF_INST_VTX),
52			    WHOLE_QUAD_MODE(0),
53			    BARRIER(1));
54    /* 1 */
55    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
56					  TYPE(SQ_EXPORT_POS),
57					  RW_GPR(1),
58					  RW_REL(ABSOLUTE),
59					  INDEX_GPR(0),
60					  ELEM_SIZE(0));
61    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
62					       SRC_SEL_Y(SQ_SEL_Y),
63					       SRC_SEL_Z(SQ_SEL_Z),
64					       SRC_SEL_W(SQ_SEL_W),
65					       R6xx_ELEM_LOOP(0),
66					       BURST_COUNT(1),
67					       END_OF_PROGRAM(0),
68					       VALID_PIXEL_MODE(0),
69					       CF_INST(SQ_CF_INST_EXPORT_DONE),
70					       WHOLE_QUAD_MODE(0),
71					       BARRIER(1));
72    /* 2 - always export a param whether it's used or not */
73    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
74					  TYPE(SQ_EXPORT_PARAM),
75					  RW_GPR(0),
76					  RW_REL(ABSOLUTE),
77					  INDEX_GPR(0),
78					  ELEM_SIZE(0));
79    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
80					       SRC_SEL_Y(SQ_SEL_Y),
81					       SRC_SEL_Z(SQ_SEL_Z),
82					       SRC_SEL_W(SQ_SEL_W),
83					       R6xx_ELEM_LOOP(0),
84					       BURST_COUNT(0),
85					       END_OF_PROGRAM(1),
86					       VALID_PIXEL_MODE(0),
87					       CF_INST(SQ_CF_INST_EXPORT_DONE),
88					       WHOLE_QUAD_MODE(0),
89					       BARRIER(0));
90    /* 3 - padding */
91    shader[i++] = 0x00000000;
92    shader[i++] = 0x00000000;
93    /* 4/5 */
94    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
95			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
96			     FETCH_WHOLE_QUAD(0),
97			     BUFFER_ID(0),
98			     SRC_GPR(0),
99			     SRC_REL(ABSOLUTE),
100			     SRC_SEL_X(SQ_SEL_X),
101			     MEGA_FETCH_COUNT(8));
102    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
103				 DST_REL(0),
104				 DST_SEL_X(SQ_SEL_X),
105				 DST_SEL_Y(SQ_SEL_Y),
106				 DST_SEL_Z(SQ_SEL_0),
107				 DST_SEL_W(SQ_SEL_1),
108				 USE_CONST_FIELDS(0),
109				 DATA_FORMAT(FMT_32_32_FLOAT),
110				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
111				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
112				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
113    shader[i++] = VTX_DWORD2(OFFSET(0),
114#if X_BYTE_ORDER == X_BIG_ENDIAN
115			     ENDIAN_SWAP(SQ_ENDIAN_8IN32),
116#else
117			     ENDIAN_SWAP(SQ_ENDIAN_NONE),
118#endif
119			     CONST_BUF_NO_STRIDE(0),
120			     MEGA_FETCH(1));
121    shader[i++] = VTX_DWORD_PAD;
122
123    return i;
124}
125
126/* solid ps --------------------------------------- */
127int R600_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader)
128{
129    int i = 0;
130
131    /* 0 */
132    shader[i++] = CF_ALU_DWORD0(ADDR(2),
133				KCACHE_BANK0(0),
134				KCACHE_BANK1(0),
135				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
136    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
137				KCACHE_ADDR0(0),
138				KCACHE_ADDR1(0),
139				I_COUNT(4),
140				USES_WATERFALL(0),
141				CF_INST(SQ_CF_INST_ALU),
142				WHOLE_QUAD_MODE(0),
143				BARRIER(1));
144    /* 1 */
145    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
146					  TYPE(SQ_EXPORT_PIXEL),
147					  RW_GPR(0),
148					  RW_REL(ABSOLUTE),
149					  INDEX_GPR(0),
150					  ELEM_SIZE(1));
151    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
152					       SRC_SEL_Y(SQ_SEL_Y),
153					       SRC_SEL_Z(SQ_SEL_Z),
154					       SRC_SEL_W(SQ_SEL_W),
155					       R6xx_ELEM_LOOP(0),
156					       BURST_COUNT(1),
157					       END_OF_PROGRAM(1),
158					       VALID_PIXEL_MODE(0),
159					       CF_INST(SQ_CF_INST_EXPORT_DONE),
160					       WHOLE_QUAD_MODE(0),
161					       BARRIER(1));
162
163    /* 2 */
164    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
165			     SRC0_REL(ABSOLUTE),
166			     SRC0_ELEM(ELEM_X),
167			     SRC0_NEG(0),
168			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
169			     SRC1_REL(ABSOLUTE),
170			     SRC1_ELEM(ELEM_X),
171			     SRC1_NEG(0),
172			     INDEX_MODE(SQ_INDEX_AR_X),
173			     PRED_SEL(SQ_PRED_SEL_OFF),
174			     LAST(0));
175    shader[i++] = ALU_DWORD1_OP2(ChipSet,
176				 SRC0_ABS(0),
177				 SRC1_ABS(0),
178				 UPDATE_EXECUTE_MASK(0),
179				 UPDATE_PRED(0),
180				 WRITE_MASK(1),
181				 FOG_MERGE(0),
182				 OMOD(SQ_ALU_OMOD_OFF),
183				 ALU_INST(SQ_OP2_INST_MOV),
184				 BANK_SWIZZLE(SQ_ALU_VEC_012),
185				 DST_GPR(0),
186				 DST_REL(ABSOLUTE),
187				 DST_ELEM(ELEM_X),
188				 CLAMP(1));
189    /* 3 */
190    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
191			     SRC0_REL(ABSOLUTE),
192			     SRC0_ELEM(ELEM_Y),
193			     SRC0_NEG(0),
194			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
195			     SRC1_REL(ABSOLUTE),
196			     SRC1_ELEM(ELEM_Y),
197			     SRC1_NEG(0),
198			     INDEX_MODE(SQ_INDEX_AR_X),
199			     PRED_SEL(SQ_PRED_SEL_OFF),
200			     LAST(0));
201    shader[i++] = ALU_DWORD1_OP2(ChipSet,
202				 SRC0_ABS(0),
203				 SRC1_ABS(0),
204				 UPDATE_EXECUTE_MASK(0),
205				 UPDATE_PRED(0),
206				 WRITE_MASK(1),
207				 FOG_MERGE(0),
208				 OMOD(SQ_ALU_OMOD_OFF),
209				 ALU_INST(SQ_OP2_INST_MOV),
210				 BANK_SWIZZLE(SQ_ALU_VEC_012),
211				 DST_GPR(0),
212				 DST_REL(ABSOLUTE),
213				 DST_ELEM(ELEM_Y),
214				 CLAMP(1));
215    /* 4 */
216    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
217			     SRC0_REL(ABSOLUTE),
218			     SRC0_ELEM(ELEM_Z),
219			     SRC0_NEG(0),
220			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
221			     SRC1_REL(ABSOLUTE),
222			     SRC1_ELEM(ELEM_Z),
223			     SRC1_NEG(0),
224			     INDEX_MODE(SQ_INDEX_AR_X),
225			     PRED_SEL(SQ_PRED_SEL_OFF),
226			     LAST(0));
227    shader[i++] = ALU_DWORD1_OP2(ChipSet,
228				 SRC0_ABS(0),
229				 SRC1_ABS(0),
230				 UPDATE_EXECUTE_MASK(0),
231				 UPDATE_PRED(0),
232				 WRITE_MASK(1),
233				 FOG_MERGE(0),
234				 OMOD(SQ_ALU_OMOD_OFF),
235				 ALU_INST(SQ_OP2_INST_MOV),
236				 BANK_SWIZZLE(SQ_ALU_VEC_012),
237				 DST_GPR(0),
238				 DST_REL(ABSOLUTE),
239				 DST_ELEM(ELEM_Z),
240				 CLAMP(1));
241    /* 5 */
242    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
243			     SRC0_REL(ABSOLUTE),
244			     SRC0_ELEM(ELEM_W),
245			     SRC0_NEG(0),
246			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
247			     SRC1_REL(ABSOLUTE),
248			     SRC1_ELEM(ELEM_W),
249			     SRC1_NEG(0),
250			     INDEX_MODE(SQ_INDEX_AR_X),
251			     PRED_SEL(SQ_PRED_SEL_OFF),
252			     LAST(1));
253    shader[i++] = ALU_DWORD1_OP2(ChipSet,
254				 SRC0_ABS(0),
255				 SRC1_ABS(0),
256				 UPDATE_EXECUTE_MASK(0),
257				 UPDATE_PRED(0),
258				 WRITE_MASK(1),
259				 FOG_MERGE(0),
260				 OMOD(SQ_ALU_OMOD_OFF),
261				 ALU_INST(SQ_OP2_INST_MOV),
262				 BANK_SWIZZLE(SQ_ALU_VEC_012),
263				 DST_GPR(0),
264				 DST_REL(ABSOLUTE),
265				 DST_ELEM(ELEM_W),
266				 CLAMP(1));
267
268    return i;
269}
270
271/* copy vs --------------------------------------- */
272int R600_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader)
273{
274    int i = 0;
275
276    /* 0 */
277    shader[i++] = CF_DWORD0(ADDR(4));
278    shader[i++] = CF_DWORD1(POP_COUNT(0),
279			    CF_CONST(0),
280			    COND(SQ_CF_COND_ACTIVE),
281			    I_COUNT(2),
282			    CALL_COUNT(0),
283			    END_OF_PROGRAM(0),
284			    VALID_PIXEL_MODE(0),
285			    CF_INST(SQ_CF_INST_VTX),
286			    WHOLE_QUAD_MODE(0),
287			    BARRIER(1));
288    /* 1 */
289    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
290					  TYPE(SQ_EXPORT_POS),
291					  RW_GPR(1),
292					  RW_REL(ABSOLUTE),
293					  INDEX_GPR(0),
294					  ELEM_SIZE(0));
295    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
296					       SRC_SEL_Y(SQ_SEL_Y),
297					       SRC_SEL_Z(SQ_SEL_Z),
298					       SRC_SEL_W(SQ_SEL_W),
299					       R6xx_ELEM_LOOP(0),
300					       BURST_COUNT(0),
301					       END_OF_PROGRAM(0),
302					       VALID_PIXEL_MODE(0),
303					       CF_INST(SQ_CF_INST_EXPORT_DONE),
304					       WHOLE_QUAD_MODE(0),
305					       BARRIER(1));
306    /* 2 */
307    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
308					  TYPE(SQ_EXPORT_PARAM),
309					  RW_GPR(0),
310					  RW_REL(ABSOLUTE),
311					  INDEX_GPR(0),
312					  ELEM_SIZE(0));
313    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
314					       SRC_SEL_Y(SQ_SEL_Y),
315					       SRC_SEL_Z(SQ_SEL_Z),
316					       SRC_SEL_W(SQ_SEL_W),
317					       R6xx_ELEM_LOOP(0),
318					       BURST_COUNT(0),
319					       END_OF_PROGRAM(1),
320					       VALID_PIXEL_MODE(0),
321					       CF_INST(SQ_CF_INST_EXPORT_DONE),
322					       WHOLE_QUAD_MODE(0),
323					       BARRIER(0));
324    /* 3 */
325    shader[i++] = 0x00000000;
326    shader[i++] = 0x00000000;
327    /* 4/5 */
328    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
329			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
330			     FETCH_WHOLE_QUAD(0),
331			     BUFFER_ID(0),
332			     SRC_GPR(0),
333			     SRC_REL(ABSOLUTE),
334			     SRC_SEL_X(SQ_SEL_X),
335			     MEGA_FETCH_COUNT(16));
336    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
337				 DST_REL(0),
338				 DST_SEL_X(SQ_SEL_X),
339				 DST_SEL_Y(SQ_SEL_Y),
340				 DST_SEL_Z(SQ_SEL_0),
341				 DST_SEL_W(SQ_SEL_1),
342				 USE_CONST_FIELDS(0),
343				 DATA_FORMAT(FMT_32_32_FLOAT),
344				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
345				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
346				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
347    shader[i++] = VTX_DWORD2(OFFSET(0),
348#if X_BYTE_ORDER == X_BIG_ENDIAN
349			     ENDIAN_SWAP(SQ_ENDIAN_8IN32),
350#else
351			     ENDIAN_SWAP(SQ_ENDIAN_NONE),
352#endif
353			     CONST_BUF_NO_STRIDE(0),
354			     MEGA_FETCH(1));
355    shader[i++] = VTX_DWORD_PAD;
356    /* 6/7 */
357    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
358			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
359			     FETCH_WHOLE_QUAD(0),
360			     BUFFER_ID(0),
361			     SRC_GPR(0),
362			     SRC_REL(ABSOLUTE),
363			     SRC_SEL_X(SQ_SEL_X),
364			     MEGA_FETCH_COUNT(8));
365    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
366				 DST_REL(0),
367				 DST_SEL_X(SQ_SEL_X),
368				 DST_SEL_Y(SQ_SEL_Y),
369				 DST_SEL_Z(SQ_SEL_0),
370				 DST_SEL_W(SQ_SEL_1),
371				 USE_CONST_FIELDS(0),
372				 DATA_FORMAT(FMT_32_32_FLOAT),
373				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
374				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
375				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
376    shader[i++] = VTX_DWORD2(OFFSET(8),
377#if X_BYTE_ORDER == X_BIG_ENDIAN
378                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
379#else
380                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
381#endif
382			     CONST_BUF_NO_STRIDE(0),
383			     MEGA_FETCH(0));
384    shader[i++] = VTX_DWORD_PAD;
385
386    return i;
387}
388
389/* copy ps --------------------------------------- */
390int R600_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader)
391{
392    int i=0;
393
394    /* CF INST 0 */
395    shader[i++] = CF_DWORD0(ADDR(2));
396    shader[i++] = CF_DWORD1(POP_COUNT(0),
397			    CF_CONST(0),
398			    COND(SQ_CF_COND_ACTIVE),
399			    I_COUNT(1),
400			    CALL_COUNT(0),
401			    END_OF_PROGRAM(0),
402			    VALID_PIXEL_MODE(0),
403			    CF_INST(SQ_CF_INST_TEX),
404			    WHOLE_QUAD_MODE(0),
405			    BARRIER(1));
406    /* CF INST 1 */
407    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
408					  TYPE(SQ_EXPORT_PIXEL),
409					  RW_GPR(0),
410					  RW_REL(ABSOLUTE),
411					  INDEX_GPR(0),
412					  ELEM_SIZE(1));
413    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
414					       SRC_SEL_Y(SQ_SEL_Y),
415					       SRC_SEL_Z(SQ_SEL_Z),
416					       SRC_SEL_W(SQ_SEL_W),
417					       R6xx_ELEM_LOOP(0),
418					       BURST_COUNT(1),
419					       END_OF_PROGRAM(1),
420					       VALID_PIXEL_MODE(0),
421					       CF_INST(SQ_CF_INST_EXPORT_DONE),
422					       WHOLE_QUAD_MODE(0),
423					       BARRIER(1));
424    /* TEX INST 0 */
425    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
426			     BC_FRAC_MODE(0),
427			     FETCH_WHOLE_QUAD(0),
428			     RESOURCE_ID(0),
429			     SRC_GPR(0),
430			     SRC_REL(ABSOLUTE),
431			     R7xx_ALT_CONST(0));
432    shader[i++] = TEX_DWORD1(DST_GPR(0),
433			     DST_REL(ABSOLUTE),
434			     DST_SEL_X(SQ_SEL_X), /* R */
435			     DST_SEL_Y(SQ_SEL_Y), /* G */
436			     DST_SEL_Z(SQ_SEL_Z), /* B */
437			     DST_SEL_W(SQ_SEL_W), /* A */
438			     LOD_BIAS(0),
439			     COORD_TYPE_X(TEX_UNNORMALIZED),
440			     COORD_TYPE_Y(TEX_UNNORMALIZED),
441			     COORD_TYPE_Z(TEX_UNNORMALIZED),
442			     COORD_TYPE_W(TEX_UNNORMALIZED));
443    shader[i++] = TEX_DWORD2(OFFSET_X(0),
444			     OFFSET_Y(0),
445			     OFFSET_Z(0),
446			     SAMPLER_ID(0),
447			     SRC_SEL_X(SQ_SEL_X),
448			     SRC_SEL_Y(SQ_SEL_Y),
449			     SRC_SEL_Z(SQ_SEL_0),
450			     SRC_SEL_W(SQ_SEL_1));
451    shader[i++] = TEX_DWORD_PAD;
452
453    return i;
454}
455
456/*
457 * ; xv vertex shader
458 * 00 VTX: ADDR(4) CNT(2)
459 *       0  VFETCH R1.xy01, R0.x, fc0  MEGA(16) FORMAT(32_32_FLOAT)
460 *          FORMAT_COMP(SIGNED)
461 *       1  VFETCH R0.xy01, R0.x, fc0  MINI(8) OFFSET(8) FORMAT(32_32_FLOAT)
462 *          FORMAT_COMP(SIGNED)
463 * 01 EXP_DONE: POS0, R1
464 * 02 EXP_DONE: PARAM0, R0  NO_BARRIER
465 * END_OF_PROGRAM
466 */
467int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
468{
469    int i = 0;
470
471    /* 0 */
472    shader[i++] = CF_DWORD0(ADDR(6));
473    shader[i++] = CF_DWORD1(POP_COUNT(0),
474                            CF_CONST(0),
475                            COND(SQ_CF_COND_ACTIVE),
476                            I_COUNT(2),
477                            CALL_COUNT(0),
478                            END_OF_PROGRAM(0),
479                            VALID_PIXEL_MODE(0),
480                            CF_INST(SQ_CF_INST_VTX),
481                            WHOLE_QUAD_MODE(0),
482                            BARRIER(1));
483
484    /* 1 - ALU */
485    shader[i++] = CF_ALU_DWORD0(ADDR(4),
486				KCACHE_BANK0(0),
487				KCACHE_BANK1(0),
488				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
489    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
490				KCACHE_ADDR0(0),
491				KCACHE_ADDR1(0),
492				I_COUNT(2),
493				USES_WATERFALL(0),
494				CF_INST(SQ_CF_INST_ALU),
495				WHOLE_QUAD_MODE(0),
496				BARRIER(1));
497
498    /* 2 */
499    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
500                                          TYPE(SQ_EXPORT_POS),
501                                          RW_GPR(1),
502                                          RW_REL(ABSOLUTE),
503                                          INDEX_GPR(0),
504                                          ELEM_SIZE(3));
505    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
506                                               SRC_SEL_Y(SQ_SEL_Y),
507                                               SRC_SEL_Z(SQ_SEL_Z),
508                                               SRC_SEL_W(SQ_SEL_W),
509                                               R6xx_ELEM_LOOP(0),
510                                               BURST_COUNT(1),
511                                               END_OF_PROGRAM(0),
512                                               VALID_PIXEL_MODE(0),
513                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
514                                               WHOLE_QUAD_MODE(0),
515                                               BARRIER(1));
516    /* 3 */
517    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
518                                          TYPE(SQ_EXPORT_PARAM),
519                                          RW_GPR(0),
520                                          RW_REL(ABSOLUTE),
521                                          INDEX_GPR(0),
522                                          ELEM_SIZE(3));
523    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
524                                               SRC_SEL_Y(SQ_SEL_Y),
525                                               SRC_SEL_Z(SQ_SEL_Z),
526                                               SRC_SEL_W(SQ_SEL_W),
527                                               R6xx_ELEM_LOOP(0),
528                                               BURST_COUNT(1),
529                                               END_OF_PROGRAM(1),
530                                               VALID_PIXEL_MODE(0),
531                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
532                                               WHOLE_QUAD_MODE(0),
533                                               BARRIER(0));
534
535
536    /* 4 texX / w */
537    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
538                             SRC0_REL(ABSOLUTE),
539                             SRC0_ELEM(ELEM_X),
540                             SRC0_NEG(0),
541                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
542                             SRC1_REL(ABSOLUTE),
543                             SRC1_ELEM(ELEM_X),
544                             SRC1_NEG(0),
545                             INDEX_MODE(SQ_INDEX_AR_X),
546                             PRED_SEL(SQ_PRED_SEL_OFF),
547                             LAST(0));
548    shader[i++] = ALU_DWORD1_OP2(ChipSet,
549                                 SRC0_ABS(0),
550                                 SRC1_ABS(0),
551                                 UPDATE_EXECUTE_MASK(0),
552                                 UPDATE_PRED(0),
553                                 WRITE_MASK(1),
554                                 FOG_MERGE(0),
555                                 OMOD(SQ_ALU_OMOD_OFF),
556                                 ALU_INST(SQ_OP2_INST_MUL),
557                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
558                                 DST_GPR(0),
559                                 DST_REL(ABSOLUTE),
560                                 DST_ELEM(ELEM_X),
561                                 CLAMP(0));
562
563    /* 5 texY / h */
564    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
565                             SRC0_REL(ABSOLUTE),
566                             SRC0_ELEM(ELEM_Y),
567                             SRC0_NEG(0),
568                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
569                             SRC1_REL(ABSOLUTE),
570                             SRC1_ELEM(ELEM_Y),
571                             SRC1_NEG(0),
572                             INDEX_MODE(SQ_INDEX_AR_X),
573                             PRED_SEL(SQ_PRED_SEL_OFF),
574                             LAST(1));
575    shader[i++] = ALU_DWORD1_OP2(ChipSet,
576                                 SRC0_ABS(0),
577                                 SRC1_ABS(0),
578                                 UPDATE_EXECUTE_MASK(0),
579                                 UPDATE_PRED(0),
580                                 WRITE_MASK(1),
581                                 FOG_MERGE(0),
582                                 OMOD(SQ_ALU_OMOD_OFF),
583                                 ALU_INST(SQ_OP2_INST_MUL),
584                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
585                                 DST_GPR(0),
586                                 DST_REL(ABSOLUTE),
587                                 DST_ELEM(ELEM_Y),
588                                 CLAMP(0));
589
590    /* 6/7 */
591    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
592                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
593                             FETCH_WHOLE_QUAD(0),
594                             BUFFER_ID(0),
595                             SRC_GPR(0),
596                             SRC_REL(ABSOLUTE),
597                             SRC_SEL_X(SQ_SEL_X),
598                             MEGA_FETCH_COUNT(16));
599    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
600                                 DST_REL(ABSOLUTE),
601                                 DST_SEL_X(SQ_SEL_X),
602                                 DST_SEL_Y(SQ_SEL_Y),
603                                 DST_SEL_Z(SQ_SEL_0),
604                                 DST_SEL_W(SQ_SEL_1),
605                                 USE_CONST_FIELDS(0),
606                                 DATA_FORMAT(FMT_32_32_FLOAT),
607                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
608                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
609                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
610    shader[i++] = VTX_DWORD2(OFFSET(0),
611#if X_BYTE_ORDER == X_BIG_ENDIAN
612                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
613#else
614                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
615#endif
616                             CONST_BUF_NO_STRIDE(0),
617                             MEGA_FETCH(1));
618    shader[i++] = VTX_DWORD_PAD;
619    /* 8/9 */
620    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
621                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
622                             FETCH_WHOLE_QUAD(0),
623                             BUFFER_ID(0),
624                             SRC_GPR(0),
625                             SRC_REL(ABSOLUTE),
626                             SRC_SEL_X(SQ_SEL_X),
627                             MEGA_FETCH_COUNT(8));
628    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
629                                 DST_REL(ABSOLUTE),
630                                 DST_SEL_X(SQ_SEL_X),
631                                 DST_SEL_Y(SQ_SEL_Y),
632                                 DST_SEL_Z(SQ_SEL_0),
633                                 DST_SEL_W(SQ_SEL_1),
634                                 USE_CONST_FIELDS(0),
635                                 DATA_FORMAT(FMT_32_32_FLOAT),
636                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
637                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
638                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
639    shader[i++] = VTX_DWORD2(OFFSET(8),
640#if X_BYTE_ORDER == X_BIG_ENDIAN
641                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
642#else
643                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
644#endif
645                             CONST_BUF_NO_STRIDE(0),
646                             MEGA_FETCH(0));
647    shader[i++] = VTX_DWORD_PAD;
648
649    return i;
650}
651
652int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
653{
654    int i = 0;
655
656    /* 0 */
657    shader[i++] = CF_DWORD0(ADDR(16));
658    shader[i++] = CF_DWORD1(POP_COUNT(0),
659                            CF_CONST(0),
660                            COND(SQ_CF_COND_BOOL),
661                            I_COUNT(0),
662                            CALL_COUNT(0),
663                            END_OF_PROGRAM(0),
664                            VALID_PIXEL_MODE(0),
665                            CF_INST(SQ_CF_INST_CALL),
666                            WHOLE_QUAD_MODE(0),
667                            BARRIER(0));
668    /* 1 */
669    shader[i++] = CF_DWORD0(ADDR(24));
670    shader[i++] = CF_DWORD1(POP_COUNT(0),
671                            CF_CONST(0),
672                            COND(SQ_CF_COND_NOT_BOOL),
673                            I_COUNT(0),
674                            CALL_COUNT(0),
675                            END_OF_PROGRAM(0),
676                            VALID_PIXEL_MODE(0),
677                            CF_INST(SQ_CF_INST_CALL),
678                            WHOLE_QUAD_MODE(0),
679                            BARRIER(0));
680    /* 2 */
681    shader[i++] = CF_ALU_DWORD0(ADDR(4),
682                                KCACHE_BANK0(0),
683                                KCACHE_BANK1(0),
684                                KCACHE_MODE0(SQ_CF_KCACHE_NOP));
685    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
686                                KCACHE_ADDR0(0),
687                                KCACHE_ADDR1(0),
688                                I_COUNT(12),
689                                USES_WATERFALL(0),
690                                CF_INST(SQ_CF_INST_ALU),
691                                WHOLE_QUAD_MODE(0),
692                                BARRIER(1));
693    /* 3 */
694    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
695                                          TYPE(SQ_EXPORT_PIXEL),
696                                          RW_GPR(2),
697                                          RW_REL(ABSOLUTE),
698                                          INDEX_GPR(0),
699                                          ELEM_SIZE(3));
700    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
701                                               SRC_SEL_Y(SQ_SEL_Y),
702                                               SRC_SEL_Z(SQ_SEL_Z),
703                                               SRC_SEL_W(SQ_SEL_W),
704                                               R6xx_ELEM_LOOP(0),
705                                               BURST_COUNT(1),
706                                               END_OF_PROGRAM(1),
707                                               VALID_PIXEL_MODE(0),
708                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
709                                               WHOLE_QUAD_MODE(0),
710                                               BARRIER(1));
711    /* 4,5,6,7 */
712    /* r2.x = MAD(c0.w, r1.x, c0.x) */
713    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
714                             SRC0_REL(ABSOLUTE),
715                             SRC0_ELEM(ELEM_W),
716                             SRC0_NEG(0),
717                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
718                             SRC1_REL(ABSOLUTE),
719                             SRC1_ELEM(ELEM_X),
720                             SRC1_NEG(0),
721                             INDEX_MODE(SQ_INDEX_LOOP),
722                             PRED_SEL(SQ_PRED_SEL_OFF),
723                             LAST(0));
724    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0),
725                                 SRC2_REL(ABSOLUTE),
726                                 SRC2_ELEM(ELEM_X),
727                                 SRC2_NEG(0),
728                                 ALU_INST(SQ_OP3_INST_MULADD),
729                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
730                                 DST_GPR(2),
731                                 DST_REL(ABSOLUTE),
732                                 DST_ELEM(ELEM_X),
733                                 CLAMP(0));
734    /* r2.y = MAD(c0.w, r1.x, c0.y) */
735    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
736                             SRC0_REL(ABSOLUTE),
737                             SRC0_ELEM(ELEM_W),
738                             SRC0_NEG(0),
739                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
740                             SRC1_REL(ABSOLUTE),
741                             SRC1_ELEM(ELEM_X),
742                             SRC1_NEG(0),
743                             INDEX_MODE(SQ_INDEX_LOOP),
744                             PRED_SEL(SQ_PRED_SEL_OFF),
745                             LAST(0));
746    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0),
747                                 SRC2_REL(ABSOLUTE),
748                                 SRC2_ELEM(ELEM_Y),
749                                 SRC2_NEG(0),
750                                 ALU_INST(SQ_OP3_INST_MULADD),
751                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
752                                 DST_GPR(2),
753                                 DST_REL(ABSOLUTE),
754                                 DST_ELEM(ELEM_Y),
755                                 CLAMP(0));
756    /* r2.z = MAD(c0.w, r1.x, c0.z) */
757    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
758                             SRC0_REL(ABSOLUTE),
759                             SRC0_ELEM(ELEM_W),
760                             SRC0_NEG(0),
761                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
762                             SRC1_REL(ABSOLUTE),
763                             SRC1_ELEM(ELEM_X),
764                             SRC1_NEG(0),
765                             INDEX_MODE(SQ_INDEX_LOOP),
766                             PRED_SEL(SQ_PRED_SEL_OFF),
767                             LAST(0));
768    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0),
769                                 SRC2_REL(ABSOLUTE),
770                                 SRC2_ELEM(ELEM_Z),
771                                 SRC2_NEG(0),
772                                 ALU_INST(SQ_OP3_INST_MULADD),
773                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
774                                 DST_GPR(2),
775                                 DST_REL(ABSOLUTE),
776                                 DST_ELEM(ELEM_Z),
777                                 CLAMP(0));
778    /* r2.w = MAD(0, 0, 1) */
779    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
780                             SRC0_REL(ABSOLUTE),
781                             SRC0_ELEM(ELEM_X),
782                             SRC0_NEG(0),
783                             SRC1_SEL(SQ_ALU_SRC_0),
784                             SRC1_REL(ABSOLUTE),
785                             SRC1_ELEM(ELEM_X),
786                             SRC1_NEG(0),
787                             INDEX_MODE(SQ_INDEX_LOOP),
788                             PRED_SEL(SQ_PRED_SEL_OFF),
789                             LAST(1));
790    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
791                                 SRC2_REL(ABSOLUTE),
792                                 SRC2_ELEM(ELEM_X),
793                                 SRC2_NEG(0),
794                                 ALU_INST(SQ_OP3_INST_MULADD),
795                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
796                                 DST_GPR(2),
797                                 DST_REL(ABSOLUTE),
798                                 DST_ELEM(ELEM_W),
799                                 CLAMP(0));
800
801    /* 8,9,10,11 */
802    /* r2.x = MAD(c1.x, r1.y, pv.x) */
803    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
804                             SRC0_REL(ABSOLUTE),
805                             SRC0_ELEM(ELEM_X),
806                             SRC0_NEG(0),
807                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
808                             SRC1_REL(ABSOLUTE),
809                             SRC1_ELEM(ELEM_Y),
810                             SRC1_NEG(0),
811                             INDEX_MODE(SQ_INDEX_LOOP),
812                             PRED_SEL(SQ_PRED_SEL_OFF),
813                             LAST(0));
814    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
815                                 SRC2_REL(ABSOLUTE),
816                                 SRC2_ELEM(ELEM_X),
817                                 SRC2_NEG(0),
818                                 ALU_INST(SQ_OP3_INST_MULADD),
819                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
820                                 DST_GPR(2),
821                                 DST_REL(ABSOLUTE),
822                                 DST_ELEM(ELEM_X),
823                                 CLAMP(0));
824    /* r2.y = MAD(c1.y, r1.y, pv.y) */
825    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
826                             SRC0_REL(ABSOLUTE),
827                             SRC0_ELEM(ELEM_Y),
828                             SRC0_NEG(0),
829                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
830                             SRC1_REL(ABSOLUTE),
831                             SRC1_ELEM(ELEM_Y),
832                             SRC1_NEG(0),
833                             INDEX_MODE(SQ_INDEX_LOOP),
834                             PRED_SEL(SQ_PRED_SEL_OFF),
835                             LAST(0));
836    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
837                                 SRC2_REL(ABSOLUTE),
838                                 SRC2_ELEM(ELEM_Y),
839                                 SRC2_NEG(0),
840                                 ALU_INST(SQ_OP3_INST_MULADD),
841                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
842                                 DST_GPR(2),
843                                 DST_REL(ABSOLUTE),
844                                 DST_ELEM(ELEM_Y),
845                                 CLAMP(0));
846    /* r2.z = MAD(c1.z, r1.y, pv.z) */
847    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
848                             SRC0_REL(ABSOLUTE),
849                             SRC0_ELEM(ELEM_Z),
850                             SRC0_NEG(0),
851                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
852                             SRC1_REL(ABSOLUTE),
853                             SRC1_ELEM(ELEM_Y),
854                             SRC1_NEG(0),
855                             INDEX_MODE(SQ_INDEX_LOOP),
856                             PRED_SEL(SQ_PRED_SEL_OFF),
857                             LAST(0));
858    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
859                                 SRC2_REL(ABSOLUTE),
860                                 SRC2_ELEM(ELEM_Z),
861                                 SRC2_NEG(0),
862                                 ALU_INST(SQ_OP3_INST_MULADD),
863                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
864                                 DST_GPR(2),
865                                 DST_REL(ABSOLUTE),
866                                 DST_ELEM(ELEM_Z),
867                                 CLAMP(0));
868    /* r2.w = MAD(0, 0, 1) */
869    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
870                             SRC0_REL(ABSOLUTE),
871                             SRC0_ELEM(ELEM_X),
872                             SRC0_NEG(0),
873                             SRC1_SEL(SQ_ALU_SRC_0),
874                             SRC1_REL(ABSOLUTE),
875                             SRC1_ELEM(ELEM_X),
876                             SRC1_NEG(0),
877                             INDEX_MODE(SQ_INDEX_LOOP),
878                             PRED_SEL(SQ_PRED_SEL_OFF),
879                             LAST(1));
880    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
881                                 SRC2_REL(ABSOLUTE),
882                                 SRC2_ELEM(ELEM_W),
883                                 SRC2_NEG(0),
884                                 ALU_INST(SQ_OP3_INST_MULADD),
885                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
886                                 DST_GPR(2),
887                                 DST_REL(ABSOLUTE),
888                                 DST_ELEM(ELEM_W),
889                                 CLAMP(0));
890    /* 12,13,14,15 */
891    /* r2.x = MAD(c2.x, r1.z, pv.x) */
892    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2),
893                             SRC0_REL(ABSOLUTE),
894                             SRC0_ELEM(ELEM_X),
895                             SRC0_NEG(0),
896                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
897                             SRC1_REL(ABSOLUTE),
898                             SRC1_ELEM(ELEM_Z),
899                             SRC1_NEG(0),
900                             INDEX_MODE(SQ_INDEX_LOOP),
901                             PRED_SEL(SQ_PRED_SEL_OFF),
902                             LAST(0));
903    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
904                                 SRC2_REL(ABSOLUTE),
905                                 SRC2_ELEM(ELEM_X),
906                                 SRC2_NEG(0),
907                                 ALU_INST(SQ_OP3_INST_MULADD),
908                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
909                                 DST_GPR(2),
910                                 DST_REL(ABSOLUTE),
911                                 DST_ELEM(ELEM_X),
912                                 CLAMP(1));
913    /* r2.y = MAD(c2.y, r1.z, pv.y) */
914    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2),
915                             SRC0_REL(ABSOLUTE),
916                             SRC0_ELEM(ELEM_Y),
917                             SRC0_NEG(0),
918                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
919                             SRC1_REL(ABSOLUTE),
920                             SRC1_ELEM(ELEM_Z),
921                             SRC1_NEG(0),
922                             INDEX_MODE(SQ_INDEX_LOOP),
923                             PRED_SEL(SQ_PRED_SEL_OFF),
924                             LAST(0));
925    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
926                                 SRC2_REL(ABSOLUTE),
927                                 SRC2_ELEM(ELEM_Y),
928                                 SRC2_NEG(0),
929                                 ALU_INST(SQ_OP3_INST_MULADD),
930                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
931                                 DST_GPR(2),
932                                 DST_REL(ABSOLUTE),
933                                 DST_ELEM(ELEM_Y),
934                                 CLAMP(1));
935    /* r2.z = MAD(c2.z, r1.z, pv.z) */
936    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2),
937                             SRC0_REL(ABSOLUTE),
938                             SRC0_ELEM(ELEM_Z),
939                             SRC0_NEG(0),
940                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
941                             SRC1_REL(ABSOLUTE),
942                             SRC1_ELEM(ELEM_Z),
943                             SRC1_NEG(0),
944                             INDEX_MODE(SQ_INDEX_LOOP),
945                             PRED_SEL(SQ_PRED_SEL_OFF),
946                             LAST(0));
947    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
948                                 SRC2_REL(ABSOLUTE),
949                                 SRC2_ELEM(ELEM_Z),
950                                 SRC2_NEG(0),
951                                 ALU_INST(SQ_OP3_INST_MULADD),
952                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
953                                 DST_GPR(2),
954                                 DST_REL(ABSOLUTE),
955                                 DST_ELEM(ELEM_Z),
956                                 CLAMP(1));
957    /* r2.w = MAD(0, 0, 1) */
958    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
959                             SRC0_REL(ABSOLUTE),
960                             SRC0_ELEM(ELEM_X),
961                             SRC0_NEG(0),
962                             SRC1_SEL(SQ_ALU_SRC_0),
963                             SRC1_REL(ABSOLUTE),
964                             SRC1_ELEM(ELEM_X),
965                             SRC1_NEG(0),
966                             INDEX_MODE(SQ_INDEX_LOOP),
967                             PRED_SEL(SQ_PRED_SEL_OFF),
968                             LAST(1));
969    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
970                                 SRC2_REL(ABSOLUTE),
971                                 SRC2_ELEM(ELEM_X),
972                                 SRC2_NEG(0),
973                                 ALU_INST(SQ_OP3_INST_MULADD),
974                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
975                                 DST_GPR(2),
976                                 DST_REL(ABSOLUTE),
977                                 DST_ELEM(ELEM_W),
978                                 CLAMP(1));
979
980    /* 16 */
981    shader[i++] = CF_DWORD0(ADDR(18));
982    shader[i++] = CF_DWORD1(POP_COUNT(0),
983                            CF_CONST(0),
984                            COND(SQ_CF_COND_ACTIVE),
985                            I_COUNT(3),
986                            CALL_COUNT(0),
987                            END_OF_PROGRAM(0),
988                            VALID_PIXEL_MODE(0),
989                            CF_INST(SQ_CF_INST_TEX),
990                            WHOLE_QUAD_MODE(0),
991                            BARRIER(1));
992    /* 17 */
993    shader[i++] = CF_DWORD0(ADDR(0));
994    shader[i++] = CF_DWORD1(POP_COUNT(0),
995			    CF_CONST(0),
996			    COND(SQ_CF_COND_ACTIVE),
997			    I_COUNT(0),
998			    CALL_COUNT(0),
999			    END_OF_PROGRAM(0),
1000			    VALID_PIXEL_MODE(0),
1001			    CF_INST(SQ_CF_INST_RETURN),
1002			    WHOLE_QUAD_MODE(0),
1003			    BARRIER(1));
1004    /* 18/19 */
1005    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1006                             BC_FRAC_MODE(0),
1007                             FETCH_WHOLE_QUAD(0),
1008                             RESOURCE_ID(0),
1009                             SRC_GPR(0),
1010                             SRC_REL(ABSOLUTE),
1011                             R7xx_ALT_CONST(0));
1012    shader[i++] = TEX_DWORD1(DST_GPR(1),
1013                             DST_REL(ABSOLUTE),
1014                             DST_SEL_X(SQ_SEL_X),
1015                             DST_SEL_Y(SQ_SEL_MASK),
1016                             DST_SEL_Z(SQ_SEL_MASK),
1017                             DST_SEL_W(SQ_SEL_1),
1018                             LOD_BIAS(0),
1019                             COORD_TYPE_X(TEX_NORMALIZED),
1020                             COORD_TYPE_Y(TEX_NORMALIZED),
1021                             COORD_TYPE_Z(TEX_NORMALIZED),
1022                             COORD_TYPE_W(TEX_NORMALIZED));
1023    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1024                             OFFSET_Y(0),
1025                             OFFSET_Z(0),
1026                             SAMPLER_ID(0),
1027                             SRC_SEL_X(SQ_SEL_X),
1028                             SRC_SEL_Y(SQ_SEL_Y),
1029                             SRC_SEL_Z(SQ_SEL_0),
1030                             SRC_SEL_W(SQ_SEL_1));
1031    shader[i++] = TEX_DWORD_PAD;
1032    /* 20/21 */
1033    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1034                             BC_FRAC_MODE(0),
1035                             FETCH_WHOLE_QUAD(0),
1036                             RESOURCE_ID(1),
1037                             SRC_GPR(0),
1038                             SRC_REL(ABSOLUTE),
1039                             R7xx_ALT_CONST(0));
1040    shader[i++] = TEX_DWORD1(DST_GPR(1),
1041                             DST_REL(ABSOLUTE),
1042                             DST_SEL_X(SQ_SEL_MASK),
1043                             DST_SEL_Y(SQ_SEL_MASK),
1044                             DST_SEL_Z(SQ_SEL_X),
1045                             DST_SEL_W(SQ_SEL_MASK),
1046                             LOD_BIAS(0),
1047                             COORD_TYPE_X(TEX_NORMALIZED),
1048                             COORD_TYPE_Y(TEX_NORMALIZED),
1049                             COORD_TYPE_Z(TEX_NORMALIZED),
1050                             COORD_TYPE_W(TEX_NORMALIZED));
1051    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1052                             OFFSET_Y(0),
1053                             OFFSET_Z(0),
1054                             SAMPLER_ID(1),
1055                             SRC_SEL_X(SQ_SEL_X),
1056                             SRC_SEL_Y(SQ_SEL_Y),
1057                             SRC_SEL_Z(SQ_SEL_0),
1058                             SRC_SEL_W(SQ_SEL_1));
1059    shader[i++] = TEX_DWORD_PAD;
1060    /* 22/23 */
1061    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1062                             BC_FRAC_MODE(0),
1063                             FETCH_WHOLE_QUAD(0),
1064                             RESOURCE_ID(2),
1065                             SRC_GPR(0),
1066                             SRC_REL(ABSOLUTE),
1067                             R7xx_ALT_CONST(0));
1068    shader[i++] = TEX_DWORD1(DST_GPR(1),
1069                             DST_REL(ABSOLUTE),
1070                             DST_SEL_X(SQ_SEL_MASK),
1071                             DST_SEL_Y(SQ_SEL_X),
1072                             DST_SEL_Z(SQ_SEL_MASK),
1073                             DST_SEL_W(SQ_SEL_MASK),
1074                             LOD_BIAS(0),
1075                             COORD_TYPE_X(TEX_NORMALIZED),
1076                             COORD_TYPE_Y(TEX_NORMALIZED),
1077                             COORD_TYPE_Z(TEX_NORMALIZED),
1078                             COORD_TYPE_W(TEX_NORMALIZED));
1079    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1080                             OFFSET_Y(0),
1081                             OFFSET_Z(0),
1082                             SAMPLER_ID(2),
1083                             SRC_SEL_X(SQ_SEL_X),
1084                             SRC_SEL_Y(SQ_SEL_Y),
1085                             SRC_SEL_Z(SQ_SEL_0),
1086                             SRC_SEL_W(SQ_SEL_1));
1087    shader[i++] = TEX_DWORD_PAD;
1088    /* 24 */
1089    shader[i++] = CF_DWORD0(ADDR(26));
1090    shader[i++] = CF_DWORD1(POP_COUNT(0),
1091                            CF_CONST(0),
1092                            COND(SQ_CF_COND_ACTIVE),
1093                            I_COUNT(1),
1094                            CALL_COUNT(0),
1095                            END_OF_PROGRAM(0),
1096                            VALID_PIXEL_MODE(0),
1097                            CF_INST(SQ_CF_INST_TEX),
1098                            WHOLE_QUAD_MODE(0),
1099                            BARRIER(1));
1100    /* 25 */
1101    shader[i++] = CF_DWORD0(ADDR(0));
1102    shader[i++] = CF_DWORD1(POP_COUNT(0),
1103			    CF_CONST(0),
1104			    COND(SQ_CF_COND_ACTIVE),
1105			    I_COUNT(0),
1106			    CALL_COUNT(0),
1107			    END_OF_PROGRAM(0),
1108			    VALID_PIXEL_MODE(0),
1109			    CF_INST(SQ_CF_INST_RETURN),
1110			    WHOLE_QUAD_MODE(0),
1111			    BARRIER(1));
1112    /* 26/27 */
1113    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1114                             BC_FRAC_MODE(0),
1115                             FETCH_WHOLE_QUAD(0),
1116                             RESOURCE_ID(0),
1117                             SRC_GPR(0),
1118                             SRC_REL(ABSOLUTE),
1119                             R7xx_ALT_CONST(0));
1120    shader[i++] = TEX_DWORD1(DST_GPR(1),
1121                             DST_REL(ABSOLUTE),
1122                             DST_SEL_X(SQ_SEL_X),
1123                             DST_SEL_Y(SQ_SEL_Y),
1124                             DST_SEL_Z(SQ_SEL_Z),
1125                             DST_SEL_W(SQ_SEL_1),
1126                             LOD_BIAS(0),
1127                             COORD_TYPE_X(TEX_NORMALIZED),
1128                             COORD_TYPE_Y(TEX_NORMALIZED),
1129                             COORD_TYPE_Z(TEX_NORMALIZED),
1130                             COORD_TYPE_W(TEX_NORMALIZED));
1131    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1132                             OFFSET_Y(0),
1133                             OFFSET_Z(0),
1134                             SAMPLER_ID(0),
1135                             SRC_SEL_X(SQ_SEL_X),
1136                             SRC_SEL_Y(SQ_SEL_Y),
1137                             SRC_SEL_Z(SQ_SEL_0),
1138                             SRC_SEL_W(SQ_SEL_1));
1139    shader[i++] = TEX_DWORD_PAD;
1140
1141    return i;
1142}
1143
1144/* comp vs --------------------------------------- */
1145int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
1146{
1147    int i = 0;
1148
1149    /* 0 */
1150    shader[i++] = CF_DWORD0(ADDR(3));
1151    shader[i++] = CF_DWORD1(POP_COUNT(0),
1152                            CF_CONST(0),
1153                            COND(SQ_CF_COND_BOOL),
1154                            I_COUNT(0),
1155                            CALL_COUNT(0),
1156                            END_OF_PROGRAM(0),
1157                            VALID_PIXEL_MODE(0),
1158                            CF_INST(SQ_CF_INST_CALL),
1159                            WHOLE_QUAD_MODE(0),
1160                            BARRIER(0));
1161    /* 1 */
1162    shader[i++] = CF_DWORD0(ADDR(9));
1163    shader[i++] = CF_DWORD1(POP_COUNT(0),
1164                            CF_CONST(0),
1165                            COND(SQ_CF_COND_NOT_BOOL),
1166                            I_COUNT(0),
1167                            CALL_COUNT(0),
1168                            END_OF_PROGRAM(0),
1169                            VALID_PIXEL_MODE(0),
1170                            CF_INST(SQ_CF_INST_CALL),
1171                            WHOLE_QUAD_MODE(0),
1172                            BARRIER(0));
1173    /* 2 */
1174    shader[i++] = CF_DWORD0(ADDR(0));
1175    shader[i++] = CF_DWORD1(POP_COUNT(0),
1176                            CF_CONST(0),
1177                            COND(SQ_CF_COND_ACTIVE),
1178                            I_COUNT(0),
1179                            CALL_COUNT(0),
1180                            END_OF_PROGRAM(1),
1181                            VALID_PIXEL_MODE(0),
1182                            CF_INST(SQ_CF_INST_NOP),
1183                            WHOLE_QUAD_MODE(0),
1184                            BARRIER(1));
1185    /* 3 - mask sub */
1186    shader[i++] = CF_DWORD0(ADDR(44));
1187    shader[i++] = CF_DWORD1(POP_COUNT(0),
1188			    CF_CONST(0),
1189			    COND(SQ_CF_COND_ACTIVE),
1190			    I_COUNT(3),
1191			    CALL_COUNT(0),
1192			    END_OF_PROGRAM(0),
1193			    VALID_PIXEL_MODE(0),
1194			    CF_INST(SQ_CF_INST_VTX),
1195			    WHOLE_QUAD_MODE(0),
1196			    BARRIER(1));
1197
1198    /* 4 - ALU */
1199    shader[i++] = CF_ALU_DWORD0(ADDR(14),
1200				KCACHE_BANK0(0),
1201				KCACHE_BANK1(0),
1202				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
1203    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1204				KCACHE_ADDR0(0),
1205				KCACHE_ADDR1(0),
1206				I_COUNT(20),
1207				USES_WATERFALL(0),
1208				CF_INST(SQ_CF_INST_ALU),
1209				WHOLE_QUAD_MODE(0),
1210				BARRIER(1));
1211
1212    /* 5 - dst */
1213    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1214					  TYPE(SQ_EXPORT_POS),
1215					  RW_GPR(2),
1216					  RW_REL(ABSOLUTE),
1217					  INDEX_GPR(0),
1218					  ELEM_SIZE(0));
1219    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1220					       SRC_SEL_Y(SQ_SEL_Y),
1221					       SRC_SEL_Z(SQ_SEL_0),
1222					       SRC_SEL_W(SQ_SEL_1),
1223					       R6xx_ELEM_LOOP(0),
1224					       BURST_COUNT(1),
1225					       END_OF_PROGRAM(0),
1226					       VALID_PIXEL_MODE(0),
1227					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1228					       WHOLE_QUAD_MODE(0),
1229					       BARRIER(1));
1230    /* 6 - src */
1231    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1232					  TYPE(SQ_EXPORT_PARAM),
1233					  RW_GPR(1),
1234					  RW_REL(ABSOLUTE),
1235					  INDEX_GPR(0),
1236					  ELEM_SIZE(0));
1237    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1238					       SRC_SEL_Y(SQ_SEL_Y),
1239					       SRC_SEL_Z(SQ_SEL_0),
1240					       SRC_SEL_W(SQ_SEL_1),
1241					       R6xx_ELEM_LOOP(0),
1242					       BURST_COUNT(1),
1243					       END_OF_PROGRAM(0),
1244					       VALID_PIXEL_MODE(0),
1245					       CF_INST(SQ_CF_INST_EXPORT),
1246					       WHOLE_QUAD_MODE(0),
1247					       BARRIER(0));
1248    /* 7 - mask */
1249    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
1250					  TYPE(SQ_EXPORT_PARAM),
1251					  RW_GPR(0),
1252					  RW_REL(ABSOLUTE),
1253					  INDEX_GPR(0),
1254					  ELEM_SIZE(0));
1255    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1256					       SRC_SEL_Y(SQ_SEL_Y),
1257					       SRC_SEL_Z(SQ_SEL_0),
1258					       SRC_SEL_W(SQ_SEL_1),
1259					       R6xx_ELEM_LOOP(0),
1260					       BURST_COUNT(1),
1261					       END_OF_PROGRAM(0),
1262					       VALID_PIXEL_MODE(0),
1263					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1264					       WHOLE_QUAD_MODE(0),
1265					       BARRIER(0));
1266    /* 8 */
1267    shader[i++] = CF_DWORD0(ADDR(0));
1268    shader[i++] = CF_DWORD1(POP_COUNT(0),
1269			    CF_CONST(0),
1270			    COND(SQ_CF_COND_ACTIVE),
1271			    I_COUNT(0),
1272			    CALL_COUNT(0),
1273			    END_OF_PROGRAM(0),
1274			    VALID_PIXEL_MODE(0),
1275			    CF_INST(SQ_CF_INST_RETURN),
1276			    WHOLE_QUAD_MODE(0),
1277			    BARRIER(1));
1278    /* 9 - non-mask sub */
1279    shader[i++] = CF_DWORD0(ADDR(50));
1280    shader[i++] = CF_DWORD1(POP_COUNT(0),
1281			    CF_CONST(0),
1282			    COND(SQ_CF_COND_ACTIVE),
1283			    I_COUNT(2),
1284			    CALL_COUNT(0),
1285			    END_OF_PROGRAM(0),
1286			    VALID_PIXEL_MODE(0),
1287			    CF_INST(SQ_CF_INST_VTX),
1288			    WHOLE_QUAD_MODE(0),
1289			    BARRIER(1));
1290
1291    /* 10 - ALU */
1292    shader[i++] = CF_ALU_DWORD0(ADDR(34),
1293				KCACHE_BANK0(0),
1294				KCACHE_BANK1(0),
1295				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
1296    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1297				KCACHE_ADDR0(0),
1298				KCACHE_ADDR1(0),
1299				I_COUNT(10),
1300				USES_WATERFALL(0),
1301				CF_INST(SQ_CF_INST_ALU),
1302				WHOLE_QUAD_MODE(0),
1303				BARRIER(1));
1304
1305    /* 11 - dst */
1306    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1307					  TYPE(SQ_EXPORT_POS),
1308					  RW_GPR(1),
1309					  RW_REL(ABSOLUTE),
1310					  INDEX_GPR(0),
1311					  ELEM_SIZE(0));
1312    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1313					       SRC_SEL_Y(SQ_SEL_Y),
1314					       SRC_SEL_Z(SQ_SEL_0),
1315					       SRC_SEL_W(SQ_SEL_1),
1316					       R6xx_ELEM_LOOP(0),
1317					       BURST_COUNT(0),
1318					       END_OF_PROGRAM(0),
1319					       VALID_PIXEL_MODE(0),
1320					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1321					       WHOLE_QUAD_MODE(0),
1322					       BARRIER(1));
1323    /* 12 - src */
1324    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1325					  TYPE(SQ_EXPORT_PARAM),
1326					  RW_GPR(0),
1327					  RW_REL(ABSOLUTE),
1328					  INDEX_GPR(0),
1329					  ELEM_SIZE(0));
1330    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1331					       SRC_SEL_Y(SQ_SEL_Y),
1332					       SRC_SEL_Z(SQ_SEL_0),
1333					       SRC_SEL_W(SQ_SEL_1),
1334					       R6xx_ELEM_LOOP(0),
1335					       BURST_COUNT(0),
1336					       END_OF_PROGRAM(0),
1337					       VALID_PIXEL_MODE(0),
1338					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1339					       WHOLE_QUAD_MODE(0),
1340					       BARRIER(0));
1341    /* 13 */
1342    shader[i++] = CF_DWORD0(ADDR(0));
1343    shader[i++] = CF_DWORD1(POP_COUNT(0),
1344			    CF_CONST(0),
1345			    COND(SQ_CF_COND_ACTIVE),
1346			    I_COUNT(0),
1347			    CALL_COUNT(0),
1348			    END_OF_PROGRAM(0),
1349			    VALID_PIXEL_MODE(0),
1350			    CF_INST(SQ_CF_INST_RETURN),
1351			    WHOLE_QUAD_MODE(0),
1352			    BARRIER(1));
1353
1354
1355    /* 14 srcX.x DOT4 - mask */
1356    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1357                             SRC0_REL(ABSOLUTE),
1358                             SRC0_ELEM(ELEM_X),
1359                             SRC0_NEG(0),
1360                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1361                             SRC1_REL(ABSOLUTE),
1362                             SRC1_ELEM(ELEM_X),
1363                             SRC1_NEG(0),
1364                             INDEX_MODE(SQ_INDEX_LOOP),
1365                             PRED_SEL(SQ_PRED_SEL_OFF),
1366                             LAST(0));
1367    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1368                                 SRC0_ABS(0),
1369                                 SRC1_ABS(0),
1370                                 UPDATE_EXECUTE_MASK(0),
1371                                 UPDATE_PRED(0),
1372                                 WRITE_MASK(1),
1373                                 FOG_MERGE(0),
1374                                 OMOD(SQ_ALU_OMOD_OFF),
1375                                 ALU_INST(SQ_OP2_INST_DOT4),
1376                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1377                                 DST_GPR(3),
1378                                 DST_REL(ABSOLUTE),
1379                                 DST_ELEM(ELEM_X),
1380                                 CLAMP(0));
1381
1382    /* 15 srcX.y DOT4 - mask */
1383    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1384                             SRC0_REL(ABSOLUTE),
1385                             SRC0_ELEM(ELEM_Y),
1386                             SRC0_NEG(0),
1387                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1388                             SRC1_REL(ABSOLUTE),
1389                             SRC1_ELEM(ELEM_Y),
1390                             SRC1_NEG(0),
1391                             INDEX_MODE(SQ_INDEX_LOOP),
1392                             PRED_SEL(SQ_PRED_SEL_OFF),
1393                             LAST(0));
1394    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1395                                 SRC0_ABS(0),
1396                                 SRC1_ABS(0),
1397                                 UPDATE_EXECUTE_MASK(0),
1398                                 UPDATE_PRED(0),
1399                                 WRITE_MASK(0),
1400                                 FOG_MERGE(0),
1401                                 OMOD(SQ_ALU_OMOD_OFF),
1402                                 ALU_INST(SQ_OP2_INST_DOT4),
1403                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1404                                 DST_GPR(3),
1405                                 DST_REL(ABSOLUTE),
1406                                 DST_ELEM(ELEM_Y),
1407                                 CLAMP(0));
1408
1409    /* 16 srcX.z DOT4 - mask */
1410    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1411                             SRC0_REL(ABSOLUTE),
1412                             SRC0_ELEM(ELEM_Z),
1413                             SRC0_NEG(0),
1414                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1415                             SRC1_REL(ABSOLUTE),
1416                             SRC1_ELEM(ELEM_Z),
1417                             SRC1_NEG(0),
1418                             INDEX_MODE(SQ_INDEX_LOOP),
1419                             PRED_SEL(SQ_PRED_SEL_OFF),
1420                             LAST(0));
1421    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1422                                 SRC0_ABS(0),
1423                                 SRC1_ABS(0),
1424                                 UPDATE_EXECUTE_MASK(0),
1425                                 UPDATE_PRED(0),
1426                                 WRITE_MASK(0),
1427                                 FOG_MERGE(0),
1428                                 OMOD(SQ_ALU_OMOD_OFF),
1429                                 ALU_INST(SQ_OP2_INST_DOT4),
1430                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1431                                 DST_GPR(3),
1432                                 DST_REL(ABSOLUTE),
1433                                 DST_ELEM(ELEM_Z),
1434                                 CLAMP(0));
1435
1436    /* 17 srcX.w DOT4 - mask */
1437    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1438                             SRC0_REL(ABSOLUTE),
1439                             SRC0_ELEM(ELEM_W),
1440                             SRC0_NEG(0),
1441                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1442                             SRC1_REL(ABSOLUTE),
1443                             SRC1_ELEM(ELEM_W),
1444                             SRC1_NEG(0),
1445                             INDEX_MODE(SQ_INDEX_LOOP),
1446                             PRED_SEL(SQ_PRED_SEL_OFF),
1447                             LAST(1));
1448    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1449                                 SRC0_ABS(0),
1450                                 SRC1_ABS(0),
1451                                 UPDATE_EXECUTE_MASK(0),
1452                                 UPDATE_PRED(0),
1453                                 WRITE_MASK(0),
1454                                 FOG_MERGE(0),
1455                                 OMOD(SQ_ALU_OMOD_OFF),
1456                                 ALU_INST(SQ_OP2_INST_DOT4),
1457                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1458                                 DST_GPR(3),
1459                                 DST_REL(ABSOLUTE),
1460                                 DST_ELEM(ELEM_W),
1461                                 CLAMP(0));
1462
1463    /* 18 srcY.x DOT4 - mask */
1464    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1465                             SRC0_REL(ABSOLUTE),
1466                             SRC0_ELEM(ELEM_X),
1467                             SRC0_NEG(0),
1468                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1469                             SRC1_REL(ABSOLUTE),
1470                             SRC1_ELEM(ELEM_X),
1471                             SRC1_NEG(0),
1472                             INDEX_MODE(SQ_INDEX_LOOP),
1473                             PRED_SEL(SQ_PRED_SEL_OFF),
1474                             LAST(0));
1475    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1476                                 SRC0_ABS(0),
1477                                 SRC1_ABS(0),
1478                                 UPDATE_EXECUTE_MASK(0),
1479                                 UPDATE_PRED(0),
1480                                 WRITE_MASK(0),
1481                                 FOG_MERGE(0),
1482                                 OMOD(SQ_ALU_OMOD_OFF),
1483                                 ALU_INST(SQ_OP2_INST_DOT4),
1484                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1485                                 DST_GPR(3),
1486                                 DST_REL(ABSOLUTE),
1487                                 DST_ELEM(ELEM_X),
1488                                 CLAMP(0));
1489
1490    /* 19 srcY.y DOT4 - mask */
1491    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1492                             SRC0_REL(ABSOLUTE),
1493                             SRC0_ELEM(ELEM_Y),
1494                             SRC0_NEG(0),
1495                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1496                             SRC1_REL(ABSOLUTE),
1497                             SRC1_ELEM(ELEM_Y),
1498                             SRC1_NEG(0),
1499                             INDEX_MODE(SQ_INDEX_LOOP),
1500                             PRED_SEL(SQ_PRED_SEL_OFF),
1501                             LAST(0));
1502    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1503                                 SRC0_ABS(0),
1504                                 SRC1_ABS(0),
1505                                 UPDATE_EXECUTE_MASK(0),
1506                                 UPDATE_PRED(0),
1507                                 WRITE_MASK(1),
1508                                 FOG_MERGE(0),
1509                                 OMOD(SQ_ALU_OMOD_OFF),
1510                                 ALU_INST(SQ_OP2_INST_DOT4),
1511                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1512                                 DST_GPR(3),
1513                                 DST_REL(ABSOLUTE),
1514                                 DST_ELEM(ELEM_Y),
1515                                 CLAMP(0));
1516
1517    /* 20 srcY.z DOT4 - mask */
1518    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1519                             SRC0_REL(ABSOLUTE),
1520                             SRC0_ELEM(ELEM_Z),
1521                             SRC0_NEG(0),
1522                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1523                             SRC1_REL(ABSOLUTE),
1524                             SRC1_ELEM(ELEM_Z),
1525                             SRC1_NEG(0),
1526                             INDEX_MODE(SQ_INDEX_LOOP),
1527                             PRED_SEL(SQ_PRED_SEL_OFF),
1528                             LAST(0));
1529    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1530                                 SRC0_ABS(0),
1531                                 SRC1_ABS(0),
1532                                 UPDATE_EXECUTE_MASK(0),
1533                                 UPDATE_PRED(0),
1534                                 WRITE_MASK(0),
1535                                 FOG_MERGE(0),
1536                                 OMOD(SQ_ALU_OMOD_OFF),
1537                                 ALU_INST(SQ_OP2_INST_DOT4),
1538                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1539                                 DST_GPR(3),
1540                                 DST_REL(ABSOLUTE),
1541                                 DST_ELEM(ELEM_Z),
1542                                 CLAMP(0));
1543
1544    /* 21 srcY.w DOT4 - mask */
1545    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1546                             SRC0_REL(ABSOLUTE),
1547                             SRC0_ELEM(ELEM_W),
1548                             SRC0_NEG(0),
1549                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1550                             SRC1_REL(ABSOLUTE),
1551                             SRC1_ELEM(ELEM_W),
1552                             SRC1_NEG(0),
1553                             INDEX_MODE(SQ_INDEX_LOOP),
1554                             PRED_SEL(SQ_PRED_SEL_OFF),
1555                             LAST(1));
1556    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1557                                 SRC0_ABS(0),
1558                                 SRC1_ABS(0),
1559                                 UPDATE_EXECUTE_MASK(0),
1560                                 UPDATE_PRED(0),
1561                                 WRITE_MASK(0),
1562                                 FOG_MERGE(0),
1563                                 OMOD(SQ_ALU_OMOD_OFF),
1564                                 ALU_INST(SQ_OP2_INST_DOT4),
1565                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1566                                 DST_GPR(3),
1567                                 DST_REL(ABSOLUTE),
1568                                 DST_ELEM(ELEM_W),
1569                                 CLAMP(0));
1570
1571    /* 22 maskX.x DOT4 - mask */
1572    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1573                             SRC0_REL(ABSOLUTE),
1574                             SRC0_ELEM(ELEM_X),
1575                             SRC0_NEG(0),
1576                             SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1577                             SRC1_REL(ABSOLUTE),
1578                             SRC1_ELEM(ELEM_X),
1579                             SRC1_NEG(0),
1580                             INDEX_MODE(SQ_INDEX_LOOP),
1581                             PRED_SEL(SQ_PRED_SEL_OFF),
1582                             LAST(0));
1583    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1584                                 SRC0_ABS(0),
1585                                 SRC1_ABS(0),
1586                                 UPDATE_EXECUTE_MASK(0),
1587                                 UPDATE_PRED(0),
1588                                 WRITE_MASK(1),
1589                                 FOG_MERGE(0),
1590                                 OMOD(SQ_ALU_OMOD_OFF),
1591                                 ALU_INST(SQ_OP2_INST_DOT4),
1592                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1593                                 DST_GPR(4),
1594                                 DST_REL(ABSOLUTE),
1595                                 DST_ELEM(ELEM_X),
1596                                 CLAMP(0));
1597
1598    /* 23 maskX.y DOT4 - mask */
1599    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1600                             SRC0_REL(ABSOLUTE),
1601                             SRC0_ELEM(ELEM_Y),
1602                             SRC0_NEG(0),
1603                             SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1604                             SRC1_REL(ABSOLUTE),
1605                             SRC1_ELEM(ELEM_Y),
1606                             SRC1_NEG(0),
1607                             INDEX_MODE(SQ_INDEX_LOOP),
1608                             PRED_SEL(SQ_PRED_SEL_OFF),
1609                             LAST(0));
1610    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1611                                 SRC0_ABS(0),
1612                                 SRC1_ABS(0),
1613                                 UPDATE_EXECUTE_MASK(0),
1614                                 UPDATE_PRED(0),
1615                                 WRITE_MASK(0),
1616                                 FOG_MERGE(0),
1617                                 OMOD(SQ_ALU_OMOD_OFF),
1618                                 ALU_INST(SQ_OP2_INST_DOT4),
1619                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1620                                 DST_GPR(4),
1621                                 DST_REL(ABSOLUTE),
1622                                 DST_ELEM(ELEM_Y),
1623                                 CLAMP(0));
1624
1625    /* 24 maskX.z DOT4 - mask */
1626    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1627                             SRC0_REL(ABSOLUTE),
1628                             SRC0_ELEM(ELEM_Z),
1629                             SRC0_NEG(0),
1630                             SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1631                             SRC1_REL(ABSOLUTE),
1632                             SRC1_ELEM(ELEM_Z),
1633                             SRC1_NEG(0),
1634                             INDEX_MODE(SQ_INDEX_LOOP),
1635                             PRED_SEL(SQ_PRED_SEL_OFF),
1636                             LAST(0));
1637    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1638                                 SRC0_ABS(0),
1639                                 SRC1_ABS(0),
1640                                 UPDATE_EXECUTE_MASK(0),
1641                                 UPDATE_PRED(0),
1642                                 WRITE_MASK(0),
1643                                 FOG_MERGE(0),
1644                                 OMOD(SQ_ALU_OMOD_OFF),
1645                                 ALU_INST(SQ_OP2_INST_DOT4),
1646                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1647                                 DST_GPR(4),
1648                                 DST_REL(ABSOLUTE),
1649                                 DST_ELEM(ELEM_Z),
1650                                 CLAMP(0));
1651
1652    /* 25 maskX.w DOT4 - mask */
1653    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1654                             SRC0_REL(ABSOLUTE),
1655                             SRC0_ELEM(ELEM_W),
1656                             SRC0_NEG(0),
1657                             SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1658                             SRC1_REL(ABSOLUTE),
1659                             SRC1_ELEM(ELEM_W),
1660                             SRC1_NEG(0),
1661                             INDEX_MODE(SQ_INDEX_LOOP),
1662                             PRED_SEL(SQ_PRED_SEL_OFF),
1663                             LAST(1));
1664    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1665                                 SRC0_ABS(0),
1666                                 SRC1_ABS(0),
1667                                 UPDATE_EXECUTE_MASK(0),
1668                                 UPDATE_PRED(0),
1669                                 WRITE_MASK(0),
1670                                 FOG_MERGE(0),
1671                                 OMOD(SQ_ALU_OMOD_OFF),
1672                                 ALU_INST(SQ_OP2_INST_DOT4),
1673                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1674                                 DST_GPR(4),
1675                                 DST_REL(ABSOLUTE),
1676                                 DST_ELEM(ELEM_W),
1677                                 CLAMP(0));
1678
1679    /* 26 maskY.x DOT4 - mask */
1680    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1681                             SRC0_REL(ABSOLUTE),
1682                             SRC0_ELEM(ELEM_X),
1683                             SRC0_NEG(0),
1684                             SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1685                             SRC1_REL(ABSOLUTE),
1686                             SRC1_ELEM(ELEM_X),
1687                             SRC1_NEG(0),
1688                             INDEX_MODE(SQ_INDEX_LOOP),
1689                             PRED_SEL(SQ_PRED_SEL_OFF),
1690                             LAST(0));
1691    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1692                                 SRC0_ABS(0),
1693                                 SRC1_ABS(0),
1694                                 UPDATE_EXECUTE_MASK(0),
1695                                 UPDATE_PRED(0),
1696                                 WRITE_MASK(0),
1697                                 FOG_MERGE(0),
1698                                 OMOD(SQ_ALU_OMOD_OFF),
1699                                 ALU_INST(SQ_OP2_INST_DOT4),
1700                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1701                                 DST_GPR(4),
1702                                 DST_REL(ABSOLUTE),
1703                                 DST_ELEM(ELEM_X),
1704                                 CLAMP(0));
1705
1706    /* 27 maskY.y DOT4 - mask */
1707    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1708                             SRC0_REL(ABSOLUTE),
1709                             SRC0_ELEM(ELEM_Y),
1710                             SRC0_NEG(0),
1711                             SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1712                             SRC1_REL(ABSOLUTE),
1713                             SRC1_ELEM(ELEM_Y),
1714                             SRC1_NEG(0),
1715                             INDEX_MODE(SQ_INDEX_LOOP),
1716                             PRED_SEL(SQ_PRED_SEL_OFF),
1717                             LAST(0));
1718    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1719                                 SRC0_ABS(0),
1720                                 SRC1_ABS(0),
1721                                 UPDATE_EXECUTE_MASK(0),
1722                                 UPDATE_PRED(0),
1723                                 WRITE_MASK(1),
1724                                 FOG_MERGE(0),
1725                                 OMOD(SQ_ALU_OMOD_OFF),
1726                                 ALU_INST(SQ_OP2_INST_DOT4),
1727                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1728                                 DST_GPR(4),
1729                                 DST_REL(ABSOLUTE),
1730                                 DST_ELEM(ELEM_Y),
1731                                 CLAMP(0));
1732
1733    /* 28 maskY.z DOT4 - mask */
1734    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1735                             SRC0_REL(ABSOLUTE),
1736                             SRC0_ELEM(ELEM_Z),
1737                             SRC0_NEG(0),
1738                             SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1739                             SRC1_REL(ABSOLUTE),
1740                             SRC1_ELEM(ELEM_Z),
1741                             SRC1_NEG(0),
1742                             INDEX_MODE(SQ_INDEX_LOOP),
1743                             PRED_SEL(SQ_PRED_SEL_OFF),
1744                             LAST(0));
1745    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1746                                 SRC0_ABS(0),
1747                                 SRC1_ABS(0),
1748                                 UPDATE_EXECUTE_MASK(0),
1749                                 UPDATE_PRED(0),
1750                                 WRITE_MASK(0),
1751                                 FOG_MERGE(0),
1752                                 OMOD(SQ_ALU_OMOD_OFF),
1753                                 ALU_INST(SQ_OP2_INST_DOT4),
1754                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1755                                 DST_GPR(4),
1756                                 DST_REL(ABSOLUTE),
1757                                 DST_ELEM(ELEM_Z),
1758                                 CLAMP(0));
1759
1760    /* 29 maskY.w DOT4 - mask */
1761    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1762                             SRC0_REL(ABSOLUTE),
1763                             SRC0_ELEM(ELEM_W),
1764                             SRC0_NEG(0),
1765                             SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1766                             SRC1_REL(ABSOLUTE),
1767                             SRC1_ELEM(ELEM_W),
1768                             SRC1_NEG(0),
1769                             INDEX_MODE(SQ_INDEX_LOOP),
1770                             PRED_SEL(SQ_PRED_SEL_OFF),
1771                             LAST(1));
1772    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1773                                 SRC0_ABS(0),
1774                                 SRC1_ABS(0),
1775                                 UPDATE_EXECUTE_MASK(0),
1776                                 UPDATE_PRED(0),
1777                                 WRITE_MASK(0),
1778                                 FOG_MERGE(0),
1779                                 OMOD(SQ_ALU_OMOD_OFF),
1780                                 ALU_INST(SQ_OP2_INST_DOT4),
1781                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1782                                 DST_GPR(4),
1783                                 DST_REL(ABSOLUTE),
1784                                 DST_ELEM(ELEM_W),
1785                                 CLAMP(0));
1786
1787    /* 30 srcX / w */
1788    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
1789                             SRC0_REL(ABSOLUTE),
1790                             SRC0_ELEM(ELEM_X),
1791                             SRC0_NEG(0),
1792                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1793                             SRC1_REL(ABSOLUTE),
1794                             SRC1_ELEM(ELEM_W),
1795                             SRC1_NEG(0),
1796                             INDEX_MODE(SQ_INDEX_AR_X),
1797                             PRED_SEL(SQ_PRED_SEL_OFF),
1798                             LAST(1));
1799    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1800                                 SRC0_ABS(0),
1801                                 SRC1_ABS(0),
1802                                 UPDATE_EXECUTE_MASK(0),
1803                                 UPDATE_PRED(0),
1804                                 WRITE_MASK(1),
1805                                 FOG_MERGE(0),
1806                                 OMOD(SQ_ALU_OMOD_OFF),
1807                                 ALU_INST(SQ_OP2_INST_MUL),
1808                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1809                                 DST_GPR(1),
1810                                 DST_REL(ABSOLUTE),
1811                                 DST_ELEM(ELEM_X),
1812                                 CLAMP(0));
1813
1814    /* 31 srcY / h */
1815    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
1816                             SRC0_REL(ABSOLUTE),
1817                             SRC0_ELEM(ELEM_Y),
1818                             SRC0_NEG(0),
1819                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1820                             SRC1_REL(ABSOLUTE),
1821                             SRC1_ELEM(ELEM_W),
1822                             SRC1_NEG(0),
1823                             INDEX_MODE(SQ_INDEX_AR_X),
1824                             PRED_SEL(SQ_PRED_SEL_OFF),
1825                             LAST(1));
1826    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1827                                 SRC0_ABS(0),
1828                                 SRC1_ABS(0),
1829                                 UPDATE_EXECUTE_MASK(0),
1830                                 UPDATE_PRED(0),
1831                                 WRITE_MASK(1),
1832                                 FOG_MERGE(0),
1833                                 OMOD(SQ_ALU_OMOD_OFF),
1834                                 ALU_INST(SQ_OP2_INST_MUL),
1835                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1836                                 DST_GPR(1),
1837                                 DST_REL(ABSOLUTE),
1838                                 DST_ELEM(ELEM_Y),
1839                                 CLAMP(0));
1840
1841    /* 32 maskX / w */
1842    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
1843                             SRC0_REL(ABSOLUTE),
1844                             SRC0_ELEM(ELEM_X),
1845                             SRC0_NEG(0),
1846                             SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1847                             SRC1_REL(ABSOLUTE),
1848                             SRC1_ELEM(ELEM_W),
1849                             SRC1_NEG(0),
1850                             INDEX_MODE(SQ_INDEX_AR_X),
1851                             PRED_SEL(SQ_PRED_SEL_OFF),
1852                             LAST(1));
1853    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1854                                 SRC0_ABS(0),
1855                                 SRC1_ABS(0),
1856                                 UPDATE_EXECUTE_MASK(0),
1857                                 UPDATE_PRED(0),
1858                                 WRITE_MASK(1),
1859                                 FOG_MERGE(0),
1860                                 OMOD(SQ_ALU_OMOD_OFF),
1861                                 ALU_INST(SQ_OP2_INST_MUL),
1862                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1863                                 DST_GPR(0),
1864                                 DST_REL(ABSOLUTE),
1865                                 DST_ELEM(ELEM_X),
1866                                 CLAMP(0));
1867
1868    /* 33 maskY / h */
1869    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
1870                             SRC0_REL(ABSOLUTE),
1871                             SRC0_ELEM(ELEM_Y),
1872                             SRC0_NEG(0),
1873                             SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1874                             SRC1_REL(ABSOLUTE),
1875                             SRC1_ELEM(ELEM_W),
1876                             SRC1_NEG(0),
1877                             INDEX_MODE(SQ_INDEX_AR_X),
1878                             PRED_SEL(SQ_PRED_SEL_OFF),
1879                             LAST(1));
1880    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1881                                 SRC0_ABS(0),
1882                                 SRC1_ABS(0),
1883                                 UPDATE_EXECUTE_MASK(0),
1884                                 UPDATE_PRED(0),
1885                                 WRITE_MASK(1),
1886                                 FOG_MERGE(0),
1887                                 OMOD(SQ_ALU_OMOD_OFF),
1888                                 ALU_INST(SQ_OP2_INST_MUL),
1889                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1890                                 DST_GPR(0),
1891                                 DST_REL(ABSOLUTE),
1892                                 DST_ELEM(ELEM_Y),
1893                                 CLAMP(0));
1894
1895    /* 34 srcX.x DOT4 - non-mask */
1896    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1897                             SRC0_REL(ABSOLUTE),
1898                             SRC0_ELEM(ELEM_X),
1899                             SRC0_NEG(0),
1900                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1901                             SRC1_REL(ABSOLUTE),
1902                             SRC1_ELEM(ELEM_X),
1903                             SRC1_NEG(0),
1904                             INDEX_MODE(SQ_INDEX_LOOP),
1905                             PRED_SEL(SQ_PRED_SEL_OFF),
1906                             LAST(0));
1907    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1908                                 SRC0_ABS(0),
1909                                 SRC1_ABS(0),
1910                                 UPDATE_EXECUTE_MASK(0),
1911                                 UPDATE_PRED(0),
1912                                 WRITE_MASK(1),
1913                                 FOG_MERGE(0),
1914                                 OMOD(SQ_ALU_OMOD_OFF),
1915                                 ALU_INST(SQ_OP2_INST_DOT4),
1916                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1917                                 DST_GPR(2),
1918                                 DST_REL(ABSOLUTE),
1919                                 DST_ELEM(ELEM_X),
1920                                 CLAMP(0));
1921
1922    /* 35 srcX.y DOT4 - non-mask */
1923    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1924                             SRC0_REL(ABSOLUTE),
1925                             SRC0_ELEM(ELEM_Y),
1926                             SRC0_NEG(0),
1927                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1928                             SRC1_REL(ABSOLUTE),
1929                             SRC1_ELEM(ELEM_Y),
1930                             SRC1_NEG(0),
1931                             INDEX_MODE(SQ_INDEX_LOOP),
1932                             PRED_SEL(SQ_PRED_SEL_OFF),
1933                             LAST(0));
1934    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1935                                 SRC0_ABS(0),
1936                                 SRC1_ABS(0),
1937                                 UPDATE_EXECUTE_MASK(0),
1938                                 UPDATE_PRED(0),
1939                                 WRITE_MASK(0),
1940                                 FOG_MERGE(0),
1941                                 OMOD(SQ_ALU_OMOD_OFF),
1942                                 ALU_INST(SQ_OP2_INST_DOT4),
1943                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1944                                 DST_GPR(2),
1945                                 DST_REL(ABSOLUTE),
1946                                 DST_ELEM(ELEM_Y),
1947                                 CLAMP(0));
1948
1949    /* 36 srcX.z DOT4 - non-mask */
1950    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1951                             SRC0_REL(ABSOLUTE),
1952                             SRC0_ELEM(ELEM_Z),
1953                             SRC0_NEG(0),
1954                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1955                             SRC1_REL(ABSOLUTE),
1956                             SRC1_ELEM(ELEM_Z),
1957                             SRC1_NEG(0),
1958                             INDEX_MODE(SQ_INDEX_LOOP),
1959                             PRED_SEL(SQ_PRED_SEL_OFF),
1960                             LAST(0));
1961    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1962                                 SRC0_ABS(0),
1963                                 SRC1_ABS(0),
1964                                 UPDATE_EXECUTE_MASK(0),
1965                                 UPDATE_PRED(0),
1966                                 WRITE_MASK(0),
1967                                 FOG_MERGE(0),
1968                                 OMOD(SQ_ALU_OMOD_OFF),
1969                                 ALU_INST(SQ_OP2_INST_DOT4),
1970                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1971                                 DST_GPR(2),
1972                                 DST_REL(ABSOLUTE),
1973                                 DST_ELEM(ELEM_Z),
1974                                 CLAMP(0));
1975
1976    /* 37 srcX.w DOT4 - non-mask */
1977    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1978                             SRC0_REL(ABSOLUTE),
1979                             SRC0_ELEM(ELEM_W),
1980                             SRC0_NEG(0),
1981                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1982                             SRC1_REL(ABSOLUTE),
1983                             SRC1_ELEM(ELEM_W),
1984                             SRC1_NEG(0),
1985                             INDEX_MODE(SQ_INDEX_LOOP),
1986                             PRED_SEL(SQ_PRED_SEL_OFF),
1987                             LAST(1));
1988    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1989                                 SRC0_ABS(0),
1990                                 SRC1_ABS(0),
1991                                 UPDATE_EXECUTE_MASK(0),
1992                                 UPDATE_PRED(0),
1993                                 WRITE_MASK(0),
1994                                 FOG_MERGE(0),
1995                                 OMOD(SQ_ALU_OMOD_OFF),
1996                                 ALU_INST(SQ_OP2_INST_DOT4),
1997                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1998                                 DST_GPR(2),
1999                                 DST_REL(ABSOLUTE),
2000                                 DST_ELEM(ELEM_W),
2001                                 CLAMP(0));
2002
2003    /* 38 srcY.x DOT4 - non-mask */
2004    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2005                             SRC0_REL(ABSOLUTE),
2006                             SRC0_ELEM(ELEM_X),
2007                             SRC0_NEG(0),
2008                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2009                             SRC1_REL(ABSOLUTE),
2010                             SRC1_ELEM(ELEM_X),
2011                             SRC1_NEG(0),
2012                             INDEX_MODE(SQ_INDEX_LOOP),
2013                             PRED_SEL(SQ_PRED_SEL_OFF),
2014                             LAST(0));
2015    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2016                                 SRC0_ABS(0),
2017                                 SRC1_ABS(0),
2018                                 UPDATE_EXECUTE_MASK(0),
2019                                 UPDATE_PRED(0),
2020                                 WRITE_MASK(0),
2021                                 FOG_MERGE(0),
2022                                 OMOD(SQ_ALU_OMOD_OFF),
2023                                 ALU_INST(SQ_OP2_INST_DOT4),
2024                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2025                                 DST_GPR(2),
2026                                 DST_REL(ABSOLUTE),
2027                                 DST_ELEM(ELEM_X),
2028                                 CLAMP(0));
2029
2030    /* 39 srcY.y DOT4 - non-mask */
2031    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2032                             SRC0_REL(ABSOLUTE),
2033                             SRC0_ELEM(ELEM_Y),
2034                             SRC0_NEG(0),
2035                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2036                             SRC1_REL(ABSOLUTE),
2037                             SRC1_ELEM(ELEM_Y),
2038                             SRC1_NEG(0),
2039                             INDEX_MODE(SQ_INDEX_LOOP),
2040                             PRED_SEL(SQ_PRED_SEL_OFF),
2041                             LAST(0));
2042    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2043                                 SRC0_ABS(0),
2044                                 SRC1_ABS(0),
2045                                 UPDATE_EXECUTE_MASK(0),
2046                                 UPDATE_PRED(0),
2047                                 WRITE_MASK(1),
2048                                 FOG_MERGE(0),
2049                                 OMOD(SQ_ALU_OMOD_OFF),
2050                                 ALU_INST(SQ_OP2_INST_DOT4),
2051                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2052                                 DST_GPR(2),
2053                                 DST_REL(ABSOLUTE),
2054                                 DST_ELEM(ELEM_Y),
2055                                 CLAMP(0));
2056
2057    /* 40 srcY.z DOT4 - non-mask */
2058    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2059                             SRC0_REL(ABSOLUTE),
2060                             SRC0_ELEM(ELEM_Z),
2061                             SRC0_NEG(0),
2062                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2063                             SRC1_REL(ABSOLUTE),
2064                             SRC1_ELEM(ELEM_Z),
2065                             SRC1_NEG(0),
2066                             INDEX_MODE(SQ_INDEX_LOOP),
2067                             PRED_SEL(SQ_PRED_SEL_OFF),
2068                             LAST(0));
2069    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2070                                 SRC0_ABS(0),
2071                                 SRC1_ABS(0),
2072                                 UPDATE_EXECUTE_MASK(0),
2073                                 UPDATE_PRED(0),
2074                                 WRITE_MASK(0),
2075                                 FOG_MERGE(0),
2076                                 OMOD(SQ_ALU_OMOD_OFF),
2077                                 ALU_INST(SQ_OP2_INST_DOT4),
2078                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2079                                 DST_GPR(2),
2080                                 DST_REL(ABSOLUTE),
2081                                 DST_ELEM(ELEM_Z),
2082                                 CLAMP(0));
2083
2084    /* 41 srcY.w DOT4 - non-mask */
2085    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2086                             SRC0_REL(ABSOLUTE),
2087                             SRC0_ELEM(ELEM_W),
2088                             SRC0_NEG(0),
2089                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2090                             SRC1_REL(ABSOLUTE),
2091                             SRC1_ELEM(ELEM_W),
2092                             SRC1_NEG(0),
2093                             INDEX_MODE(SQ_INDEX_LOOP),
2094                             PRED_SEL(SQ_PRED_SEL_OFF),
2095                             LAST(1));
2096    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2097                                 SRC0_ABS(0),
2098                                 SRC1_ABS(0),
2099                                 UPDATE_EXECUTE_MASK(0),
2100                                 UPDATE_PRED(0),
2101                                 WRITE_MASK(0),
2102                                 FOG_MERGE(0),
2103                                 OMOD(SQ_ALU_OMOD_OFF),
2104                                 ALU_INST(SQ_OP2_INST_DOT4),
2105                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2106                                 DST_GPR(2),
2107                                 DST_REL(ABSOLUTE),
2108                                 DST_ELEM(ELEM_W),
2109                                 CLAMP(0));
2110
2111    /* 42 srcX / w */
2112    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
2113                             SRC0_REL(ABSOLUTE),
2114                             SRC0_ELEM(ELEM_X),
2115                             SRC0_NEG(0),
2116                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
2117                             SRC1_REL(ABSOLUTE),
2118                             SRC1_ELEM(ELEM_W),
2119                             SRC1_NEG(0),
2120                             INDEX_MODE(SQ_INDEX_AR_X),
2121                             PRED_SEL(SQ_PRED_SEL_OFF),
2122                             LAST(1));
2123    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2124                                 SRC0_ABS(0),
2125                                 SRC1_ABS(0),
2126                                 UPDATE_EXECUTE_MASK(0),
2127                                 UPDATE_PRED(0),
2128                                 WRITE_MASK(1),
2129                                 FOG_MERGE(0),
2130                                 OMOD(SQ_ALU_OMOD_OFF),
2131                                 ALU_INST(SQ_OP2_INST_MUL),
2132                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2133                                 DST_GPR(0),
2134                                 DST_REL(ABSOLUTE),
2135                                 DST_ELEM(ELEM_X),
2136                                 CLAMP(0));
2137
2138    /* 43 srcY / h */
2139    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
2140                             SRC0_REL(ABSOLUTE),
2141                             SRC0_ELEM(ELEM_Y),
2142                             SRC0_NEG(0),
2143                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2144                             SRC1_REL(ABSOLUTE),
2145                             SRC1_ELEM(ELEM_W),
2146                             SRC1_NEG(0),
2147                             INDEX_MODE(SQ_INDEX_AR_X),
2148                             PRED_SEL(SQ_PRED_SEL_OFF),
2149                             LAST(1));
2150    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2151                                 SRC0_ABS(0),
2152                                 SRC1_ABS(0),
2153                                 UPDATE_EXECUTE_MASK(0),
2154                                 UPDATE_PRED(0),
2155                                 WRITE_MASK(1),
2156                                 FOG_MERGE(0),
2157                                 OMOD(SQ_ALU_OMOD_OFF),
2158                                 ALU_INST(SQ_OP2_INST_MUL),
2159                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2160                                 DST_GPR(0),
2161                                 DST_REL(ABSOLUTE),
2162                                 DST_ELEM(ELEM_Y),
2163                                 CLAMP(0));
2164
2165    /* 44/45 - dst - mask */
2166    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2167			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2168			     FETCH_WHOLE_QUAD(0),
2169			     BUFFER_ID(0),
2170			     SRC_GPR(0),
2171			     SRC_REL(ABSOLUTE),
2172			     SRC_SEL_X(SQ_SEL_X),
2173			     MEGA_FETCH_COUNT(24));
2174    shader[i++] = VTX_DWORD1_GPR(DST_GPR(2),
2175				 DST_REL(0),
2176				 DST_SEL_X(SQ_SEL_X),
2177				 DST_SEL_Y(SQ_SEL_Y),
2178				 DST_SEL_Z(SQ_SEL_0),
2179				 DST_SEL_W(SQ_SEL_1),
2180				 USE_CONST_FIELDS(0),
2181				 DATA_FORMAT(FMT_32_32_FLOAT),
2182				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2183				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2184				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2185    shader[i++] = VTX_DWORD2(OFFSET(0),
2186#if X_BYTE_ORDER == X_BIG_ENDIAN
2187                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2188#else
2189                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2190#endif
2191			     CONST_BUF_NO_STRIDE(0),
2192			     MEGA_FETCH(1));
2193    shader[i++] = VTX_DWORD_PAD;
2194    /* 46/47 - src */
2195    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2196			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2197			     FETCH_WHOLE_QUAD(0),
2198			     BUFFER_ID(0),
2199			     SRC_GPR(0),
2200			     SRC_REL(ABSOLUTE),
2201			     SRC_SEL_X(SQ_SEL_X),
2202			     MEGA_FETCH_COUNT(8));
2203    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
2204				 DST_REL(0),
2205				 DST_SEL_X(SQ_SEL_X),
2206				 DST_SEL_Y(SQ_SEL_Y),
2207				 DST_SEL_Z(SQ_SEL_1),
2208				 DST_SEL_W(SQ_SEL_0),
2209				 USE_CONST_FIELDS(0),
2210				 DATA_FORMAT(FMT_32_32_FLOAT),
2211				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2212				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2213				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2214    shader[i++] = VTX_DWORD2(OFFSET(8),
2215#if X_BYTE_ORDER == X_BIG_ENDIAN
2216                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2217#else
2218                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2219#endif
2220			     CONST_BUF_NO_STRIDE(0),
2221			     MEGA_FETCH(0));
2222    shader[i++] = VTX_DWORD_PAD;
2223    /* 48/49 - mask */
2224    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2225			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2226			     FETCH_WHOLE_QUAD(0),
2227			     BUFFER_ID(0),
2228			     SRC_GPR(0),
2229			     SRC_REL(ABSOLUTE),
2230			     SRC_SEL_X(SQ_SEL_X),
2231			     MEGA_FETCH_COUNT(8));
2232    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
2233				 DST_REL(0),
2234				 DST_SEL_X(SQ_SEL_X),
2235				 DST_SEL_Y(SQ_SEL_Y),
2236				 DST_SEL_Z(SQ_SEL_1),
2237				 DST_SEL_W(SQ_SEL_0),
2238				 USE_CONST_FIELDS(0),
2239				 DATA_FORMAT(FMT_32_32_FLOAT),
2240				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2241				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2242				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2243    shader[i++] = VTX_DWORD2(OFFSET(16),
2244#if X_BYTE_ORDER == X_BIG_ENDIAN
2245                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2246#else
2247                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2248#endif
2249			     CONST_BUF_NO_STRIDE(0),
2250			     MEGA_FETCH(0));
2251    shader[i++] = VTX_DWORD_PAD;
2252
2253    /* 50/51 - dst - non-mask */
2254    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2255			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2256			     FETCH_WHOLE_QUAD(0),
2257			     BUFFER_ID(0),
2258			     SRC_GPR(0),
2259			     SRC_REL(ABSOLUTE),
2260			     SRC_SEL_X(SQ_SEL_X),
2261			     MEGA_FETCH_COUNT(16));
2262    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
2263				 DST_REL(0),
2264				 DST_SEL_X(SQ_SEL_X),
2265				 DST_SEL_Y(SQ_SEL_Y),
2266				 DST_SEL_Z(SQ_SEL_0),
2267				 DST_SEL_W(SQ_SEL_1),
2268				 USE_CONST_FIELDS(0),
2269				 DATA_FORMAT(FMT_32_32_FLOAT),
2270				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2271				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2272				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2273    shader[i++] = VTX_DWORD2(OFFSET(0),
2274#if X_BYTE_ORDER == X_BIG_ENDIAN
2275                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2276#else
2277                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2278#endif
2279			     CONST_BUF_NO_STRIDE(0),
2280			     MEGA_FETCH(1));
2281    shader[i++] = VTX_DWORD_PAD;
2282    /* 52/53 - src */
2283    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2284			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2285			     FETCH_WHOLE_QUAD(0),
2286			     BUFFER_ID(0),
2287			     SRC_GPR(0),
2288			     SRC_REL(ABSOLUTE),
2289			     SRC_SEL_X(SQ_SEL_X),
2290			     MEGA_FETCH_COUNT(8));
2291    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
2292				 DST_REL(0),
2293				 DST_SEL_X(SQ_SEL_X),
2294				 DST_SEL_Y(SQ_SEL_Y),
2295				 DST_SEL_Z(SQ_SEL_1),
2296				 DST_SEL_W(SQ_SEL_0),
2297				 USE_CONST_FIELDS(0),
2298				 DATA_FORMAT(FMT_32_32_FLOAT),
2299				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2300				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2301				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2302    shader[i++] = VTX_DWORD2(OFFSET(8),
2303#if X_BYTE_ORDER == X_BIG_ENDIAN
2304                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2305#else
2306                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2307#endif
2308			     CONST_BUF_NO_STRIDE(0),
2309			     MEGA_FETCH(0));
2310    shader[i++] = VTX_DWORD_PAD;
2311
2312    return i;
2313}
2314
2315/* comp ps --------------------------------------- */
/*
 * R600_comp_ps - emit the "comp" pixel shader into a caller-supplied buffer.
 *
 * ChipSet: chip family; in this function it is only forwarded to
 *          ALU_DWORD1_OP2().
 * shader:  destination array of 32-bit words.  Exactly 60 words are stored
 *          (14 CF instructions and 12 ALU instructions of two words each,
 *          plus two texture fetches of four words each, pad included).
 *          No bounds checking is done here.
 *
 * Returns the number of words written to shader[].
 *
 * The numeric labels in the comments below count two-word slots from the
 * start of the buffer, and the non-zero ADDR() operands refer to those
 * labels.  Instructions therefore must not be added, removed or reordered
 * without renumbering the ADDR() values.
 *
 * Program outline, as encoded below:
 *   0-3    conditional CALLs choosing how GPR1 (mask) and GPR0 (src) are
 *          loaded: via a TEX clause when boolean 1 / boolean 0 is true,
 *          otherwise via an ALU clause that reads constants.
 *   4      ALU clause: GPR0 := GPR1 .* GPR0 (four MULs, slots 14-17).
 *   5      export GPR0 to CF_PIXEL_MRT0 with END_OF_PROGRAM(1).
 *   6-13   the four subroutines, each one clause followed by a RETURN.
 *   14-25  ALU instructions referenced by CF slots 4, 8 and 12.
 *   26-29  texture fetch instructions referenced by CF slots 6 and 10.
 */
2316int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
2317{
2318    int i = 0;
2319
2320    /* 0 */
2321    /* call fetch-mask if boolean1 == true */
2322    shader[i++] = CF_DWORD0(ADDR(10));
2323    shader[i++] = CF_DWORD1(POP_COUNT(0),
2324                            CF_CONST(1),
2325                            COND(SQ_CF_COND_BOOL),
2326                            I_COUNT(0),
2327                            CALL_COUNT(0),
2328                            END_OF_PROGRAM(0),
2329                            VALID_PIXEL_MODE(0),
2330                            CF_INST(SQ_CF_INST_CALL),
2331                            WHOLE_QUAD_MODE(0),
2332                            BARRIER(0));
2333    /* 1 */
2334    /* call read-constant-mask if boolean1 == false */
2335    shader[i++] = CF_DWORD0(ADDR(12));
2336    shader[i++] = CF_DWORD1(POP_COUNT(0),
2337                            CF_CONST(1),
2338                            COND(SQ_CF_COND_NOT_BOOL),
2339                            I_COUNT(0),
2340                            CALL_COUNT(0),
2341                            END_OF_PROGRAM(0),
2342                            VALID_PIXEL_MODE(0),
2343                            CF_INST(SQ_CF_INST_CALL),
2344                            WHOLE_QUAD_MODE(0),
2345                            BARRIER(0));
2346    /* 2 */
2347    /* call fetch-src if boolean0 == true */
2348    shader[i++] = CF_DWORD0(ADDR(6));
2349    shader[i++] = CF_DWORD1(POP_COUNT(0),
2350                            CF_CONST(0),
2351                            COND(SQ_CF_COND_BOOL),
2352                            I_COUNT(0),
2353                            CALL_COUNT(0),
2354                            END_OF_PROGRAM(0),
2355                            VALID_PIXEL_MODE(0),
2356                            CF_INST(SQ_CF_INST_CALL),
2357                            WHOLE_QUAD_MODE(0),
2358                            BARRIER(0));
2359
2360    /* 3 */
2361    /* call read-constant-src if boolean0 == false */
2362    shader[i++] = CF_DWORD0(ADDR(8));
2363    shader[i++] = CF_DWORD1(POP_COUNT(0),
2364			    CF_CONST(0),
2365			    COND(SQ_CF_COND_NOT_BOOL),
2366			    I_COUNT(0),
2367			    CALL_COUNT(0),
2368			    END_OF_PROGRAM(0),
2369			    VALID_PIXEL_MODE(0),
2370			    CF_INST(SQ_CF_INST_CALL),
2371			    WHOLE_QUAD_MODE(0),
2372			    BARRIER(0));
2373
2374    /* 4 */
2375    /* src IN mask (GPR0 := GPR1 .* GPR0) */
2376    shader[i++] = CF_ALU_DWORD0(ADDR(14),
2377				KCACHE_BANK0(0),
2378				KCACHE_BANK1(0),
2379				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
2380    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2381				KCACHE_ADDR0(0),
2382				KCACHE_ADDR1(0),
2383				I_COUNT(4),
2384				USES_WATERFALL(0),
2385				CF_INST(SQ_CF_INST_ALU),
2386				WHOLE_QUAD_MODE(0),
2387				BARRIER(1));
2388
2389    /* 5 */
2390    /* export pixel data */
2391    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
2392					  TYPE(SQ_EXPORT_PIXEL),
2393					  RW_GPR(0),
2394					  RW_REL(ABSOLUTE),
2395					  INDEX_GPR(0),
2396					  ELEM_SIZE(1));
2397    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
2398					       SRC_SEL_Y(SQ_SEL_Y),
2399					       SRC_SEL_Z(SQ_SEL_Z),
2400					       SRC_SEL_W(SQ_SEL_W),
2401					       R6xx_ELEM_LOOP(0),
2402					       BURST_COUNT(1),
2403					       END_OF_PROGRAM(1),
2404					       VALID_PIXEL_MODE(0),
2405					       CF_INST(SQ_CF_INST_EXPORT_DONE),
2406					       WHOLE_QUAD_MODE(0),
2407					       BARRIER(1));
2408    /* subroutine fetch src */
2409    /* 6 */
2410    /* fetch src into GPR0 (TEX clause: one fetch, located at slot 26) */
2411    shader[i++] = CF_DWORD0(ADDR(26));
2412    shader[i++] = CF_DWORD1(POP_COUNT(0),
2413			    CF_CONST(0),
2414			    COND(SQ_CF_COND_ACTIVE),
2415			    I_COUNT(1),
2416			    CALL_COUNT(0),
2417			    END_OF_PROGRAM(0),
2418			    VALID_PIXEL_MODE(0),
2419			    CF_INST(SQ_CF_INST_TEX),
2420			    WHOLE_QUAD_MODE(0),
2421			    BARRIER(1));
2422
2423    /* 7 */
2424    /* return */
2425    shader[i++] = CF_DWORD0(ADDR(0));
2426    shader[i++] = CF_DWORD1(POP_COUNT(0),
2427			    CF_CONST(0),
2428			    COND(SQ_CF_COND_ACTIVE),
2429			    I_COUNT(0),
2430			    CALL_COUNT(0),
2431			    END_OF_PROGRAM(0),
2432			    VALID_PIXEL_MODE(0),
2433			    CF_INST(SQ_CF_INST_RETURN),
2434			    WHOLE_QUAD_MODE(0),
2435			    BARRIER(1));
2436
2437    /* subroutine read-constant-src */
2438    /* 8 */
2439    /* read constants into GPR0 (ALU clause: four MOVs, slots 18-21) */
2440    shader[i++] = CF_ALU_DWORD0(ADDR(18),
2441				KCACHE_BANK0(0),
2442				KCACHE_BANK1(0),
2443				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
2444    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2445				KCACHE_ADDR0(0),
2446				KCACHE_ADDR1(0),
2447				I_COUNT(4),
2448				USES_WATERFALL(0),
2449				CF_INST(SQ_CF_INST_ALU),
2450				WHOLE_QUAD_MODE(0),
2451				BARRIER(1));
2452    /* 9 */
2453    /* return */
2454    shader[i++] = CF_DWORD0(ADDR(0));
2455    shader[i++] = CF_DWORD1(POP_COUNT(0),
2456			    CF_CONST(0),
2457			    COND(SQ_CF_COND_ACTIVE),
2458			    I_COUNT(0),
2459			    CALL_COUNT(0),
2460			    END_OF_PROGRAM(0),
2461			    VALID_PIXEL_MODE(0),
2462			    CF_INST(SQ_CF_INST_RETURN),
2463			    WHOLE_QUAD_MODE(0),
2464			    BARRIER(1));
2465
2466    /* subroutine fetch mask */
2467    /* 10 */
2468    /* fetch mask into GPR1 (TEX clause: one fetch, located at slot 28) */
2469    shader[i++] = CF_DWORD0(ADDR(28));
2470    shader[i++] = CF_DWORD1(POP_COUNT(0),
2471                            CF_CONST(0),
2472                            COND(SQ_CF_COND_ACTIVE),
2473                            I_COUNT(1),
2474                            CALL_COUNT(0),
2475                            END_OF_PROGRAM(0),
2476                            VALID_PIXEL_MODE(0),
2477                            CF_INST(SQ_CF_INST_TEX),
2478                            WHOLE_QUAD_MODE(0),
2479                            BARRIER(1));
2480
2481    /* 11 */
2482    /* return */
2483    shader[i++] = CF_DWORD0(ADDR(0));
2484    shader[i++] = CF_DWORD1(POP_COUNT(0),
2485                            CF_CONST(0),
2486                            COND(SQ_CF_COND_ACTIVE),
2487                            I_COUNT(0),
2488                            CALL_COUNT(0),
2489                            END_OF_PROGRAM(0),
2490                            VALID_PIXEL_MODE(0),
2491                            CF_INST(SQ_CF_INST_RETURN),
2492                            WHOLE_QUAD_MODE(0),
2493                            BARRIER(1));
2494
2495    /* subroutine read-constant-mask */
2496    /* 12 */
2497    /* read constants into GPR1 (ALU clause: four MOVs, slots 22-25) */
2498    shader[i++] = CF_ALU_DWORD0(ADDR(22),
2499                                KCACHE_BANK0(0),
2500                                KCACHE_BANK1(0),
2501                                KCACHE_MODE0(SQ_CF_KCACHE_NOP));
2502    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2503                                KCACHE_ADDR0(0),
2504                                KCACHE_ADDR1(0),
2505                                I_COUNT(4),
2506                                USES_WATERFALL(0),
2507                                CF_INST(SQ_CF_INST_ALU),
2508                                WHOLE_QUAD_MODE(0),
2509                                BARRIER(1));
2510    /* 13 */
2511    /* return */
2512    shader[i++] = CF_DWORD0(ADDR(0));
2513    shader[i++] = CF_DWORD1(POP_COUNT(0),
2514                            CF_CONST(0),
2515                            COND(SQ_CF_COND_ACTIVE),
2516                            I_COUNT(0),
2517                            CALL_COUNT(0),
2518                            END_OF_PROGRAM(0),
2519                            VALID_PIXEL_MODE(0),
2520                            CF_INST(SQ_CF_INST_RETURN),
2521                            WHOLE_QUAD_MODE(0),
2522                            BARRIER(1));
2523    /* ALU clauses */
2524
2525    /* 14 - alu 0 */
2526    /* MUL gpr[0].x gpr[1].x gpr[0].x */
2527    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
2528			     SRC0_REL(ABSOLUTE),
2529			     SRC0_ELEM(ELEM_X),
2530			     SRC0_NEG(0),
2531			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2532			     SRC1_REL(ABSOLUTE),
2533			     SRC1_ELEM(ELEM_X),
2534			     SRC1_NEG(0),
2535			     INDEX_MODE(SQ_INDEX_LOOP),
2536			     PRED_SEL(SQ_PRED_SEL_OFF),
2537			     LAST(0));
2538    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2539				 SRC0_ABS(0),
2540				 SRC1_ABS(0),
2541				 UPDATE_EXECUTE_MASK(0),
2542				 UPDATE_PRED(0),
2543				 WRITE_MASK(1),
2544				 FOG_MERGE(0),
2545				 OMOD(SQ_ALU_OMOD_OFF),
2546				 ALU_INST(SQ_OP2_INST_MUL),
2547				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2548				 DST_GPR(0),
2549				 DST_REL(ABSOLUTE),
2550				 DST_ELEM(ELEM_X),
2551				 CLAMP(1));
2552    /* 15 - alu 1 */
2553    /* MUL gpr[0].y gpr[1].y gpr[0].y */
2554    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
2555			     SRC0_REL(ABSOLUTE),
2556			     SRC0_ELEM(ELEM_Y),
2557			     SRC0_NEG(0),
2558			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2559			     SRC1_REL(ABSOLUTE),
2560			     SRC1_ELEM(ELEM_Y),
2561			     SRC1_NEG(0),
2562			     INDEX_MODE(SQ_INDEX_LOOP),
2563			     PRED_SEL(SQ_PRED_SEL_OFF),
2564			     LAST(0));
2565    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2566				 SRC0_ABS(0),
2567				 SRC1_ABS(0),
2568				 UPDATE_EXECUTE_MASK(0),
2569				 UPDATE_PRED(0),
2570				 WRITE_MASK(1),
2571				 FOG_MERGE(0),
2572				 OMOD(SQ_ALU_OMOD_OFF),
2573				 ALU_INST(SQ_OP2_INST_MUL),
2574				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2575				 DST_GPR(0),
2576				 DST_REL(ABSOLUTE),
2577				 DST_ELEM(ELEM_Y),
2578				 CLAMP(1));
2579    /* 16 - alu 2 */
2580    /* MUL gpr[0].z gpr[1].z gpr[0].z */
2581    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
2582			     SRC0_REL(ABSOLUTE),
2583			     SRC0_ELEM(ELEM_Z),
2584			     SRC0_NEG(0),
2585			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2586			     SRC1_REL(ABSOLUTE),
2587			     SRC1_ELEM(ELEM_Z),
2588			     SRC1_NEG(0),
2589			     INDEX_MODE(SQ_INDEX_LOOP),
2590			     PRED_SEL(SQ_PRED_SEL_OFF),
2591			     LAST(0));
2592    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2593				 SRC0_ABS(0),
2594				 SRC1_ABS(0),
2595				 UPDATE_EXECUTE_MASK(0),
2596				 UPDATE_PRED(0),
2597				 WRITE_MASK(1),
2598				 FOG_MERGE(0),
2599				 OMOD(SQ_ALU_OMOD_OFF),
2600				 ALU_INST(SQ_OP2_INST_MUL),
2601				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2602				 DST_GPR(0),
2603				 DST_REL(ABSOLUTE),
2604				 DST_ELEM(ELEM_Z),
2605				 CLAMP(1));
2606    /* 17 - alu 3 */
2607    /* MUL gpr[0].w gpr[1].w gpr[0].w */
2608    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
2609			     SRC0_REL(ABSOLUTE),
2610			     SRC0_ELEM(ELEM_W),
2611			     SRC0_NEG(0),
2612			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2613			     SRC1_REL(ABSOLUTE),
2614			     SRC1_ELEM(ELEM_W),
2615			     SRC1_NEG(0),
2616			     INDEX_MODE(SQ_INDEX_LOOP),
2617			     PRED_SEL(SQ_PRED_SEL_OFF),
2618			     LAST(1));
2619    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2620				 SRC0_ABS(0),
2621				 SRC1_ABS(0),
2622				 UPDATE_EXECUTE_MASK(0),
2623				 UPDATE_PRED(0),
2624				 WRITE_MASK(1),
2625				 FOG_MERGE(0),
2626				 OMOD(SQ_ALU_OMOD_OFF),
2627				 ALU_INST(SQ_OP2_INST_MUL),
2628				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2629				 DST_GPR(0),
2630				 DST_REL(ABSOLUTE),
2631				 DST_ELEM(ELEM_W),
2632				 CLAMP(1));
2633
    /*
     * 18-21: body of read-constant-src (CF slot 8).  Each MOV copies one
     * component of ALU_SRC_CFILE_BASE + 0 into the same component of gpr[0].
     * NOTE(review): the SRC1 fields are filled in although MOV presumably
     * reads only SRC0, and INDEX_MODE is SQ_INDEX_AR_X here versus
     * SQ_INDEX_LOOP in 14-17.  All operands are ABSOLUTE, so this looks
     * harmless -- confirm against the ISA documentation.
     */
2634    /* 18 - MOV gpr[0].x <- (ALU_SRC_CFILE_BASE + 0).x */
2635    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
2636			     SRC0_REL(ABSOLUTE),
2637			     SRC0_ELEM(ELEM_X),
2638			     SRC0_NEG(0),
2639			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2640			     SRC1_REL(ABSOLUTE),
2641			     SRC1_ELEM(ELEM_X),
2642			     SRC1_NEG(0),
2643			     INDEX_MODE(SQ_INDEX_AR_X),
2644			     PRED_SEL(SQ_PRED_SEL_OFF),
2645			     LAST(0));
2646    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2647				 SRC0_ABS(0),
2648				 SRC1_ABS(0),
2649				 UPDATE_EXECUTE_MASK(0),
2650				 UPDATE_PRED(0),
2651				 WRITE_MASK(1),
2652				 FOG_MERGE(0),
2653				 OMOD(SQ_ALU_OMOD_OFF),
2654				 ALU_INST(SQ_OP2_INST_MOV),
2655				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2656				 DST_GPR(0),
2657				 DST_REL(ABSOLUTE),
2658				 DST_ELEM(ELEM_X),
2659				 CLAMP(1));
2660    /* 19 - MOV gpr[0].y <- (ALU_SRC_CFILE_BASE + 0).y */
2661    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
2662			     SRC0_REL(ABSOLUTE),
2663			     SRC0_ELEM(ELEM_Y),
2664			     SRC0_NEG(0),
2665			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2666			     SRC1_REL(ABSOLUTE),
2667			     SRC1_ELEM(ELEM_Y),
2668			     SRC1_NEG(0),
2669			     INDEX_MODE(SQ_INDEX_AR_X),
2670			     PRED_SEL(SQ_PRED_SEL_OFF),
2671			     LAST(0));
2672    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2673				 SRC0_ABS(0),
2674				 SRC1_ABS(0),
2675				 UPDATE_EXECUTE_MASK(0),
2676				 UPDATE_PRED(0),
2677				 WRITE_MASK(1),
2678				 FOG_MERGE(0),
2679				 OMOD(SQ_ALU_OMOD_OFF),
2680				 ALU_INST(SQ_OP2_INST_MOV),
2681				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2682				 DST_GPR(0),
2683				 DST_REL(ABSOLUTE),
2684				 DST_ELEM(ELEM_Y),
2685				 CLAMP(1));
2686    /* 20 - MOV gpr[0].z <- (ALU_SRC_CFILE_BASE + 0).z */
2687    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
2688			     SRC0_REL(ABSOLUTE),
2689			     SRC0_ELEM(ELEM_Z),
2690			     SRC0_NEG(0),
2691			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2692			     SRC1_REL(ABSOLUTE),
2693			     SRC1_ELEM(ELEM_Z),
2694			     SRC1_NEG(0),
2695			     INDEX_MODE(SQ_INDEX_AR_X),
2696			     PRED_SEL(SQ_PRED_SEL_OFF),
2697			     LAST(0));
2698    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2699				 SRC0_ABS(0),
2700				 SRC1_ABS(0),
2701				 UPDATE_EXECUTE_MASK(0),
2702				 UPDATE_PRED(0),
2703				 WRITE_MASK(1),
2704				 FOG_MERGE(0),
2705				 OMOD(SQ_ALU_OMOD_OFF),
2706				 ALU_INST(SQ_OP2_INST_MOV),
2707				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2708				 DST_GPR(0),
2709				 DST_REL(ABSOLUTE),
2710				 DST_ELEM(ELEM_Z),
2711				 CLAMP(1));
2712    /* 21 - MOV gpr[0].w <- (ALU_SRC_CFILE_BASE + 0).w; LAST(1) */
2713    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
2714			     SRC0_REL(ABSOLUTE),
2715			     SRC0_ELEM(ELEM_W),
2716			     SRC0_NEG(0),
2717			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2718			     SRC1_REL(ABSOLUTE),
2719			     SRC1_ELEM(ELEM_W),
2720			     SRC1_NEG(0),
2721			     INDEX_MODE(SQ_INDEX_AR_X),
2722			     PRED_SEL(SQ_PRED_SEL_OFF),
2723			     LAST(1));
2724    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2725				 SRC0_ABS(0),
2726				 SRC1_ABS(0),
2727				 UPDATE_EXECUTE_MASK(0),
2728				 UPDATE_PRED(0),
2729				 WRITE_MASK(1),
2730				 FOG_MERGE(0),
2731				 OMOD(SQ_ALU_OMOD_OFF),
2732				 ALU_INST(SQ_OP2_INST_MOV),
2733				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2734				 DST_GPR(0),
2735				 DST_REL(ABSOLUTE),
2736				 DST_ELEM(ELEM_W),
2737				 CLAMP(1));
2738
    /*
     * 22-25: body of read-constant-mask (CF slot 12).  Same pattern as
     * 18-21, but the source is ALU_SRC_CFILE_BASE + 1 and the destination
     * is gpr[1].
     */
2739    /* 22 - MOV gpr[1].x <- (ALU_SRC_CFILE_BASE + 1).x */
2740    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
2741			     SRC0_REL(ABSOLUTE),
2742			     SRC0_ELEM(ELEM_X),
2743			     SRC0_NEG(0),
2744			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2745			     SRC1_REL(ABSOLUTE),
2746			     SRC1_ELEM(ELEM_X),
2747			     SRC1_NEG(0),
2748			     INDEX_MODE(SQ_INDEX_AR_X),
2749			     PRED_SEL(SQ_PRED_SEL_OFF),
2750			     LAST(0));
2751    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2752				 SRC0_ABS(0),
2753				 SRC1_ABS(0),
2754				 UPDATE_EXECUTE_MASK(0),
2755				 UPDATE_PRED(0),
2756				 WRITE_MASK(1),
2757				 FOG_MERGE(0),
2758				 OMOD(SQ_ALU_OMOD_OFF),
2759				 ALU_INST(SQ_OP2_INST_MOV),
2760				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2761				 DST_GPR(1),
2762				 DST_REL(ABSOLUTE),
2763				 DST_ELEM(ELEM_X),
2764				 CLAMP(1));
2765    /* 23 - MOV gpr[1].y <- (ALU_SRC_CFILE_BASE + 1).y */
2766    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
2767			     SRC0_REL(ABSOLUTE),
2768			     SRC0_ELEM(ELEM_Y),
2769			     SRC0_NEG(0),
2770			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2771			     SRC1_REL(ABSOLUTE),
2772			     SRC1_ELEM(ELEM_Y),
2773			     SRC1_NEG(0),
2774			     INDEX_MODE(SQ_INDEX_AR_X),
2775			     PRED_SEL(SQ_PRED_SEL_OFF),
2776			     LAST(0));
2777    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2778				 SRC0_ABS(0),
2779				 SRC1_ABS(0),
2780				 UPDATE_EXECUTE_MASK(0),
2781				 UPDATE_PRED(0),
2782				 WRITE_MASK(1),
2783				 FOG_MERGE(0),
2784				 OMOD(SQ_ALU_OMOD_OFF),
2785				 ALU_INST(SQ_OP2_INST_MOV),
2786				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2787				 DST_GPR(1),
2788				 DST_REL(ABSOLUTE),
2789				 DST_ELEM(ELEM_Y),
2790				 CLAMP(1));
2791    /* 24 - MOV gpr[1].z <- (ALU_SRC_CFILE_BASE + 1).z */
2792    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
2793			     SRC0_REL(ABSOLUTE),
2794			     SRC0_ELEM(ELEM_Z),
2795			     SRC0_NEG(0),
2796			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2797			     SRC1_REL(ABSOLUTE),
2798			     SRC1_ELEM(ELEM_Z),
2799			     SRC1_NEG(0),
2800			     INDEX_MODE(SQ_INDEX_AR_X),
2801			     PRED_SEL(SQ_PRED_SEL_OFF),
2802			     LAST(0));
2803    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2804				 SRC0_ABS(0),
2805				 SRC1_ABS(0),
2806				 UPDATE_EXECUTE_MASK(0),
2807				 UPDATE_PRED(0),
2808				 WRITE_MASK(1),
2809				 FOG_MERGE(0),
2810				 OMOD(SQ_ALU_OMOD_OFF),
2811				 ALU_INST(SQ_OP2_INST_MOV),
2812				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2813				 DST_GPR(1),
2814				 DST_REL(ABSOLUTE),
2815				 DST_ELEM(ELEM_Z),
2816				 CLAMP(1));
2817    /* 25 - MOV gpr[1].w <- (ALU_SRC_CFILE_BASE + 1).w; LAST(1) */
2818    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
2819			     SRC0_REL(ABSOLUTE),
2820			     SRC0_ELEM(ELEM_W),
2821			     SRC0_NEG(0),
2822			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2823			     SRC1_REL(ABSOLUTE),
2824			     SRC1_ELEM(ELEM_W),
2825			     SRC1_NEG(0),
2826			     INDEX_MODE(SQ_INDEX_AR_X),
2827			     PRED_SEL(SQ_PRED_SEL_OFF),
2828			     LAST(1));
2829    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2830				 SRC0_ABS(0),
2831				 SRC1_ABS(0),
2832				 UPDATE_EXECUTE_MASK(0),
2833				 UPDATE_PRED(0),
2834				 WRITE_MASK(1),
2835				 FOG_MERGE(0),
2836				 OMOD(SQ_ALU_OMOD_OFF),
2837				 ALU_INST(SQ_OP2_INST_MOV),
2838				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2839				 DST_GPR(1),
2840				 DST_REL(ABSOLUTE),
2841				 DST_ELEM(ELEM_W),
2842				 CLAMP(1));
2843
2844    /* 26/27 - src: SAMPLE resource 0 / sampler 0, gpr[0] -> gpr[0] */
    /* source swizzle below takes X and Y from gpr[0], Z := 0, W := 1 */
2845    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
2846			     BC_FRAC_MODE(0),
2847			     FETCH_WHOLE_QUAD(0),
2848			     RESOURCE_ID(0),
2849			     SRC_GPR(0),
2850			     SRC_REL(ABSOLUTE),
2851			     R7xx_ALT_CONST(0));
2852    shader[i++] = TEX_DWORD1(DST_GPR(0),
2853			     DST_REL(ABSOLUTE),
2854			     DST_SEL_X(SQ_SEL_X),
2855			     DST_SEL_Y(SQ_SEL_Y),
2856			     DST_SEL_Z(SQ_SEL_Z),
2857			     DST_SEL_W(SQ_SEL_W),
2858			     LOD_BIAS(0),
2859			     COORD_TYPE_X(TEX_NORMALIZED),
2860			     COORD_TYPE_Y(TEX_NORMALIZED),
2861			     COORD_TYPE_Z(TEX_NORMALIZED),
2862			     COORD_TYPE_W(TEX_NORMALIZED));
2863    shader[i++] = TEX_DWORD2(OFFSET_X(0),
2864			     OFFSET_Y(0),
2865			     OFFSET_Z(0),
2866			     SAMPLER_ID(0),
2867			     SRC_SEL_X(SQ_SEL_X),
2868			     SRC_SEL_Y(SQ_SEL_Y),
2869			     SRC_SEL_Z(SQ_SEL_0),
2870			     SRC_SEL_W(SQ_SEL_1));
2871    shader[i++] = TEX_DWORD_PAD;
2872    /* 28/29 - mask: SAMPLE resource 1 / sampler 1, gpr[1] -> gpr[1] */
    /* source swizzle below takes X and Y from gpr[1], Z := 0, W := 1 */
2873    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
2874			     BC_FRAC_MODE(0),
2875			     FETCH_WHOLE_QUAD(0),
2876			     RESOURCE_ID(1),
2877			     SRC_GPR(1),
2878			     SRC_REL(ABSOLUTE),
2879			     R7xx_ALT_CONST(0));
2880    shader[i++] = TEX_DWORD1(DST_GPR(1),
2881			     DST_REL(ABSOLUTE),
2882			     DST_SEL_X(SQ_SEL_X),
2883			     DST_SEL_Y(SQ_SEL_Y),
2884			     DST_SEL_Z(SQ_SEL_Z),
2885			     DST_SEL_W(SQ_SEL_W),
2886			     LOD_BIAS(0),
2887			     COORD_TYPE_X(TEX_NORMALIZED),
2888			     COORD_TYPE_Y(TEX_NORMALIZED),
2889			     COORD_TYPE_Z(TEX_NORMALIZED),
2890			     COORD_TYPE_W(TEX_NORMALIZED));
2891    shader[i++] = TEX_DWORD2(OFFSET_X(0),
2892			     OFFSET_Y(0),
2893			     OFFSET_Z(0),
2894			     SAMPLER_ID(1),
2895			     SRC_SEL_X(SQ_SEL_X),
2896			     SRC_SEL_Y(SQ_SEL_Y),
2897			     SRC_SEL_Z(SQ_SEL_0),
2898			     SRC_SEL_W(SQ_SEL_1));
2899    shader[i++] = TEX_DWORD_PAD;
2900
    /* i now equals the number of 32-bit words stored in shader[] */
2901    return i;
2902}
2903