r600_shader.c revision 2f39173d
/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Author: Alex Deucher <alexander.deucher@amd.com>
 *
 */
26
27#ifdef HAVE_CONFIG_H
28#include "config.h"
29#endif
30
31#include "xf86.h"
32
33#include "radeon.h"
34#include "r600_shader.h"
35#include "r600_reg.h"
36
37/* solid vs --------------------------------------- */
38int R600_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader)
39{
40    int i = 0;
41
42    /* 0 */
43    shader[i++] = CF_DWORD0(ADDR(4));
44    shader[i++] = CF_DWORD1(POP_COUNT(0),
45			    CF_CONST(0),
46			    COND(SQ_CF_COND_ACTIVE),
47			    I_COUNT(1),
48			    CALL_COUNT(0),
49			    END_OF_PROGRAM(0),
50			    VALID_PIXEL_MODE(0),
51			    CF_INST(SQ_CF_INST_VTX),
52			    WHOLE_QUAD_MODE(0),
53			    BARRIER(1));
54    /* 1 */
55    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
56					  TYPE(SQ_EXPORT_POS),
57					  RW_GPR(1),
58					  RW_REL(ABSOLUTE),
59					  INDEX_GPR(0),
60					  ELEM_SIZE(0));
61    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
62					       SRC_SEL_Y(SQ_SEL_Y),
63					       SRC_SEL_Z(SQ_SEL_Z),
64					       SRC_SEL_W(SQ_SEL_W),
65					       R6xx_ELEM_LOOP(0),
66					       BURST_COUNT(1),
67					       END_OF_PROGRAM(0),
68					       VALID_PIXEL_MODE(0),
69					       CF_INST(SQ_CF_INST_EXPORT_DONE),
70					       WHOLE_QUAD_MODE(0),
71					       BARRIER(1));
72    /* 2 - always export a param whether it's used or not */
73    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
74					  TYPE(SQ_EXPORT_PARAM),
75					  RW_GPR(0),
76					  RW_REL(ABSOLUTE),
77					  INDEX_GPR(0),
78					  ELEM_SIZE(0));
79    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
80					       SRC_SEL_Y(SQ_SEL_Y),
81					       SRC_SEL_Z(SQ_SEL_Z),
82					       SRC_SEL_W(SQ_SEL_W),
83					       R6xx_ELEM_LOOP(0),
84					       BURST_COUNT(0),
85					       END_OF_PROGRAM(1),
86					       VALID_PIXEL_MODE(0),
87					       CF_INST(SQ_CF_INST_EXPORT_DONE),
88					       WHOLE_QUAD_MODE(0),
89					       BARRIER(0));
90    /* 3 - padding */
91    shader[i++] = 0x00000000;
92    shader[i++] = 0x00000000;
93    /* 4/5 */
94    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
95			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
96			     FETCH_WHOLE_QUAD(0),
97			     BUFFER_ID(0),
98			     SRC_GPR(0),
99			     SRC_REL(ABSOLUTE),
100			     SRC_SEL_X(SQ_SEL_X),
101			     MEGA_FETCH_COUNT(8));
102    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
103				 DST_REL(0),
104				 DST_SEL_X(SQ_SEL_X),
105				 DST_SEL_Y(SQ_SEL_Y),
106				 DST_SEL_Z(SQ_SEL_0),
107				 DST_SEL_W(SQ_SEL_1),
108				 USE_CONST_FIELDS(0),
109				 DATA_FORMAT(FMT_32_32_FLOAT),
110				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
111				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
112				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
113    shader[i++] = VTX_DWORD2(OFFSET(0),
114			     ENDIAN_SWAP(ENDIAN_NONE),
115			     CONST_BUF_NO_STRIDE(0),
116			     MEGA_FETCH(1));
117    shader[i++] = VTX_DWORD_PAD;
118
119    return i;
120}
121
122/* solid ps --------------------------------------- */
123int R600_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader)
124{
125    int i = 0;
126
127    /* 0 */
128    shader[i++] = CF_ALU_DWORD0(ADDR(2),
129				KCACHE_BANK0(0),
130				KCACHE_BANK1(0),
131				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
132    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
133				KCACHE_ADDR0(0),
134				KCACHE_ADDR1(0),
135				I_COUNT(4),
136				USES_WATERFALL(0),
137				CF_INST(SQ_CF_INST_ALU),
138				WHOLE_QUAD_MODE(0),
139				BARRIER(1));
140    /* 1 */
141    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
142					  TYPE(SQ_EXPORT_PIXEL),
143					  RW_GPR(0),
144					  RW_REL(ABSOLUTE),
145					  INDEX_GPR(0),
146					  ELEM_SIZE(1));
147    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
148					       SRC_SEL_Y(SQ_SEL_Y),
149					       SRC_SEL_Z(SQ_SEL_Z),
150					       SRC_SEL_W(SQ_SEL_W),
151					       R6xx_ELEM_LOOP(0),
152					       BURST_COUNT(1),
153					       END_OF_PROGRAM(1),
154					       VALID_PIXEL_MODE(0),
155					       CF_INST(SQ_CF_INST_EXPORT_DONE),
156					       WHOLE_QUAD_MODE(0),
157					       BARRIER(1));
158
159    /* 2 */
160    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
161			     SRC0_REL(ABSOLUTE),
162			     SRC0_ELEM(ELEM_X),
163			     SRC0_NEG(0),
164			     SRC1_SEL(0),
165			     SRC1_REL(ABSOLUTE),
166			     SRC1_ELEM(ELEM_X),
167			     SRC1_NEG(0),
168			     INDEX_MODE(SQ_INDEX_AR_X),
169			     PRED_SEL(SQ_PRED_SEL_OFF),
170			     LAST(0));
171    shader[i++] = ALU_DWORD1_OP2(ChipSet,
172				 SRC0_ABS(0),
173				 SRC1_ABS(0),
174				 UPDATE_EXECUTE_MASK(0),
175				 UPDATE_PRED(0),
176				 WRITE_MASK(1),
177				 FOG_MERGE(0),
178				 OMOD(SQ_ALU_OMOD_OFF),
179				 ALU_INST(SQ_OP2_INST_MOV),
180				 BANK_SWIZZLE(SQ_ALU_VEC_012),
181				 DST_GPR(0),
182				 DST_REL(ABSOLUTE),
183				 DST_ELEM(ELEM_X),
184				 CLAMP(1));
185    /* 3 */
186    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
187			     SRC0_REL(ABSOLUTE),
188			     SRC0_ELEM(ELEM_Y),
189			     SRC0_NEG(0),
190			     SRC1_SEL(0),
191			     SRC1_REL(ABSOLUTE),
192			     SRC1_ELEM(ELEM_Y),
193			     SRC1_NEG(0),
194			     INDEX_MODE(SQ_INDEX_AR_X),
195			     PRED_SEL(SQ_PRED_SEL_OFF),
196			     LAST(0));
197    shader[i++] = ALU_DWORD1_OP2(ChipSet,
198				 SRC0_ABS(0),
199				 SRC1_ABS(0),
200				 UPDATE_EXECUTE_MASK(0),
201				 UPDATE_PRED(0),
202				 WRITE_MASK(1),
203				 FOG_MERGE(0),
204				 OMOD(SQ_ALU_OMOD_OFF),
205				 ALU_INST(SQ_OP2_INST_MOV),
206				 BANK_SWIZZLE(SQ_ALU_VEC_012),
207				 DST_GPR(0),
208				 DST_REL(ABSOLUTE),
209				 DST_ELEM(ELEM_Y),
210				 CLAMP(1));
211    /* 4 */
212    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
213			     SRC0_REL(ABSOLUTE),
214			     SRC0_ELEM(ELEM_Z),
215			     SRC0_NEG(0),
216			     SRC1_SEL(0),
217			     SRC1_REL(ABSOLUTE),
218			     SRC1_ELEM(ELEM_Z),
219			     SRC1_NEG(0),
220			     INDEX_MODE(SQ_INDEX_AR_X),
221			     PRED_SEL(SQ_PRED_SEL_OFF),
222			     LAST(0));
223    shader[i++] = ALU_DWORD1_OP2(ChipSet,
224				 SRC0_ABS(0),
225				 SRC1_ABS(0),
226				 UPDATE_EXECUTE_MASK(0),
227				 UPDATE_PRED(0),
228				 WRITE_MASK(1),
229				 FOG_MERGE(0),
230				 OMOD(SQ_ALU_OMOD_OFF),
231				 ALU_INST(SQ_OP2_INST_MOV),
232				 BANK_SWIZZLE(SQ_ALU_VEC_012),
233				 DST_GPR(0),
234				 DST_REL(ABSOLUTE),
235				 DST_ELEM(ELEM_Z),
236				 CLAMP(1));
237    /* 5 */
238    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
239			     SRC0_REL(ABSOLUTE),
240			     SRC0_ELEM(ELEM_W),
241			     SRC0_NEG(0),
242			     SRC1_SEL(0),
243			     SRC1_REL(ABSOLUTE),
244			     SRC1_ELEM(ELEM_W),
245			     SRC1_NEG(0),
246			     INDEX_MODE(SQ_INDEX_AR_X),
247			     PRED_SEL(SQ_PRED_SEL_OFF),
248			     LAST(1));
249    shader[i++] = ALU_DWORD1_OP2(ChipSet,
250				 SRC0_ABS(0),
251				 SRC1_ABS(0),
252				 UPDATE_EXECUTE_MASK(0),
253				 UPDATE_PRED(0),
254				 WRITE_MASK(1),
255				 FOG_MERGE(0),
256				 OMOD(SQ_ALU_OMOD_OFF),
257				 ALU_INST(SQ_OP2_INST_MOV),
258				 BANK_SWIZZLE(SQ_ALU_VEC_012),
259				 DST_GPR(0),
260				 DST_REL(ABSOLUTE),
261				 DST_ELEM(ELEM_W),
262				 CLAMP(1));
263
264    return i;
265}
266
267/* copy vs --------------------------------------- */
268int R600_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader)
269{
270    int i = 0;
271
272    /* 0 */
273    shader[i++] = CF_DWORD0(ADDR(4));
274    shader[i++] = CF_DWORD1(POP_COUNT(0),
275			    CF_CONST(0),
276			    COND(SQ_CF_COND_ACTIVE),
277			    I_COUNT(2),
278			    CALL_COUNT(0),
279			    END_OF_PROGRAM(0),
280			    VALID_PIXEL_MODE(0),
281			    CF_INST(SQ_CF_INST_VTX),
282			    WHOLE_QUAD_MODE(0),
283			    BARRIER(1));
284    /* 1 */
285    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
286					  TYPE(SQ_EXPORT_POS),
287					  RW_GPR(1),
288					  RW_REL(ABSOLUTE),
289					  INDEX_GPR(0),
290					  ELEM_SIZE(0));
291    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
292					       SRC_SEL_Y(SQ_SEL_Y),
293					       SRC_SEL_Z(SQ_SEL_Z),
294					       SRC_SEL_W(SQ_SEL_W),
295					       R6xx_ELEM_LOOP(0),
296					       BURST_COUNT(0),
297					       END_OF_PROGRAM(0),
298					       VALID_PIXEL_MODE(0),
299					       CF_INST(SQ_CF_INST_EXPORT_DONE),
300					       WHOLE_QUAD_MODE(0),
301					       BARRIER(1));
302    /* 2 */
303    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
304					  TYPE(SQ_EXPORT_PARAM),
305					  RW_GPR(0),
306					  RW_REL(ABSOLUTE),
307					  INDEX_GPR(0),
308					  ELEM_SIZE(0));
309    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
310					       SRC_SEL_Y(SQ_SEL_Y),
311					       SRC_SEL_Z(SQ_SEL_Z),
312					       SRC_SEL_W(SQ_SEL_W),
313					       R6xx_ELEM_LOOP(0),
314					       BURST_COUNT(0),
315					       END_OF_PROGRAM(1),
316					       VALID_PIXEL_MODE(0),
317					       CF_INST(SQ_CF_INST_EXPORT_DONE),
318					       WHOLE_QUAD_MODE(0),
319					       BARRIER(0));
320    /* 3 */
321    shader[i++] = 0x00000000;
322    shader[i++] = 0x00000000;
323    /* 4/5 */
324    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
325			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
326			     FETCH_WHOLE_QUAD(0),
327			     BUFFER_ID(0),
328			     SRC_GPR(0),
329			     SRC_REL(ABSOLUTE),
330			     SRC_SEL_X(SQ_SEL_X),
331			     MEGA_FETCH_COUNT(16));
332    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
333				 DST_REL(0),
334				 DST_SEL_X(SQ_SEL_X),
335				 DST_SEL_Y(SQ_SEL_Y),
336				 DST_SEL_Z(SQ_SEL_0),
337				 DST_SEL_W(SQ_SEL_1),
338				 USE_CONST_FIELDS(0),
339				 DATA_FORMAT(FMT_32_32_FLOAT),
340				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
341				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
342				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
343    shader[i++] = VTX_DWORD2(OFFSET(0),
344			     ENDIAN_SWAP(ENDIAN_NONE),
345			     CONST_BUF_NO_STRIDE(0),
346			     MEGA_FETCH(1));
347    shader[i++] = VTX_DWORD_PAD;
348    /* 6/7 */
349    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
350			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
351			     FETCH_WHOLE_QUAD(0),
352			     BUFFER_ID(0),
353			     SRC_GPR(0),
354			     SRC_REL(ABSOLUTE),
355			     SRC_SEL_X(SQ_SEL_X),
356			     MEGA_FETCH_COUNT(8));
357    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
358				 DST_REL(0),
359				 DST_SEL_X(SQ_SEL_X),
360				 DST_SEL_Y(SQ_SEL_Y),
361				 DST_SEL_Z(SQ_SEL_0),
362				 DST_SEL_W(SQ_SEL_1),
363				 USE_CONST_FIELDS(0),
364				 DATA_FORMAT(FMT_32_32_FLOAT),
365				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
366				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
367				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
368    shader[i++] = VTX_DWORD2(OFFSET(8),
369			     ENDIAN_SWAP(ENDIAN_NONE),
370			     CONST_BUF_NO_STRIDE(0),
371			     MEGA_FETCH(0));
372    shader[i++] = VTX_DWORD_PAD;
373
374    return i;
375}
376
377/* copy ps --------------------------------------- */
378int R600_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader)
379{
380    int i=0;
381
382    /* CF INST 0 */
383    shader[i++] = CF_DWORD0(ADDR(2));
384    shader[i++] = CF_DWORD1(POP_COUNT(0),
385			    CF_CONST(0),
386			    COND(SQ_CF_COND_ACTIVE),
387			    I_COUNT(1),
388			    CALL_COUNT(0),
389			    END_OF_PROGRAM(0),
390			    VALID_PIXEL_MODE(0),
391			    CF_INST(SQ_CF_INST_TEX),
392			    WHOLE_QUAD_MODE(0),
393			    BARRIER(1));
394    /* CF INST 1 */
395    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
396					  TYPE(SQ_EXPORT_PIXEL),
397					  RW_GPR(0),
398					  RW_REL(ABSOLUTE),
399					  INDEX_GPR(0),
400					  ELEM_SIZE(1));
401    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
402					       SRC_SEL_Y(SQ_SEL_Y),
403					       SRC_SEL_Z(SQ_SEL_Z),
404					       SRC_SEL_W(SQ_SEL_W),
405					       R6xx_ELEM_LOOP(0),
406					       BURST_COUNT(1),
407					       END_OF_PROGRAM(1),
408					       VALID_PIXEL_MODE(0),
409					       CF_INST(SQ_CF_INST_EXPORT_DONE),
410					       WHOLE_QUAD_MODE(0),
411					       BARRIER(1));
412    /* TEX INST 0 */
413    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
414			     BC_FRAC_MODE(0),
415			     FETCH_WHOLE_QUAD(0),
416			     RESOURCE_ID(0),
417			     SRC_GPR(0),
418			     SRC_REL(ABSOLUTE),
419			     R7xx_ALT_CONST(0));
420    shader[i++] = TEX_DWORD1(DST_GPR(0),
421			     DST_REL(ABSOLUTE),
422			     DST_SEL_X(SQ_SEL_X), /* R */
423			     DST_SEL_Y(SQ_SEL_Y), /* G */
424			     DST_SEL_Z(SQ_SEL_Z), /* B */
425			     DST_SEL_W(SQ_SEL_W), /* A */
426			     LOD_BIAS(0),
427			     COORD_TYPE_X(TEX_UNNORMALIZED),
428			     COORD_TYPE_Y(TEX_UNNORMALIZED),
429			     COORD_TYPE_Z(TEX_UNNORMALIZED),
430			     COORD_TYPE_W(TEX_UNNORMALIZED));
431    shader[i++] = TEX_DWORD2(OFFSET_X(0),
432			     OFFSET_Y(0),
433			     OFFSET_Z(0),
434			     SAMPLER_ID(0),
435			     SRC_SEL_X(SQ_SEL_X),
436			     SRC_SEL_Y(SQ_SEL_Y),
437			     SRC_SEL_Z(SQ_SEL_0),
438			     SRC_SEL_W(SQ_SEL_1));
439    shader[i++] = TEX_DWORD_PAD;
440
441    return i;
442}
443
444/*
445 * ; xv vertex shader
446 * 00 VTX: ADDR(4) CNT(2)
447 *       0  VFETCH R1.xy01, R0.x, fc0  MEGA(16) FORMAT(32_32_FLOAT)
448 *          FORMAT_COMP(SIGNED)
449 *       1  VFETCH R0.xy01, R0.x, fc0  MINI(8) OFFSET(8) FORMAT(32_32_FLOAT)
450 *          FORMAT_COMP(SIGNED)
451 * 01 EXP_DONE: POS0, R1
452 * 02 EXP_DONE: PARAM0, R0  NO_BARRIER
453 * END_OF_PROGRAM
454 */
455int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
456{
457    int i = 0;
458
459    /* 0 */
460    shader[i++] = CF_DWORD0(ADDR(6));
461    shader[i++] = CF_DWORD1(POP_COUNT(0),
462                            CF_CONST(0),
463                            COND(SQ_CF_COND_ACTIVE),
464                            I_COUNT(2),
465                            CALL_COUNT(0),
466                            END_OF_PROGRAM(0),
467                            VALID_PIXEL_MODE(0),
468                            CF_INST(SQ_CF_INST_VTX),
469                            WHOLE_QUAD_MODE(0),
470                            BARRIER(1));
471
472    /* 1 - ALU */
473    shader[i++] = CF_ALU_DWORD0(ADDR(4),
474				KCACHE_BANK0(0),
475				KCACHE_BANK1(0),
476				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
477    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
478				KCACHE_ADDR0(0),
479				KCACHE_ADDR1(0),
480				I_COUNT(2),
481				USES_WATERFALL(0),
482				CF_INST(SQ_CF_INST_ALU),
483				WHOLE_QUAD_MODE(0),
484				BARRIER(1));
485
486    /* 2 */
487    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
488                                          TYPE(SQ_EXPORT_POS),
489                                          RW_GPR(1),
490                                          RW_REL(ABSOLUTE),
491                                          INDEX_GPR(0),
492                                          ELEM_SIZE(3));
493    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
494                                               SRC_SEL_Y(SQ_SEL_Y),
495                                               SRC_SEL_Z(SQ_SEL_Z),
496                                               SRC_SEL_W(SQ_SEL_W),
497                                               R6xx_ELEM_LOOP(0),
498                                               BURST_COUNT(1),
499                                               END_OF_PROGRAM(0),
500                                               VALID_PIXEL_MODE(0),
501                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
502                                               WHOLE_QUAD_MODE(0),
503                                               BARRIER(1));
504    /* 3 */
505    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
506                                          TYPE(SQ_EXPORT_PARAM),
507                                          RW_GPR(0),
508                                          RW_REL(ABSOLUTE),
509                                          INDEX_GPR(0),
510                                          ELEM_SIZE(3));
511    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
512                                               SRC_SEL_Y(SQ_SEL_Y),
513                                               SRC_SEL_Z(SQ_SEL_Z),
514                                               SRC_SEL_W(SQ_SEL_W),
515                                               R6xx_ELEM_LOOP(0),
516                                               BURST_COUNT(1),
517                                               END_OF_PROGRAM(1),
518                                               VALID_PIXEL_MODE(0),
519                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
520                                               WHOLE_QUAD_MODE(0),
521                                               BARRIER(0));
522
523
524    /* 4 texX / w */
525    shader[i++] = ALU_DWORD0(SRC0_SEL(0),
526                             SRC0_REL(ABSOLUTE),
527                             SRC0_ELEM(ELEM_X),
528                             SRC0_NEG(0),
529                             SRC1_SEL(256),
530                             SRC1_REL(ABSOLUTE),
531                             SRC1_ELEM(ELEM_X),
532                             SRC1_NEG(0),
533                             INDEX_MODE(SQ_INDEX_AR_X),
534                             PRED_SEL(SQ_PRED_SEL_OFF),
535                             LAST(0));
536    shader[i++] = ALU_DWORD1_OP2(ChipSet,
537                                 SRC0_ABS(0),
538                                 SRC1_ABS(0),
539                                 UPDATE_EXECUTE_MASK(0),
540                                 UPDATE_PRED(0),
541                                 WRITE_MASK(1),
542                                 FOG_MERGE(0),
543                                 OMOD(SQ_ALU_OMOD_OFF),
544                                 ALU_INST(SQ_OP2_INST_MUL),
545                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
546                                 DST_GPR(0),
547                                 DST_REL(ABSOLUTE),
548                                 DST_ELEM(ELEM_X),
549                                 CLAMP(0));
550
551    /* 5 texY / h */
552    shader[i++] = ALU_DWORD0(SRC0_SEL(0),
553                             SRC0_REL(ABSOLUTE),
554                             SRC0_ELEM(ELEM_Y),
555                             SRC0_NEG(0),
556                             SRC1_SEL(256),
557                             SRC1_REL(ABSOLUTE),
558                             SRC1_ELEM(ELEM_Y),
559                             SRC1_NEG(0),
560                             INDEX_MODE(SQ_INDEX_AR_X),
561                             PRED_SEL(SQ_PRED_SEL_OFF),
562                             LAST(1));
563    shader[i++] = ALU_DWORD1_OP2(ChipSet,
564                                 SRC0_ABS(0),
565                                 SRC1_ABS(0),
566                                 UPDATE_EXECUTE_MASK(0),
567                                 UPDATE_PRED(0),
568                                 WRITE_MASK(1),
569                                 FOG_MERGE(0),
570                                 OMOD(SQ_ALU_OMOD_OFF),
571                                 ALU_INST(SQ_OP2_INST_MUL),
572                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
573                                 DST_GPR(0),
574                                 DST_REL(ABSOLUTE),
575                                 DST_ELEM(ELEM_Y),
576                                 CLAMP(0));
577
578    /* 6/7 */
579    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
580                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
581                             FETCH_WHOLE_QUAD(0),
582                             BUFFER_ID(0),
583                             SRC_GPR(0),
584                             SRC_REL(ABSOLUTE),
585                             SRC_SEL_X(SQ_SEL_X),
586                             MEGA_FETCH_COUNT(16));
587    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
588                                 DST_REL(ABSOLUTE),
589                                 DST_SEL_X(SQ_SEL_X),
590                                 DST_SEL_Y(SQ_SEL_Y),
591                                 DST_SEL_Z(SQ_SEL_0),
592                                 DST_SEL_W(SQ_SEL_1),
593                                 USE_CONST_FIELDS(0),
594                                 DATA_FORMAT(FMT_32_32_FLOAT),
595                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
596                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
597                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
598    shader[i++] = VTX_DWORD2(OFFSET(0),
599                             ENDIAN_SWAP(ENDIAN_NONE),
600                             CONST_BUF_NO_STRIDE(0),
601                             MEGA_FETCH(1));
602    shader[i++] = VTX_DWORD_PAD;
603    /* 8/9 */
604    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
605                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
606                             FETCH_WHOLE_QUAD(0),
607                             BUFFER_ID(0),
608                             SRC_GPR(0),
609                             SRC_REL(ABSOLUTE),
610                             SRC_SEL_X(SQ_SEL_X),
611                             MEGA_FETCH_COUNT(8));
612    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
613                                 DST_REL(ABSOLUTE),
614                                 DST_SEL_X(SQ_SEL_X),
615                                 DST_SEL_Y(SQ_SEL_Y),
616                                 DST_SEL_Z(SQ_SEL_0),
617                                 DST_SEL_W(SQ_SEL_1),
618                                 USE_CONST_FIELDS(0),
619                                 DATA_FORMAT(FMT_32_32_FLOAT),
620                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
621                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
622                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
623    shader[i++] = VTX_DWORD2(OFFSET(8),
624                             ENDIAN_SWAP(ENDIAN_NONE),
625                             CONST_BUF_NO_STRIDE(0),
626                             MEGA_FETCH(0));
627    shader[i++] = VTX_DWORD_PAD;
628
629    return i;
630}
631
632int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
633{
634    int i = 0;
635
636    /* 0 */
637    shader[i++] = CF_DWORD0(ADDR(16));
638    shader[i++] = CF_DWORD1(POP_COUNT(0),
639                            CF_CONST(0),
640                            COND(SQ_CF_COND_BOOL),
641                            I_COUNT(0),
642                            CALL_COUNT(0),
643                            END_OF_PROGRAM(0),
644                            VALID_PIXEL_MODE(0),
645                            CF_INST(SQ_CF_INST_CALL),
646                            WHOLE_QUAD_MODE(0),
647                            BARRIER(0));
648    /* 1 */
649    shader[i++] = CF_DWORD0(ADDR(24));
650    shader[i++] = CF_DWORD1(POP_COUNT(0),
651                            CF_CONST(0),
652                            COND(SQ_CF_COND_NOT_BOOL),
653                            I_COUNT(0),
654                            CALL_COUNT(0),
655                            END_OF_PROGRAM(0),
656                            VALID_PIXEL_MODE(0),
657                            CF_INST(SQ_CF_INST_CALL),
658                            WHOLE_QUAD_MODE(0),
659                            BARRIER(0));
660    /* 2 */
661    shader[i++] = CF_ALU_DWORD0(ADDR(4),
662                                KCACHE_BANK0(0),
663                                KCACHE_BANK1(0),
664                                KCACHE_MODE0(SQ_CF_KCACHE_NOP));
665    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
666                                KCACHE_ADDR0(0),
667                                KCACHE_ADDR1(0),
668                                I_COUNT(12),
669                                USES_WATERFALL(0),
670                                CF_INST(SQ_CF_INST_ALU),
671                                WHOLE_QUAD_MODE(0),
672                                BARRIER(1));
673    /* 3 */
674    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
675                                          TYPE(SQ_EXPORT_PIXEL),
676                                          RW_GPR(2),
677                                          RW_REL(ABSOLUTE),
678                                          INDEX_GPR(0),
679                                          ELEM_SIZE(3));
680    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
681                                               SRC_SEL_Y(SQ_SEL_Y),
682                                               SRC_SEL_Z(SQ_SEL_Z),
683                                               SRC_SEL_W(SQ_SEL_W),
684                                               R6xx_ELEM_LOOP(0),
685                                               BURST_COUNT(1),
686                                               END_OF_PROGRAM(1),
687                                               VALID_PIXEL_MODE(0),
688                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
689                                               WHOLE_QUAD_MODE(0),
690                                               BARRIER(1));
691    /* 4,5,6,7 */
692    /* r2.x = MAD(c0.w, r1.x, c0.x) */
693    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
694                             SRC0_REL(ABSOLUTE),
695                             SRC0_ELEM(ELEM_W),
696                             SRC0_NEG(0),
697                             SRC1_SEL(1),
698                             SRC1_REL(ABSOLUTE),
699                             SRC1_ELEM(ELEM_X),
700                             SRC1_NEG(0),
701                             INDEX_MODE(SQ_INDEX_LOOP),
702                             PRED_SEL(SQ_PRED_SEL_OFF),
703                             LAST(0));
704    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256),
705                                 SRC2_REL(ABSOLUTE),
706                                 SRC2_ELEM(ELEM_X),
707                                 SRC2_NEG(0),
708                                 ALU_INST(SQ_OP3_INST_MULADD),
709                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
710                                 DST_GPR(2),
711                                 DST_REL(ABSOLUTE),
712                                 DST_ELEM(ELEM_X),
713                                 CLAMP(0));
714    /* r2.y = MAD(c0.w, r1.x, c0.y) */
715    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
716                             SRC0_REL(ABSOLUTE),
717                             SRC0_ELEM(ELEM_W),
718                             SRC0_NEG(0),
719                             SRC1_SEL(1),
720                             SRC1_REL(ABSOLUTE),
721                             SRC1_ELEM(ELEM_X),
722                             SRC1_NEG(0),
723                             INDEX_MODE(SQ_INDEX_LOOP),
724                             PRED_SEL(SQ_PRED_SEL_OFF),
725                             LAST(0));
726    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256),
727                                 SRC2_REL(ABSOLUTE),
728                                 SRC2_ELEM(ELEM_Y),
729                                 SRC2_NEG(0),
730                                 ALU_INST(SQ_OP3_INST_MULADD),
731                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
732                                 DST_GPR(2),
733                                 DST_REL(ABSOLUTE),
734                                 DST_ELEM(ELEM_Y),
735                                 CLAMP(0));
736    /* r2.z = MAD(c0.w, r1.x, c0.z) */
737    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
738                             SRC0_REL(ABSOLUTE),
739                             SRC0_ELEM(ELEM_W),
740                             SRC0_NEG(0),
741                             SRC1_SEL(1),
742                             SRC1_REL(ABSOLUTE),
743                             SRC1_ELEM(ELEM_X),
744                             SRC1_NEG(0),
745                             INDEX_MODE(SQ_INDEX_LOOP),
746                             PRED_SEL(SQ_PRED_SEL_OFF),
747                             LAST(0));
748    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256),
749                                 SRC2_REL(ABSOLUTE),
750                                 SRC2_ELEM(ELEM_Z),
751                                 SRC2_NEG(0),
752                                 ALU_INST(SQ_OP3_INST_MULADD),
753                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
754                                 DST_GPR(2),
755                                 DST_REL(ABSOLUTE),
756                                 DST_ELEM(ELEM_Z),
757                                 CLAMP(0));
758    /* r2.w = MAD(0, 0, 1) */
759    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
760                             SRC0_REL(ABSOLUTE),
761                             SRC0_ELEM(ELEM_X),
762                             SRC0_NEG(0),
763                             SRC1_SEL(SQ_ALU_SRC_0),
764                             SRC1_REL(ABSOLUTE),
765                             SRC1_ELEM(ELEM_X),
766                             SRC1_NEG(0),
767                             INDEX_MODE(SQ_INDEX_LOOP),
768                             PRED_SEL(SQ_PRED_SEL_OFF),
769                             LAST(1));
770    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
771                                 SRC2_REL(ABSOLUTE),
772                                 SRC2_ELEM(ELEM_X),
773                                 SRC2_NEG(0),
774                                 ALU_INST(SQ_OP3_INST_MULADD),
775                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
776                                 DST_GPR(2),
777                                 DST_REL(ABSOLUTE),
778                                 DST_ELEM(ELEM_W),
779                                 CLAMP(0));
780
781    /* 8,9,10,11 */
782    /* r2.x = MAD(c1.x, r1.y, pv.x) */
783    shader[i++] = ALU_DWORD0(SRC0_SEL(257),
784                             SRC0_REL(ABSOLUTE),
785                             SRC0_ELEM(ELEM_X),
786                             SRC0_NEG(0),
787                             SRC1_SEL(1),
788                             SRC1_REL(ABSOLUTE),
789                             SRC1_ELEM(ELEM_Y),
790                             SRC1_NEG(0),
791                             INDEX_MODE(SQ_INDEX_LOOP),
792                             PRED_SEL(SQ_PRED_SEL_OFF),
793                             LAST(0));
794    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
795                                 SRC2_REL(ABSOLUTE),
796                                 SRC2_ELEM(ELEM_X),
797                                 SRC2_NEG(0),
798                                 ALU_INST(SQ_OP3_INST_MULADD),
799                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
800                                 DST_GPR(2),
801                                 DST_REL(ABSOLUTE),
802                                 DST_ELEM(ELEM_X),
803                                 CLAMP(0));
804    /* r2.y = MAD(c1.y, r1.y, pv.y) */
805    shader[i++] = ALU_DWORD0(SRC0_SEL(257),
806                             SRC0_REL(ABSOLUTE),
807                             SRC0_ELEM(ELEM_Y),
808                             SRC0_NEG(0),
809                             SRC1_SEL(1),
810                             SRC1_REL(ABSOLUTE),
811                             SRC1_ELEM(ELEM_Y),
812                             SRC1_NEG(0),
813                             INDEX_MODE(SQ_INDEX_LOOP),
814                             PRED_SEL(SQ_PRED_SEL_OFF),
815                             LAST(0));
816    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
817                                 SRC2_REL(ABSOLUTE),
818                                 SRC2_ELEM(ELEM_Y),
819                                 SRC2_NEG(0),
820                                 ALU_INST(SQ_OP3_INST_MULADD),
821                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
822                                 DST_GPR(2),
823                                 DST_REL(ABSOLUTE),
824                                 DST_ELEM(ELEM_Y),
825                                 CLAMP(0));
826    /* r2.z = MAD(c1.z, r1.y, pv.z) */
827    shader[i++] = ALU_DWORD0(SRC0_SEL(257),
828                             SRC0_REL(ABSOLUTE),
829                             SRC0_ELEM(ELEM_Z),
830                             SRC0_NEG(0),
831                             SRC1_SEL(1),
832                             SRC1_REL(ABSOLUTE),
833                             SRC1_ELEM(ELEM_Y),
834                             SRC1_NEG(0),
835                             INDEX_MODE(SQ_INDEX_LOOP),
836                             PRED_SEL(SQ_PRED_SEL_OFF),
837                             LAST(0));
838    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
839                                 SRC2_REL(ABSOLUTE),
840                                 SRC2_ELEM(ELEM_Z),
841                                 SRC2_NEG(0),
842                                 ALU_INST(SQ_OP3_INST_MULADD),
843                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
844                                 DST_GPR(2),
845                                 DST_REL(ABSOLUTE),
846                                 DST_ELEM(ELEM_Z),
847                                 CLAMP(0));
848    /* r2.w = MAD(0, 0, 1) */
849    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
850                             SRC0_REL(ABSOLUTE),
851                             SRC0_ELEM(ELEM_X),
852                             SRC0_NEG(0),
853                             SRC1_SEL(SQ_ALU_SRC_0),
854                             SRC1_REL(ABSOLUTE),
855                             SRC1_ELEM(ELEM_X),
856                             SRC1_NEG(0),
857                             INDEX_MODE(SQ_INDEX_LOOP),
858                             PRED_SEL(SQ_PRED_SEL_OFF),
859                             LAST(1));
860    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
861                                 SRC2_REL(ABSOLUTE),
862                                 SRC2_ELEM(ELEM_W),
863                                 SRC2_NEG(0),
864                                 ALU_INST(SQ_OP3_INST_MULADD),
865                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
866                                 DST_GPR(2),
867                                 DST_REL(ABSOLUTE),
868                                 DST_ELEM(ELEM_W),
869                                 CLAMP(0));
870    /* 12,13,14,15 */
871    /* r2.x = MAD(c2.x, r1.z, pv.x) */
872    shader[i++] = ALU_DWORD0(SRC0_SEL(258),
873                             SRC0_REL(ABSOLUTE),
874                             SRC0_ELEM(ELEM_X),
875                             SRC0_NEG(0),
876                             SRC1_SEL(1),
877                             SRC1_REL(ABSOLUTE),
878                             SRC1_ELEM(ELEM_Z),
879                             SRC1_NEG(0),
880                             INDEX_MODE(SQ_INDEX_LOOP),
881                             PRED_SEL(SQ_PRED_SEL_OFF),
882                             LAST(0));
883    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
884                                 SRC2_REL(ABSOLUTE),
885                                 SRC2_ELEM(ELEM_X),
886                                 SRC2_NEG(0),
887                                 ALU_INST(SQ_OP3_INST_MULADD),
888                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
889                                 DST_GPR(2),
890                                 DST_REL(ABSOLUTE),
891                                 DST_ELEM(ELEM_X),
892                                 CLAMP(1));
893    /* r2.y = MAD(c2.y, r1.z, pv.y) */
894    shader[i++] = ALU_DWORD0(SRC0_SEL(258),
895                             SRC0_REL(ABSOLUTE),
896                             SRC0_ELEM(ELEM_Y),
897                             SRC0_NEG(0),
898                             SRC1_SEL(1),
899                             SRC1_REL(ABSOLUTE),
900                             SRC1_ELEM(ELEM_Z),
901                             SRC1_NEG(0),
902                             INDEX_MODE(SQ_INDEX_LOOP),
903                             PRED_SEL(SQ_PRED_SEL_OFF),
904                             LAST(0));
905    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
906                                 SRC2_REL(ABSOLUTE),
907                                 SRC2_ELEM(ELEM_Y),
908                                 SRC2_NEG(0),
909                                 ALU_INST(SQ_OP3_INST_MULADD),
910                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
911                                 DST_GPR(2),
912                                 DST_REL(ABSOLUTE),
913                                 DST_ELEM(ELEM_Y),
914                                 CLAMP(1));
915    /* r2.z = MAD(c2.z, r1.z, pv.z) */
916    shader[i++] = ALU_DWORD0(SRC0_SEL(258),
917                             SRC0_REL(ABSOLUTE),
918                             SRC0_ELEM(ELEM_Z),
919                             SRC0_NEG(0),
920                             SRC1_SEL(1),
921                             SRC1_REL(ABSOLUTE),
922                             SRC1_ELEM(ELEM_Z),
923                             SRC1_NEG(0),
924                             INDEX_MODE(SQ_INDEX_LOOP),
925                             PRED_SEL(SQ_PRED_SEL_OFF),
926                             LAST(0));
927    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
928                                 SRC2_REL(ABSOLUTE),
929                                 SRC2_ELEM(ELEM_Z),
930                                 SRC2_NEG(0),
931                                 ALU_INST(SQ_OP3_INST_MULADD),
932                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
933                                 DST_GPR(2),
934                                 DST_REL(ABSOLUTE),
935                                 DST_ELEM(ELEM_Z),
936                                 CLAMP(1));
937    /* r2.w = MAD(0, 0, 1) */
938    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
939                             SRC0_REL(ABSOLUTE),
940                             SRC0_ELEM(ELEM_X),
941                             SRC0_NEG(0),
942                             SRC1_SEL(SQ_ALU_SRC_0),
943                             SRC1_REL(ABSOLUTE),
944                             SRC1_ELEM(ELEM_X),
945                             SRC1_NEG(0),
946                             INDEX_MODE(SQ_INDEX_LOOP),
947                             PRED_SEL(SQ_PRED_SEL_OFF),
948                             LAST(1));
949    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
950                                 SRC2_REL(ABSOLUTE),
951                                 SRC2_ELEM(ELEM_X),
952                                 SRC2_NEG(0),
953                                 ALU_INST(SQ_OP3_INST_MULADD),
954                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
955                                 DST_GPR(2),
956                                 DST_REL(ABSOLUTE),
957                                 DST_ELEM(ELEM_W),
958                                 CLAMP(1));
959
960    /* 16 */
961    shader[i++] = CF_DWORD0(ADDR(18));
962    shader[i++] = CF_DWORD1(POP_COUNT(0),
963                            CF_CONST(0),
964                            COND(SQ_CF_COND_ACTIVE),
965                            I_COUNT(3),
966                            CALL_COUNT(0),
967                            END_OF_PROGRAM(0),
968                            VALID_PIXEL_MODE(0),
969                            CF_INST(SQ_CF_INST_TEX),
970                            WHOLE_QUAD_MODE(0),
971                            BARRIER(1));
972    /* 17 */
973    shader[i++] = CF_DWORD0(ADDR(0));
974    shader[i++] = CF_DWORD1(POP_COUNT(0),
975			    CF_CONST(0),
976			    COND(SQ_CF_COND_ACTIVE),
977			    I_COUNT(0),
978			    CALL_COUNT(0),
979			    END_OF_PROGRAM(0),
980			    VALID_PIXEL_MODE(0),
981			    CF_INST(SQ_CF_INST_RETURN),
982			    WHOLE_QUAD_MODE(0),
983			    BARRIER(1));
984    /* 18/19 */
985    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
986                             BC_FRAC_MODE(0),
987                             FETCH_WHOLE_QUAD(0),
988                             RESOURCE_ID(0),
989                             SRC_GPR(0),
990                             SRC_REL(ABSOLUTE),
991                             R7xx_ALT_CONST(0));
992    shader[i++] = TEX_DWORD1(DST_GPR(1),
993                             DST_REL(ABSOLUTE),
994                             DST_SEL_X(SQ_SEL_X),
995                             DST_SEL_Y(SQ_SEL_MASK),
996                             DST_SEL_Z(SQ_SEL_MASK),
997                             DST_SEL_W(SQ_SEL_1),
998                             LOD_BIAS(0),
999                             COORD_TYPE_X(TEX_NORMALIZED),
1000                             COORD_TYPE_Y(TEX_NORMALIZED),
1001                             COORD_TYPE_Z(TEX_NORMALIZED),
1002                             COORD_TYPE_W(TEX_NORMALIZED));
1003    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1004                             OFFSET_Y(0),
1005                             OFFSET_Z(0),
1006                             SAMPLER_ID(0),
1007                             SRC_SEL_X(SQ_SEL_X),
1008                             SRC_SEL_Y(SQ_SEL_Y),
1009                             SRC_SEL_Z(SQ_SEL_0),
1010                             SRC_SEL_W(SQ_SEL_1));
1011    shader[i++] = TEX_DWORD_PAD;
1012    /* 20/21 */
1013    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1014                             BC_FRAC_MODE(0),
1015                             FETCH_WHOLE_QUAD(0),
1016                             RESOURCE_ID(1),
1017                             SRC_GPR(0),
1018                             SRC_REL(ABSOLUTE),
1019                             R7xx_ALT_CONST(0));
1020    shader[i++] = TEX_DWORD1(DST_GPR(1),
1021                             DST_REL(ABSOLUTE),
1022                             DST_SEL_X(SQ_SEL_MASK),
1023                             DST_SEL_Y(SQ_SEL_MASK),
1024                             DST_SEL_Z(SQ_SEL_X),
1025                             DST_SEL_W(SQ_SEL_MASK),
1026                             LOD_BIAS(0),
1027                             COORD_TYPE_X(TEX_NORMALIZED),
1028                             COORD_TYPE_Y(TEX_NORMALIZED),
1029                             COORD_TYPE_Z(TEX_NORMALIZED),
1030                             COORD_TYPE_W(TEX_NORMALIZED));
1031    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1032                             OFFSET_Y(0),
1033                             OFFSET_Z(0),
1034                             SAMPLER_ID(1),
1035                             SRC_SEL_X(SQ_SEL_X),
1036                             SRC_SEL_Y(SQ_SEL_Y),
1037                             SRC_SEL_Z(SQ_SEL_0),
1038                             SRC_SEL_W(SQ_SEL_1));
1039    shader[i++] = TEX_DWORD_PAD;
1040    /* 22/23 */
1041    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1042                             BC_FRAC_MODE(0),
1043                             FETCH_WHOLE_QUAD(0),
1044                             RESOURCE_ID(2),
1045                             SRC_GPR(0),
1046                             SRC_REL(ABSOLUTE),
1047                             R7xx_ALT_CONST(0));
1048    shader[i++] = TEX_DWORD1(DST_GPR(1),
1049                             DST_REL(ABSOLUTE),
1050                             DST_SEL_X(SQ_SEL_MASK),
1051                             DST_SEL_Y(SQ_SEL_X),
1052                             DST_SEL_Z(SQ_SEL_MASK),
1053                             DST_SEL_W(SQ_SEL_MASK),
1054                             LOD_BIAS(0),
1055                             COORD_TYPE_X(TEX_NORMALIZED),
1056                             COORD_TYPE_Y(TEX_NORMALIZED),
1057                             COORD_TYPE_Z(TEX_NORMALIZED),
1058                             COORD_TYPE_W(TEX_NORMALIZED));
1059    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1060                             OFFSET_Y(0),
1061                             OFFSET_Z(0),
1062                             SAMPLER_ID(2),
1063                             SRC_SEL_X(SQ_SEL_X),
1064                             SRC_SEL_Y(SQ_SEL_Y),
1065                             SRC_SEL_Z(SQ_SEL_0),
1066                             SRC_SEL_W(SQ_SEL_1));
1067    shader[i++] = TEX_DWORD_PAD;
1068    /* 24 */
1069    shader[i++] = CF_DWORD0(ADDR(26));
1070    shader[i++] = CF_DWORD1(POP_COUNT(0),
1071                            CF_CONST(0),
1072                            COND(SQ_CF_COND_ACTIVE),
1073                            I_COUNT(2),
1074                            CALL_COUNT(0),
1075                            END_OF_PROGRAM(0),
1076                            VALID_PIXEL_MODE(0),
1077                            CF_INST(SQ_CF_INST_TEX),
1078                            WHOLE_QUAD_MODE(0),
1079                            BARRIER(1));
1080    /* 25 */
1081    shader[i++] = CF_DWORD0(ADDR(0));
1082    shader[i++] = CF_DWORD1(POP_COUNT(0),
1083			    CF_CONST(0),
1084			    COND(SQ_CF_COND_ACTIVE),
1085			    I_COUNT(0),
1086			    CALL_COUNT(0),
1087			    END_OF_PROGRAM(0),
1088			    VALID_PIXEL_MODE(0),
1089			    CF_INST(SQ_CF_INST_RETURN),
1090			    WHOLE_QUAD_MODE(0),
1091			    BARRIER(1));
1092    /* 26/27 */
1093    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1094                             BC_FRAC_MODE(0),
1095                             FETCH_WHOLE_QUAD(0),
1096                             RESOURCE_ID(0),
1097                             SRC_GPR(0),
1098                             SRC_REL(ABSOLUTE),
1099                             R7xx_ALT_CONST(0));
1100    shader[i++] = TEX_DWORD1(DST_GPR(1),
1101                             DST_REL(ABSOLUTE),
1102                             DST_SEL_X(SQ_SEL_X),
1103                             DST_SEL_Y(SQ_SEL_MASK),
1104                             DST_SEL_Z(SQ_SEL_MASK),
1105                             DST_SEL_W(SQ_SEL_1),
1106                             LOD_BIAS(0),
1107                             COORD_TYPE_X(TEX_NORMALIZED),
1108                             COORD_TYPE_Y(TEX_NORMALIZED),
1109                             COORD_TYPE_Z(TEX_NORMALIZED),
1110                             COORD_TYPE_W(TEX_NORMALIZED));
1111    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1112                             OFFSET_Y(0),
1113                             OFFSET_Z(0),
1114                             SAMPLER_ID(0),
1115                             SRC_SEL_X(SQ_SEL_X),
1116                             SRC_SEL_Y(SQ_SEL_Y),
1117                             SRC_SEL_Z(SQ_SEL_0),
1118                             SRC_SEL_W(SQ_SEL_1));
1119    shader[i++] = TEX_DWORD_PAD;
1120    /* 28/29 */
1121    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1122                             BC_FRAC_MODE(0),
1123                             FETCH_WHOLE_QUAD(0),
1124                             RESOURCE_ID(1),
1125                             SRC_GPR(0),
1126                             SRC_REL(ABSOLUTE),
1127                             R7xx_ALT_CONST(0));
1128    shader[i++] = TEX_DWORD1(DST_GPR(1),
1129                             DST_REL(ABSOLUTE),
1130                             DST_SEL_X(SQ_SEL_MASK),
1131                             DST_SEL_Y(SQ_SEL_X),
1132                             DST_SEL_Z(SQ_SEL_Y),
1133                             DST_SEL_W(SQ_SEL_MASK),
1134                             LOD_BIAS(0),
1135                             COORD_TYPE_X(TEX_NORMALIZED),
1136                             COORD_TYPE_Y(TEX_NORMALIZED),
1137                             COORD_TYPE_Z(TEX_NORMALIZED),
1138                             COORD_TYPE_W(TEX_NORMALIZED));
1139    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1140                             OFFSET_Y(0),
1141                             OFFSET_Z(0),
1142                             SAMPLER_ID(1),
1143                             SRC_SEL_X(SQ_SEL_X),
1144                             SRC_SEL_Y(SQ_SEL_Y),
1145                             SRC_SEL_Z(SQ_SEL_0),
1146                             SRC_SEL_W(SQ_SEL_1));
1147    shader[i++] = TEX_DWORD_PAD;
1148
1149    return i;
1150}
1151
1152/* comp mask ps --------------------------------------- */
1153int R600_comp_mask_ps(RADEONChipFamily ChipSet, uint32_t* shader)
1154{
1155    int i = 0;
1156
1157    /* 0 */
1158    shader[i++] = CF_DWORD0(ADDR(8));
1159    shader[i++] = CF_DWORD1(POP_COUNT(0),
1160			    CF_CONST(0),
1161			    COND(SQ_CF_COND_ACTIVE),
1162			    I_COUNT(2),
1163			    CALL_COUNT(0),
1164			    END_OF_PROGRAM(0),
1165			    VALID_PIXEL_MODE(0),
1166			    CF_INST(SQ_CF_INST_TEX),
1167			    WHOLE_QUAD_MODE(0),
1168			    BARRIER(1));
1169
1170    /* 1 */
1171    shader[i++] = CF_ALU_DWORD0(ADDR(3),
1172				KCACHE_BANK0(0),
1173				KCACHE_BANK1(0),
1174				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
1175    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1176				KCACHE_ADDR0(0),
1177				KCACHE_ADDR1(0),
1178				I_COUNT(4),
1179				USES_WATERFALL(0),
1180				CF_INST(SQ_CF_INST_ALU),
1181				WHOLE_QUAD_MODE(0),
1182				BARRIER(1));
1183
1184    /* 2 */
1185    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
1186					  TYPE(SQ_EXPORT_PIXEL),
1187					  RW_GPR(2),
1188					  RW_REL(ABSOLUTE),
1189					  INDEX_GPR(0),
1190					  ELEM_SIZE(1));
1191
1192    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1193					       SRC_SEL_Y(SQ_SEL_Y),
1194					       SRC_SEL_Z(SQ_SEL_Z),
1195					       SRC_SEL_W(SQ_SEL_W),
1196					       R6xx_ELEM_LOOP(0),
1197					       BURST_COUNT(1),
1198					       END_OF_PROGRAM(1),
1199					       VALID_PIXEL_MODE(0),
1200					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1201					       WHOLE_QUAD_MODE(0),
1202					       BARRIER(1));
1203
1204    /* 3 - alu 0 */
1205    /* MUL gpr[2].x gpr[1].x gpr[0].x */
1206    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
1207			     SRC0_REL(ABSOLUTE),
1208			     SRC0_ELEM(ELEM_X),
1209			     SRC0_NEG(0),
1210			     SRC1_SEL(0),
1211			     SRC1_REL(ABSOLUTE),
1212			     SRC1_ELEM(ELEM_X),
1213			     SRC1_NEG(0),
1214			     INDEX_MODE(SQ_INDEX_LOOP),
1215			     PRED_SEL(SQ_PRED_SEL_OFF),
1216			     LAST(0));
1217    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1218				 SRC0_ABS(0),
1219				 SRC1_ABS(0),
1220				 UPDATE_EXECUTE_MASK(0),
1221				 UPDATE_PRED(0),
1222				 WRITE_MASK(1),
1223				 FOG_MERGE(0),
1224				 OMOD(SQ_ALU_OMOD_OFF),
1225				 ALU_INST(SQ_OP2_INST_MUL),
1226				 BANK_SWIZZLE(SQ_ALU_VEC_012),
1227				 DST_GPR(2),
1228				 DST_REL(ABSOLUTE),
1229				 DST_ELEM(ELEM_X),
1230				 CLAMP(1));
1231    /* 4 - alu 1 */
1232    /* MUL gpr[2].y gpr[1].y gpr[0].y */
1233    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
1234			     SRC0_REL(ABSOLUTE),
1235			     SRC0_ELEM(ELEM_Y),
1236			     SRC0_NEG(0),
1237			     SRC1_SEL(0),
1238			     SRC1_REL(ABSOLUTE),
1239			     SRC1_ELEM(ELEM_Y),
1240			     SRC1_NEG(0),
1241			     INDEX_MODE(SQ_INDEX_LOOP),
1242			     PRED_SEL(SQ_PRED_SEL_OFF),
1243			     LAST(0));
1244    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1245				 SRC0_ABS(0),
1246				 SRC1_ABS(0),
1247				 UPDATE_EXECUTE_MASK(0),
1248				 UPDATE_PRED(0),
1249				 WRITE_MASK(1),
1250				 FOG_MERGE(0),
1251				 OMOD(SQ_ALU_OMOD_OFF),
1252				 ALU_INST(SQ_OP2_INST_MUL),
1253				 BANK_SWIZZLE(SQ_ALU_VEC_012),
1254				 DST_GPR(2),
1255				 DST_REL(ABSOLUTE),
1256				 DST_ELEM(ELEM_Y),
1257				 CLAMP(1));
1258    /* 5 - alu 2 */
1259    /* MUL gpr[2].z gpr[1].z gpr[0].z */
1260    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
1261			     SRC0_REL(ABSOLUTE),
1262			     SRC0_ELEM(ELEM_Z),
1263			     SRC0_NEG(0),
1264			     SRC1_SEL(0),
1265			     SRC1_REL(ABSOLUTE),
1266			     SRC1_ELEM(ELEM_Z),
1267			     SRC1_NEG(0),
1268			     INDEX_MODE(SQ_INDEX_LOOP),
1269			     PRED_SEL(SQ_PRED_SEL_OFF),
1270			     LAST(0));
1271    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1272				 SRC0_ABS(0),
1273				 SRC1_ABS(0),
1274				 UPDATE_EXECUTE_MASK(0),
1275				 UPDATE_PRED(0),
1276				 WRITE_MASK(1),
1277				 FOG_MERGE(0),
1278				 OMOD(SQ_ALU_OMOD_OFF),
1279				 ALU_INST(SQ_OP2_INST_MUL),
1280				 BANK_SWIZZLE(SQ_ALU_VEC_012),
1281				 DST_GPR(2),
1282				 DST_REL(ABSOLUTE),
1283				 DST_ELEM(ELEM_Z),
1284				 CLAMP(1));
1285    /* 6 - alu 3 */
1286    /* MUL gpr[2].w gpr[1].w gpr[0].w */
1287    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
1288			     SRC0_REL(ABSOLUTE),
1289			     SRC0_ELEM(ELEM_W),
1290			     SRC0_NEG(0),
1291			     SRC1_SEL(0),
1292			     SRC1_REL(ABSOLUTE),
1293			     SRC1_ELEM(ELEM_W),
1294			     SRC1_NEG(0),
1295			     INDEX_MODE(SQ_INDEX_LOOP),
1296			     PRED_SEL(SQ_PRED_SEL_OFF),
1297			     LAST(1));
1298    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1299				 SRC0_ABS(0),
1300				 SRC1_ABS(0),
1301				 UPDATE_EXECUTE_MASK(0),
1302				 UPDATE_PRED(0),
1303				 WRITE_MASK(1),
1304				 FOG_MERGE(0),
1305				 OMOD(SQ_ALU_OMOD_OFF),
1306				 ALU_INST(SQ_OP2_INST_MUL),
1307				 BANK_SWIZZLE(SQ_ALU_VEC_012),
1308				 DST_GPR(2),
1309				 DST_REL(ABSOLUTE),
1310				 DST_ELEM(ELEM_W),
1311				 CLAMP(1));
1312    /* 7 */
1313    shader[i++] = 0x00000000;
1314    shader[i++] = 0x00000000;
1315
1316    /* 8/9 - src */
1317    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1318			     BC_FRAC_MODE(0),
1319			     FETCH_WHOLE_QUAD(0),
1320			     RESOURCE_ID(0),
1321			     SRC_GPR(0),
1322			     SRC_REL(ABSOLUTE),
1323			     R7xx_ALT_CONST(0));
1324    shader[i++] = TEX_DWORD1(DST_GPR(0),
1325			     DST_REL(ABSOLUTE),
1326			     DST_SEL_X(SQ_SEL_X),
1327			     DST_SEL_Y(SQ_SEL_Y),
1328			     DST_SEL_Z(SQ_SEL_Z),
1329			     DST_SEL_W(SQ_SEL_W),
1330			     LOD_BIAS(0),
1331			     COORD_TYPE_X(TEX_NORMALIZED),
1332			     COORD_TYPE_Y(TEX_NORMALIZED),
1333			     COORD_TYPE_Z(TEX_NORMALIZED),
1334			     COORD_TYPE_W(TEX_NORMALIZED));
1335    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1336			     OFFSET_Y(0),
1337			     OFFSET_Z(0),
1338			     SAMPLER_ID(0),
1339			     SRC_SEL_X(SQ_SEL_X),
1340			     SRC_SEL_Y(SQ_SEL_Y),
1341			     SRC_SEL_Z(SQ_SEL_0),
1342			     SRC_SEL_W(SQ_SEL_1));
1343    shader[i++] = TEX_DWORD_PAD;
1344    /* 10/11 - mask */
1345    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1346			     BC_FRAC_MODE(0),
1347			     FETCH_WHOLE_QUAD(0),
1348			     RESOURCE_ID(1),
1349			     SRC_GPR(1),
1350			     SRC_REL(ABSOLUTE),
1351			     R7xx_ALT_CONST(0));
1352    shader[i++] = TEX_DWORD1(DST_GPR(1),
1353			     DST_REL(ABSOLUTE),
1354			     DST_SEL_X(SQ_SEL_X),
1355			     DST_SEL_Y(SQ_SEL_Y),
1356			     DST_SEL_Z(SQ_SEL_Z),
1357			     DST_SEL_W(SQ_SEL_W),
1358			     LOD_BIAS(0),
1359			     COORD_TYPE_X(TEX_NORMALIZED),
1360			     COORD_TYPE_Y(TEX_NORMALIZED),
1361			     COORD_TYPE_Z(TEX_NORMALIZED),
1362			     COORD_TYPE_W(TEX_NORMALIZED));
1363    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1364			     OFFSET_Y(0),
1365			     OFFSET_Z(0),
1366			     SAMPLER_ID(1),
1367			     SRC_SEL_X(SQ_SEL_X),
1368			     SRC_SEL_Y(SQ_SEL_Y),
1369			     SRC_SEL_Z(SQ_SEL_0),
1370			     SRC_SEL_W(SQ_SEL_1));
1371    shader[i++] = TEX_DWORD_PAD;
1372
1373    return i;
1374}
1375
1376/* comp vs --------------------------------------- */
1377int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
1378{
1379    int i = 0;
1380
1381    /* 0 */
1382    shader[i++] = CF_DWORD0(ADDR(3));
1383    shader[i++] = CF_DWORD1(POP_COUNT(0),
1384                            CF_CONST(0),
1385                            COND(SQ_CF_COND_BOOL),
1386                            I_COUNT(0),
1387                            CALL_COUNT(0),
1388                            END_OF_PROGRAM(0),
1389                            VALID_PIXEL_MODE(0),
1390                            CF_INST(SQ_CF_INST_CALL),
1391                            WHOLE_QUAD_MODE(0),
1392                            BARRIER(0));
1393    /* 1 */
1394    shader[i++] = CF_DWORD0(ADDR(28));
1395    shader[i++] = CF_DWORD1(POP_COUNT(0),
1396                            CF_CONST(0),
1397                            COND(SQ_CF_COND_NOT_BOOL),
1398                            I_COUNT(0),
1399                            CALL_COUNT(0),
1400                            END_OF_PROGRAM(0),
1401                            VALID_PIXEL_MODE(0),
1402                            CF_INST(SQ_CF_INST_CALL),
1403                            WHOLE_QUAD_MODE(0),
1404                            BARRIER(0));
1405    /* 2 */
1406    shader[i++] = CF_DWORD0(ADDR(0));
1407    shader[i++] = CF_DWORD1(POP_COUNT(0),
1408                            CF_CONST(0),
1409                            COND(SQ_CF_COND_ACTIVE),
1410                            I_COUNT(0),
1411                            CALL_COUNT(0),
1412                            END_OF_PROGRAM(1),
1413                            VALID_PIXEL_MODE(0),
1414                            CF_INST(SQ_CF_INST_NOP),
1415                            WHOLE_QUAD_MODE(0),
1416                            BARRIER(1));
1417    /* 3 - mask sub */
1418    shader[i++] = CF_DWORD0(ADDR(22));
1419    shader[i++] = CF_DWORD1(POP_COUNT(0),
1420			    CF_CONST(0),
1421			    COND(SQ_CF_COND_ACTIVE),
1422			    I_COUNT(3),
1423			    CALL_COUNT(0),
1424			    END_OF_PROGRAM(0),
1425			    VALID_PIXEL_MODE(0),
1426			    CF_INST(SQ_CF_INST_VTX),
1427			    WHOLE_QUAD_MODE(0),
1428			    BARRIER(1));
1429
1430    /* 4 - ALU */
1431    shader[i++] = CF_ALU_DWORD0(ADDR(9),
1432				KCACHE_BANK0(0),
1433				KCACHE_BANK1(0),
1434				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
1435    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1436				KCACHE_ADDR0(0),
1437				KCACHE_ADDR1(0),
1438				I_COUNT(12),
1439				USES_WATERFALL(0),
1440				CF_INST(SQ_CF_INST_ALU),
1441				WHOLE_QUAD_MODE(0),
1442				BARRIER(1));
1443
1444    /* 5 - dst */
1445    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1446					  TYPE(SQ_EXPORT_POS),
1447					  RW_GPR(2),
1448					  RW_REL(ABSOLUTE),
1449					  INDEX_GPR(0),
1450					  ELEM_SIZE(0));
1451    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1452					       SRC_SEL_Y(SQ_SEL_Y),
1453					       SRC_SEL_Z(SQ_SEL_0),
1454					       SRC_SEL_W(SQ_SEL_1),
1455					       R6xx_ELEM_LOOP(0),
1456					       BURST_COUNT(1),
1457					       END_OF_PROGRAM(0),
1458					       VALID_PIXEL_MODE(0),
1459					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1460					       WHOLE_QUAD_MODE(0),
1461					       BARRIER(1));
1462    /* 6 - src */
1463    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1464					  TYPE(SQ_EXPORT_PARAM),
1465					  RW_GPR(1),
1466					  RW_REL(ABSOLUTE),
1467					  INDEX_GPR(0),
1468					  ELEM_SIZE(0));
1469    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1470					       SRC_SEL_Y(SQ_SEL_Y),
1471					       SRC_SEL_Z(SQ_SEL_0),
1472					       SRC_SEL_W(SQ_SEL_1),
1473					       R6xx_ELEM_LOOP(0),
1474					       BURST_COUNT(1),
1475					       END_OF_PROGRAM(0),
1476					       VALID_PIXEL_MODE(0),
1477					       CF_INST(SQ_CF_INST_EXPORT),
1478					       WHOLE_QUAD_MODE(0),
1479					       BARRIER(0));
1480    /* 7 - mask */
1481    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
1482					  TYPE(SQ_EXPORT_PARAM),
1483					  RW_GPR(0),
1484					  RW_REL(ABSOLUTE),
1485					  INDEX_GPR(0),
1486					  ELEM_SIZE(0));
1487    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1488					       SRC_SEL_Y(SQ_SEL_Y),
1489					       SRC_SEL_Z(SQ_SEL_0),
1490					       SRC_SEL_W(SQ_SEL_1),
1491					       R6xx_ELEM_LOOP(0),
1492					       BURST_COUNT(1),
1493					       END_OF_PROGRAM(0),
1494					       VALID_PIXEL_MODE(0),
1495					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1496					       WHOLE_QUAD_MODE(0),
1497					       BARRIER(0));
1498    /* 8 */
1499    shader[i++] = CF_DWORD0(ADDR(0));
1500    shader[i++] = CF_DWORD1(POP_COUNT(0),
1501			    CF_CONST(0),
1502			    COND(SQ_CF_COND_ACTIVE),
1503			    I_COUNT(0),
1504			    CALL_COUNT(0),
1505			    END_OF_PROGRAM(0),
1506			    VALID_PIXEL_MODE(0),
1507			    CF_INST(SQ_CF_INST_RETURN),
1508			    WHOLE_QUAD_MODE(0),
1509			    BARRIER(1));
1510
1511
1512    /* 9 srcX MAD */
1513    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
1514                             SRC0_REL(ABSOLUTE),
1515                             SRC0_ELEM(ELEM_Y),
1516                             SRC0_NEG(0),
1517                             SRC1_SEL(1),
1518                             SRC1_REL(ABSOLUTE),
1519                             SRC1_ELEM(ELEM_Y),
1520                             SRC1_NEG(0),
1521                             INDEX_MODE(SQ_INDEX_LOOP),
1522                             PRED_SEL(SQ_PRED_SEL_OFF),
1523                             LAST(1));
1524    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256),
1525                                 SRC2_REL(ABSOLUTE),
1526                                 SRC2_ELEM(ELEM_Z),
1527                                 SRC2_NEG(0),
1528                                 ALU_INST(SQ_OP3_INST_MULADD),
1529                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1530                                 DST_GPR(1),
1531                                 DST_REL(ABSOLUTE),
1532                                 DST_ELEM(ELEM_Z),
1533                                 CLAMP(0));
1534    /* 10 srcY MAD */
1535    shader[i++] = ALU_DWORD0(SRC0_SEL(257),
1536                             SRC0_REL(ABSOLUTE),
1537                             SRC0_ELEM(ELEM_Y),
1538                             SRC0_NEG(0),
1539                             SRC1_SEL(1),
1540                             SRC1_REL(ABSOLUTE),
1541                             SRC1_ELEM(ELEM_Y),
1542                             SRC1_NEG(0),
1543                             INDEX_MODE(SQ_INDEX_LOOP),
1544                             PRED_SEL(SQ_PRED_SEL_OFF),
1545                             LAST(1));
1546    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(257),
1547                                 SRC2_REL(ABSOLUTE),
1548                                 SRC2_ELEM(ELEM_Z),
1549                                 SRC2_NEG(0),
1550                                 ALU_INST(SQ_OP3_INST_MULADD),
1551                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1552                                 DST_GPR(1),
1553                                 DST_REL(ABSOLUTE),
1554                                 DST_ELEM(ELEM_W),
1555                                 CLAMP(0));
1556
1557    /* 11 srcX MAD */
1558    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
1559                             SRC0_REL(ABSOLUTE),
1560                             SRC0_ELEM(ELEM_X),
1561                             SRC0_NEG(0),
1562                             SRC1_SEL(1),
1563                             SRC1_REL(ABSOLUTE),
1564                             SRC1_ELEM(ELEM_X),
1565                             SRC1_NEG(0),
1566                             INDEX_MODE(SQ_INDEX_LOOP),
1567                             PRED_SEL(SQ_PRED_SEL_OFF),
1568                             LAST(0));
1569    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(1),
1570                                 SRC2_REL(ABSOLUTE),
1571                                 SRC2_ELEM(ELEM_Z),
1572                                 SRC2_NEG(0),
1573                                 ALU_INST(SQ_OP3_INST_MULADD),
1574                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1575                                 DST_GPR(1),
1576                                 DST_REL(ABSOLUTE),
1577                                 DST_ELEM(ELEM_X),
1578                                 CLAMP(0));
1579    /* 12 srcY MAD */
1580    shader[i++] = ALU_DWORD0(SRC0_SEL(257),
1581                             SRC0_REL(ABSOLUTE),
1582                             SRC0_ELEM(ELEM_X),
1583                             SRC0_NEG(0),
1584                             SRC1_SEL(1),
1585                             SRC1_REL(ABSOLUTE),
1586                             SRC1_ELEM(ELEM_X),
1587                             SRC1_NEG(0),
1588                             INDEX_MODE(SQ_INDEX_LOOP),
1589                             PRED_SEL(SQ_PRED_SEL_OFF),
1590                             LAST(1));
1591    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(1),
1592                                 SRC2_REL(ABSOLUTE),
1593                                 SRC2_ELEM(ELEM_W),
1594                                 SRC2_NEG(0),
1595                                 ALU_INST(SQ_OP3_INST_MULADD),
1596                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1597                                 DST_GPR(1),
1598                                 DST_REL(ABSOLUTE),
1599                                 DST_ELEM(ELEM_Y),
1600                                 CLAMP(0));
1601
1602    /* 13 maskX MAD */
1603    shader[i++] = ALU_DWORD0(SRC0_SEL(258),
1604                             SRC0_REL(ABSOLUTE),
1605                             SRC0_ELEM(ELEM_Y),
1606                             SRC0_NEG(0),
1607                             SRC1_SEL(0),
1608                             SRC1_REL(ABSOLUTE),
1609                             SRC1_ELEM(ELEM_Y),
1610                             SRC1_NEG(0),
1611                             INDEX_MODE(SQ_INDEX_LOOP),
1612                             PRED_SEL(SQ_PRED_SEL_OFF),
1613                             LAST(1));
1614    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(258),
1615                                 SRC2_REL(ABSOLUTE),
1616                                 SRC2_ELEM(ELEM_Z),
1617                                 SRC2_NEG(0),
1618                                 ALU_INST(SQ_OP3_INST_MULADD),
1619                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1620                                 DST_GPR(0),
1621                                 DST_REL(ABSOLUTE),
1622                                 DST_ELEM(ELEM_Z),
1623                                 CLAMP(0));
1624
1625    /* 14 maskY MAD */
1626    shader[i++] = ALU_DWORD0(SRC0_SEL(259),
1627                             SRC0_REL(ABSOLUTE),
1628                             SRC0_ELEM(ELEM_Y),
1629                             SRC0_NEG(0),
1630                             SRC1_SEL(0),
1631                             SRC1_REL(ABSOLUTE),
1632                             SRC1_ELEM(ELEM_Y),
1633                             SRC1_NEG(0),
1634                             INDEX_MODE(SQ_INDEX_LOOP),
1635                             PRED_SEL(SQ_PRED_SEL_OFF),
1636                             LAST(1));
1637    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
1638                                 SRC2_REL(ABSOLUTE),
1639                                 SRC2_ELEM(ELEM_Z),
1640                                 SRC2_NEG(0),
1641                                 ALU_INST(SQ_OP3_INST_MULADD),
1642                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1643                                 DST_GPR(0),
1644                                 DST_REL(ABSOLUTE),
1645                                 DST_ELEM(ELEM_W),
1646                                 CLAMP(0));
1647
1648    /* 15 srcX MAD */
1649    shader[i++] = ALU_DWORD0(SRC0_SEL(258),
1650                             SRC0_REL(ABSOLUTE),
1651                             SRC0_ELEM(ELEM_X),
1652                             SRC0_NEG(0),
1653                             SRC1_SEL(0),
1654                             SRC1_REL(ABSOLUTE),
1655                             SRC1_ELEM(ELEM_X),
1656                             SRC1_NEG(0),
1657                             INDEX_MODE(SQ_INDEX_LOOP),
1658                             PRED_SEL(SQ_PRED_SEL_OFF),
1659                             LAST(0));
1660    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0),
1661                                 SRC2_REL(ABSOLUTE),
1662                                 SRC2_ELEM(ELEM_Z),
1663                                 SRC2_NEG(0),
1664                                 ALU_INST(SQ_OP3_INST_MULADD),
1665                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1666                                 DST_GPR(0),
1667                                 DST_REL(ABSOLUTE),
1668                                 DST_ELEM(ELEM_X),
1669                                 CLAMP(0));
1670    /* 16 srcY MAD */
1671    shader[i++] = ALU_DWORD0(SRC0_SEL(259),
1672                             SRC0_REL(ABSOLUTE),
1673                             SRC0_ELEM(ELEM_X),
1674                             SRC0_NEG(0),
1675                             SRC1_SEL(0),
1676                             SRC1_REL(ABSOLUTE),
1677                             SRC1_ELEM(ELEM_X),
1678                             SRC1_NEG(0),
1679                             INDEX_MODE(SQ_INDEX_LOOP),
1680                             PRED_SEL(SQ_PRED_SEL_OFF),
1681                             LAST(1));
1682    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0),
1683                                 SRC2_REL(ABSOLUTE),
1684                                 SRC2_ELEM(ELEM_W),
1685                                 SRC2_NEG(0),
1686                                 ALU_INST(SQ_OP3_INST_MULADD),
1687                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1688                                 DST_GPR(0),
1689                                 DST_REL(ABSOLUTE),
1690                                 DST_ELEM(ELEM_Y),
1691                                 CLAMP(0));
1692
1693    /* 17 srcX / w */
1694    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
1695                             SRC0_REL(ABSOLUTE),
1696                             SRC0_ELEM(ELEM_X),
1697                             SRC0_NEG(0),
1698                             SRC1_SEL(256),
1699                             SRC1_REL(ABSOLUTE),
1700                             SRC1_ELEM(ELEM_W),
1701                             SRC1_NEG(0),
1702                             INDEX_MODE(SQ_INDEX_AR_X),
1703                             PRED_SEL(SQ_PRED_SEL_OFF),
1704                             LAST(1));
1705    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1706                                 SRC0_ABS(0),
1707                                 SRC1_ABS(0),
1708                                 UPDATE_EXECUTE_MASK(0),
1709                                 UPDATE_PRED(0),
1710                                 WRITE_MASK(1),
1711                                 FOG_MERGE(0),
1712                                 OMOD(SQ_ALU_OMOD_OFF),
1713                                 ALU_INST(SQ_OP2_INST_MUL),
1714                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1715                                 DST_GPR(1),
1716                                 DST_REL(ABSOLUTE),
1717                                 DST_ELEM(ELEM_X),
1718                                 CLAMP(0));
1719
1720    /* 18 srcY / h */
1721    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
1722                             SRC0_REL(ABSOLUTE),
1723                             SRC0_ELEM(ELEM_Y),
1724                             SRC0_NEG(0),
1725                             SRC1_SEL(257),
1726                             SRC1_REL(ABSOLUTE),
1727                             SRC1_ELEM(ELEM_W),
1728                             SRC1_NEG(0),
1729                             INDEX_MODE(SQ_INDEX_AR_X),
1730                             PRED_SEL(SQ_PRED_SEL_OFF),
1731                             LAST(1));
1732    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1733                                 SRC0_ABS(0),
1734                                 SRC1_ABS(0),
1735                                 UPDATE_EXECUTE_MASK(0),
1736                                 UPDATE_PRED(0),
1737                                 WRITE_MASK(1),
1738                                 FOG_MERGE(0),
1739                                 OMOD(SQ_ALU_OMOD_OFF),
1740                                 ALU_INST(SQ_OP2_INST_MUL),
1741                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1742                                 DST_GPR(1),
1743                                 DST_REL(ABSOLUTE),
1744                                 DST_ELEM(ELEM_Y),
1745                                 CLAMP(0));
1746
1747    /* 19 maskX / w */
1748    shader[i++] = ALU_DWORD0(SRC0_SEL(0),
1749                             SRC0_REL(ABSOLUTE),
1750                             SRC0_ELEM(ELEM_X),
1751                             SRC0_NEG(0),
1752                             SRC1_SEL(258),
1753                             SRC1_REL(ABSOLUTE),
1754                             SRC1_ELEM(ELEM_W),
1755                             SRC1_NEG(0),
1756                             INDEX_MODE(SQ_INDEX_AR_X),
1757                             PRED_SEL(SQ_PRED_SEL_OFF),
1758                             LAST(1));
1759    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1760                                 SRC0_ABS(0),
1761                                 SRC1_ABS(0),
1762                                 UPDATE_EXECUTE_MASK(0),
1763                                 UPDATE_PRED(0),
1764                                 WRITE_MASK(1),
1765                                 FOG_MERGE(0),
1766                                 OMOD(SQ_ALU_OMOD_OFF),
1767                                 ALU_INST(SQ_OP2_INST_MUL),
1768                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1769                                 DST_GPR(0),
1770                                 DST_REL(ABSOLUTE),
1771                                 DST_ELEM(ELEM_X),
1772                                 CLAMP(0));
1773
1774    /* 20 maskY / h */
1775    shader[i++] = ALU_DWORD0(SRC0_SEL(0),
1776                             SRC0_REL(ABSOLUTE),
1777                             SRC0_ELEM(ELEM_Y),
1778                             SRC0_NEG(0),
1779                             SRC1_SEL(259),
1780                             SRC1_REL(ABSOLUTE),
1781                             SRC1_ELEM(ELEM_W),
1782                             SRC1_NEG(0),
1783                             INDEX_MODE(SQ_INDEX_AR_X),
1784                             PRED_SEL(SQ_PRED_SEL_OFF),
1785                             LAST(1));
1786    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1787                                 SRC0_ABS(0),
1788                                 SRC1_ABS(0),
1789                                 UPDATE_EXECUTE_MASK(0),
1790                                 UPDATE_PRED(0),
1791                                 WRITE_MASK(1),
1792                                 FOG_MERGE(0),
1793                                 OMOD(SQ_ALU_OMOD_OFF),
1794                                 ALU_INST(SQ_OP2_INST_MUL),
1795                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1796                                 DST_GPR(0),
1797                                 DST_REL(ABSOLUTE),
1798                                 DST_ELEM(ELEM_Y),
1799                                 CLAMP(0));
1800    /* 21 */
1801    shader[i++] = 0x00000000;
1802    shader[i++] = 0x00000000;
1803
1804    /* 22/23 - dst */
1805    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
1806			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
1807			     FETCH_WHOLE_QUAD(0),
1808			     BUFFER_ID(0),
1809			     SRC_GPR(0),
1810			     SRC_REL(ABSOLUTE),
1811			     SRC_SEL_X(SQ_SEL_X),
1812			     MEGA_FETCH_COUNT(24));
1813    shader[i++] = VTX_DWORD1_GPR(DST_GPR(2),
1814				 DST_REL(0),
1815				 DST_SEL_X(SQ_SEL_X),
1816				 DST_SEL_Y(SQ_SEL_Y),
1817				 DST_SEL_Z(SQ_SEL_0),
1818				 DST_SEL_W(SQ_SEL_1),
1819				 USE_CONST_FIELDS(0),
1820				 DATA_FORMAT(FMT_32_32_FLOAT),
1821				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
1822				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
1823				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
1824    shader[i++] = VTX_DWORD2(OFFSET(0),
1825			     ENDIAN_SWAP(ENDIAN_NONE),
1826			     CONST_BUF_NO_STRIDE(0),
1827			     MEGA_FETCH(1));
1828    shader[i++] = VTX_DWORD_PAD;
1829    /* 24/25 - src */
1830    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
1831			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
1832			     FETCH_WHOLE_QUAD(0),
1833			     BUFFER_ID(0),
1834			     SRC_GPR(0),
1835			     SRC_REL(ABSOLUTE),
1836			     SRC_SEL_X(SQ_SEL_X),
1837			     MEGA_FETCH_COUNT(8));
1838    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
1839				 DST_REL(0),
1840				 DST_SEL_X(SQ_SEL_X),
1841				 DST_SEL_Y(SQ_SEL_Y),
1842				 DST_SEL_Z(SQ_SEL_1),
1843				 DST_SEL_W(SQ_SEL_0),
1844				 USE_CONST_FIELDS(0),
1845				 DATA_FORMAT(FMT_32_32_FLOAT),
1846				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
1847				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
1848				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
1849    shader[i++] = VTX_DWORD2(OFFSET(8),
1850			     ENDIAN_SWAP(ENDIAN_NONE),
1851			     CONST_BUF_NO_STRIDE(0),
1852			     MEGA_FETCH(0));
1853    shader[i++] = VTX_DWORD_PAD;
1854    /* 26/27 - mask */
1855    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
1856			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
1857			     FETCH_WHOLE_QUAD(0),
1858			     BUFFER_ID(0),
1859			     SRC_GPR(0),
1860			     SRC_REL(ABSOLUTE),
1861			     SRC_SEL_X(SQ_SEL_X),
1862			     MEGA_FETCH_COUNT(8));
1863    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
1864				 DST_REL(0),
1865				 DST_SEL_X(SQ_SEL_X),
1866				 DST_SEL_Y(SQ_SEL_Y),
1867				 DST_SEL_Z(SQ_SEL_1),
1868				 DST_SEL_W(SQ_SEL_0),
1869				 USE_CONST_FIELDS(0),
1870				 DATA_FORMAT(FMT_32_32_FLOAT),
1871				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
1872				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
1873				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
1874    shader[i++] = VTX_DWORD2(OFFSET(16),
1875			     ENDIAN_SWAP(ENDIAN_NONE),
1876			     CONST_BUF_NO_STRIDE(0),
1877			     MEGA_FETCH(0));
1878    shader[i++] = VTX_DWORD_PAD;
1879
1880    /* 28 - non-mask sub */
1881    shader[i++] = CF_DWORD0(ADDR(40));
1882    shader[i++] = CF_DWORD1(POP_COUNT(0),
1883			    CF_CONST(0),
1884			    COND(SQ_CF_COND_ACTIVE),
1885			    I_COUNT(2),
1886			    CALL_COUNT(0),
1887			    END_OF_PROGRAM(0),
1888			    VALID_PIXEL_MODE(0),
1889			    CF_INST(SQ_CF_INST_VTX),
1890			    WHOLE_QUAD_MODE(0),
1891			    BARRIER(1));
1892
1893    /* 29 - ALU */
1894    shader[i++] = CF_ALU_DWORD0(ADDR(33),
1895				KCACHE_BANK0(0),
1896				KCACHE_BANK1(0),
1897				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
1898    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1899				KCACHE_ADDR0(0),
1900				KCACHE_ADDR1(0),
1901				I_COUNT(6),
1902				USES_WATERFALL(0),
1903				CF_INST(SQ_CF_INST_ALU),
1904				WHOLE_QUAD_MODE(0),
1905				BARRIER(1));
1906
1907    /* 30 - dst */
1908    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1909					  TYPE(SQ_EXPORT_POS),
1910					  RW_GPR(1),
1911					  RW_REL(ABSOLUTE),
1912					  INDEX_GPR(0),
1913					  ELEM_SIZE(0));
1914    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1915					       SRC_SEL_Y(SQ_SEL_Y),
1916					       SRC_SEL_Z(SQ_SEL_0),
1917					       SRC_SEL_W(SQ_SEL_1),
1918					       R6xx_ELEM_LOOP(0),
1919					       BURST_COUNT(0),
1920					       END_OF_PROGRAM(0),
1921					       VALID_PIXEL_MODE(0),
1922					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1923					       WHOLE_QUAD_MODE(0),
1924					       BARRIER(1));
1925    /* 31 - src */
1926    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1927					  TYPE(SQ_EXPORT_PARAM),
1928					  RW_GPR(0),
1929					  RW_REL(ABSOLUTE),
1930					  INDEX_GPR(0),
1931					  ELEM_SIZE(0));
1932    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1933					       SRC_SEL_Y(SQ_SEL_Y),
1934					       SRC_SEL_Z(SQ_SEL_0),
1935					       SRC_SEL_W(SQ_SEL_1),
1936					       R6xx_ELEM_LOOP(0),
1937					       BURST_COUNT(0),
1938					       END_OF_PROGRAM(0),
1939					       VALID_PIXEL_MODE(0),
1940					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1941					       WHOLE_QUAD_MODE(0),
1942					       BARRIER(0));
1943    /* 32 */
1944    shader[i++] = CF_DWORD0(ADDR(0));
1945    shader[i++] = CF_DWORD1(POP_COUNT(0),
1946			    CF_CONST(0),
1947			    COND(SQ_CF_COND_ACTIVE),
1948			    I_COUNT(0),
1949			    CALL_COUNT(0),
1950			    END_OF_PROGRAM(0),
1951			    VALID_PIXEL_MODE(0),
1952			    CF_INST(SQ_CF_INST_RETURN),
1953			    WHOLE_QUAD_MODE(0),
1954			    BARRIER(1));
1955
1956
1957    /* 33 srcX MAD */
1958    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
1959                             SRC0_REL(ABSOLUTE),
1960                             SRC0_ELEM(ELEM_Y),
1961                             SRC0_NEG(0),
1962                             SRC1_SEL(0),
1963                             SRC1_REL(ABSOLUTE),
1964                             SRC1_ELEM(ELEM_Y),
1965                             SRC1_NEG(0),
1966                             INDEX_MODE(SQ_INDEX_LOOP),
1967                             PRED_SEL(SQ_PRED_SEL_OFF),
1968                             LAST(1));
1969    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256),
1970                                 SRC2_REL(ABSOLUTE),
1971                                 SRC2_ELEM(ELEM_Z),
1972                                 SRC2_NEG(0),
1973                                 ALU_INST(SQ_OP3_INST_MULADD),
1974                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1975                                 DST_GPR(0),
1976                                 DST_REL(ABSOLUTE),
1977                                 DST_ELEM(ELEM_Z),
1978                                 CLAMP(0));
1979    /* 34 srcY MAD */
1980    shader[i++] = ALU_DWORD0(SRC0_SEL(257),
1981                             SRC0_REL(ABSOLUTE),
1982                             SRC0_ELEM(ELEM_Y),
1983                             SRC0_NEG(0),
1984                             SRC1_SEL(0),
1985                             SRC1_REL(ABSOLUTE),
1986                             SRC1_ELEM(ELEM_Y),
1987                             SRC1_NEG(0),
1988                             INDEX_MODE(SQ_INDEX_LOOP),
1989                             PRED_SEL(SQ_PRED_SEL_OFF),
1990                             LAST(1));
1991    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(257),
1992                                 SRC2_REL(ABSOLUTE),
1993                                 SRC2_ELEM(ELEM_Z),
1994                                 SRC2_NEG(0),
1995                                 ALU_INST(SQ_OP3_INST_MULADD),
1996                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1997                                 DST_GPR(0),
1998                                 DST_REL(ABSOLUTE),
1999                                 DST_ELEM(ELEM_W),
2000                                 CLAMP(0));
2001
2002    /* 35 srcX MAD */
2003    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
2004                             SRC0_REL(ABSOLUTE),
2005                             SRC0_ELEM(ELEM_X),
2006                             SRC0_NEG(0),
2007                             SRC1_SEL(0),
2008                             SRC1_REL(ABSOLUTE),
2009                             SRC1_ELEM(ELEM_X),
2010                             SRC1_NEG(0),
2011                             INDEX_MODE(SQ_INDEX_LOOP),
2012                             PRED_SEL(SQ_PRED_SEL_OFF),
2013                             LAST(0));
2014    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0),
2015                                 SRC2_REL(ABSOLUTE),
2016                                 SRC2_ELEM(ELEM_Z),
2017                                 SRC2_NEG(0),
2018                                 ALU_INST(SQ_OP3_INST_MULADD),
2019                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2020                                 DST_GPR(0),
2021                                 DST_REL(ABSOLUTE),
2022                                 DST_ELEM(ELEM_X),
2023                                 CLAMP(0));
2024    /* 36 srcY MAD */
2025    shader[i++] = ALU_DWORD0(SRC0_SEL(257),
2026                             SRC0_REL(ABSOLUTE),
2027                             SRC0_ELEM(ELEM_X),
2028                             SRC0_NEG(0),
2029                             SRC1_SEL(0),
2030                             SRC1_REL(ABSOLUTE),
2031                             SRC1_ELEM(ELEM_X),
2032                             SRC1_NEG(0),
2033                             INDEX_MODE(SQ_INDEX_LOOP),
2034                             PRED_SEL(SQ_PRED_SEL_OFF),
2035                             LAST(1));
2036    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0),
2037                                 SRC2_REL(ABSOLUTE),
2038                                 SRC2_ELEM(ELEM_W),
2039                                 SRC2_NEG(0),
2040                                 ALU_INST(SQ_OP3_INST_MULADD),
2041                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2042                                 DST_GPR(0),
2043                                 DST_REL(ABSOLUTE),
2044                                 DST_ELEM(ELEM_Y),
2045                                 CLAMP(0));
2046    /* 37 srcX / w */
2047    shader[i++] = ALU_DWORD0(SRC0_SEL(0),
2048                             SRC0_REL(ABSOLUTE),
2049                             SRC0_ELEM(ELEM_X),
2050                             SRC0_NEG(0),
2051                             SRC1_SEL(256),
2052                             SRC1_REL(ABSOLUTE),
2053                             SRC1_ELEM(ELEM_W),
2054                             SRC1_NEG(0),
2055                             INDEX_MODE(SQ_INDEX_AR_X),
2056                             PRED_SEL(SQ_PRED_SEL_OFF),
2057                             LAST(1));
2058    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2059                                 SRC0_ABS(0),
2060                                 SRC1_ABS(0),
2061                                 UPDATE_EXECUTE_MASK(0),
2062                                 UPDATE_PRED(0),
2063                                 WRITE_MASK(1),
2064                                 FOG_MERGE(0),
2065                                 OMOD(SQ_ALU_OMOD_OFF),
2066                                 ALU_INST(SQ_OP2_INST_MUL),
2067                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2068                                 DST_GPR(0),
2069                                 DST_REL(ABSOLUTE),
2070                                 DST_ELEM(ELEM_X),
2071                                 CLAMP(0));
2072
2073    /* 38 srcY / h */
2074    shader[i++] = ALU_DWORD0(SRC0_SEL(0),
2075                             SRC0_REL(ABSOLUTE),
2076                             SRC0_ELEM(ELEM_Y),
2077                             SRC0_NEG(0),
2078                             SRC1_SEL(257),
2079                             SRC1_REL(ABSOLUTE),
2080                             SRC1_ELEM(ELEM_W),
2081                             SRC1_NEG(0),
2082                             INDEX_MODE(SQ_INDEX_AR_X),
2083                             PRED_SEL(SQ_PRED_SEL_OFF),
2084                             LAST(1));
2085    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2086                                 SRC0_ABS(0),
2087                                 SRC1_ABS(0),
2088                                 UPDATE_EXECUTE_MASK(0),
2089                                 UPDATE_PRED(0),
2090                                 WRITE_MASK(1),
2091                                 FOG_MERGE(0),
2092                                 OMOD(SQ_ALU_OMOD_OFF),
2093                                 ALU_INST(SQ_OP2_INST_MUL),
2094                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2095                                 DST_GPR(0),
2096                                 DST_REL(ABSOLUTE),
2097                                 DST_ELEM(ELEM_Y),
2098                                 CLAMP(0));
2099
2100    /* 39 */
2101    shader[i++] = 0x00000000;
2102    shader[i++] = 0x00000000;
2103
2104    /* 40/41 - dst */
2105    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2106			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2107			     FETCH_WHOLE_QUAD(0),
2108			     BUFFER_ID(0),
2109			     SRC_GPR(0),
2110			     SRC_REL(ABSOLUTE),
2111			     SRC_SEL_X(SQ_SEL_X),
2112			     MEGA_FETCH_COUNT(16));
2113    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
2114				 DST_REL(0),
2115				 DST_SEL_X(SQ_SEL_X),
2116				 DST_SEL_Y(SQ_SEL_Y),
2117				 DST_SEL_Z(SQ_SEL_0),
2118				 DST_SEL_W(SQ_SEL_1),
2119				 USE_CONST_FIELDS(0),
2120				 DATA_FORMAT(FMT_32_32_FLOAT),
2121				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2122				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2123				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2124    shader[i++] = VTX_DWORD2(OFFSET(0),
2125			     ENDIAN_SWAP(ENDIAN_NONE),
2126			     CONST_BUF_NO_STRIDE(0),
2127			     MEGA_FETCH(1));
2128    shader[i++] = VTX_DWORD_PAD;
2129    /* 42/43 - src */
2130    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2131			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2132			     FETCH_WHOLE_QUAD(0),
2133			     BUFFER_ID(0),
2134			     SRC_GPR(0),
2135			     SRC_REL(ABSOLUTE),
2136			     SRC_SEL_X(SQ_SEL_X),
2137			     MEGA_FETCH_COUNT(8));
2138    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
2139				 DST_REL(0),
2140				 DST_SEL_X(SQ_SEL_X),
2141				 DST_SEL_Y(SQ_SEL_Y),
2142				 DST_SEL_Z(SQ_SEL_1),
2143				 DST_SEL_W(SQ_SEL_0),
2144				 USE_CONST_FIELDS(0),
2145				 DATA_FORMAT(FMT_32_32_FLOAT),
2146				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2147				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2148				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2149    shader[i++] = VTX_DWORD2(OFFSET(8),
2150			     ENDIAN_SWAP(ENDIAN_NONE),
2151			     CONST_BUF_NO_STRIDE(0),
2152			     MEGA_FETCH(0));
2153    shader[i++] = VTX_DWORD_PAD;
2154
2155    return i;
2156}
2157
2158/* comp ps --------------------------------------- */
2159int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
2160{
2161    int i = 0;
2162
2163    /* 0 */
2164    shader[i++] = CF_DWORD0(ADDR(2));
2165    shader[i++] = CF_DWORD1(POP_COUNT(0),
2166			    CF_CONST(0),
2167			    COND(SQ_CF_COND_ACTIVE),
2168			    I_COUNT(1),
2169			    CALL_COUNT(0),
2170			    END_OF_PROGRAM(0),
2171			    VALID_PIXEL_MODE(0),
2172			    CF_INST(SQ_CF_INST_TEX),
2173			    WHOLE_QUAD_MODE(0),
2174			    BARRIER(1));
2175    /* 1 */
2176    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
2177					  TYPE(SQ_EXPORT_PIXEL),
2178					  RW_GPR(0),
2179					  RW_REL(ABSOLUTE),
2180					  INDEX_GPR(0),
2181					  ELEM_SIZE(1));
2182
2183    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
2184					       SRC_SEL_Y(SQ_SEL_Y),
2185					       SRC_SEL_Z(SQ_SEL_Z),
2186					       SRC_SEL_W(SQ_SEL_W),
2187					       R6xx_ELEM_LOOP(0),
2188					       BURST_COUNT(1),
2189					       END_OF_PROGRAM(1),
2190					       VALID_PIXEL_MODE(0),
2191					       CF_INST(SQ_CF_INST_EXPORT_DONE),
2192					       WHOLE_QUAD_MODE(0),
2193					       BARRIER(1));
2194
2195
2196    /* 2/3 - src */
2197    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
2198			     BC_FRAC_MODE(0),
2199			     FETCH_WHOLE_QUAD(0),
2200			     RESOURCE_ID(0),
2201			     SRC_GPR(0),
2202			     SRC_REL(ABSOLUTE),
2203			     R7xx_ALT_CONST(0));
2204    shader[i++] = TEX_DWORD1(DST_GPR(0),
2205			     DST_REL(ABSOLUTE),
2206			     DST_SEL_X(SQ_SEL_X),
2207			     DST_SEL_Y(SQ_SEL_Y),
2208			     DST_SEL_Z(SQ_SEL_Z),
2209			     DST_SEL_W(SQ_SEL_W),
2210			     LOD_BIAS(0),
2211			     COORD_TYPE_X(TEX_NORMALIZED),
2212			     COORD_TYPE_Y(TEX_NORMALIZED),
2213			     COORD_TYPE_Z(TEX_NORMALIZED),
2214			     COORD_TYPE_W(TEX_NORMALIZED));
2215    shader[i++] = TEX_DWORD2(OFFSET_X(0),
2216			     OFFSET_Y(0),
2217			     OFFSET_Z(0),
2218			     SAMPLER_ID(0),
2219			     SRC_SEL_X(SQ_SEL_X),
2220			     SRC_SEL_Y(SQ_SEL_Y),
2221			     SRC_SEL_Z(SQ_SEL_0),
2222			     SRC_SEL_W(SQ_SEL_1));
2223    shader[i++] = TEX_DWORD_PAD;
2224
2225    return i;
2226}
2227