r600_shader.c revision b7e1c893
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Author: Alex Deucher <alexander.deucher@amd.com>
24 *
25 */
26
27#ifdef HAVE_CONFIG_H
28#include "config.h"
29#endif
30
31#include "xf86.h"
32
33#include "radeon.h"
34#include "r600_shader.h"
35#include "r600_reg.h"
36
37/* solid vs --------------------------------------- */
38int R600_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader)
39{
40    int i = 0;
41
42    /* 0 */
43    shader[i++] = CF_DWORD0(ADDR(4));
44    shader[i++] = CF_DWORD1(POP_COUNT(0),
45			    CF_CONST(0),
46			    COND(SQ_CF_COND_ACTIVE),
47			    I_COUNT(1),
48			    CALL_COUNT(0),
49			    END_OF_PROGRAM(0),
50			    VALID_PIXEL_MODE(0),
51			    CF_INST(SQ_CF_INST_VTX),
52			    WHOLE_QUAD_MODE(0),
53			    BARRIER(1));
54    /* 1 */
55    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
56					  TYPE(SQ_EXPORT_POS),
57					  RW_GPR(1),
58					  RW_REL(ABSOLUTE),
59					  INDEX_GPR(0),
60					  ELEM_SIZE(0));
61    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
62					       SRC_SEL_Y(SQ_SEL_Y),
63					       SRC_SEL_Z(SQ_SEL_Z),
64					       SRC_SEL_W(SQ_SEL_W),
65					       R6xx_ELEM_LOOP(0),
66					       BURST_COUNT(1),
67					       END_OF_PROGRAM(0),
68					       VALID_PIXEL_MODE(0),
69					       CF_INST(SQ_CF_INST_EXPORT_DONE),
70					       WHOLE_QUAD_MODE(0),
71					       BARRIER(1));
72    /* 2 - always export a param whether it's used or not */
73    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
74					  TYPE(SQ_EXPORT_PARAM),
75					  RW_GPR(0),
76					  RW_REL(ABSOLUTE),
77					  INDEX_GPR(0),
78					  ELEM_SIZE(0));
79    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
80					       SRC_SEL_Y(SQ_SEL_Y),
81					       SRC_SEL_Z(SQ_SEL_Z),
82					       SRC_SEL_W(SQ_SEL_W),
83					       R6xx_ELEM_LOOP(0),
84					       BURST_COUNT(0),
85					       END_OF_PROGRAM(1),
86					       VALID_PIXEL_MODE(0),
87					       CF_INST(SQ_CF_INST_EXPORT_DONE),
88					       WHOLE_QUAD_MODE(0),
89					       BARRIER(0));
90    /* 3 - padding */
91    shader[i++] = 0x00000000;
92    shader[i++] = 0x00000000;
93    /* 4/5 */
94    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
95			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
96			     FETCH_WHOLE_QUAD(0),
97			     BUFFER_ID(0),
98			     SRC_GPR(0),
99			     SRC_REL(ABSOLUTE),
100			     SRC_SEL_X(SQ_SEL_X),
101			     MEGA_FETCH_COUNT(8));
102    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
103				 DST_REL(0),
104				 DST_SEL_X(SQ_SEL_X),
105				 DST_SEL_Y(SQ_SEL_Y),
106				 DST_SEL_Z(SQ_SEL_0),
107				 DST_SEL_W(SQ_SEL_1),
108				 USE_CONST_FIELDS(0),
109				 DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
110				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */
111				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */
112				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
113    shader[i++] = VTX_DWORD2(OFFSET(0),
114			     ENDIAN_SWAP(ENDIAN_NONE),
115			     CONST_BUF_NO_STRIDE(0),
116			     MEGA_FETCH(1));
117    shader[i++] = VTX_DWORD_PAD;
118
119    return i;
120}
121
122/* solid ps --------------------------------------- */
123int R600_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader)
124{
125    int i = 0;
126
127    /* 0 */
128    shader[i++] = CF_ALU_DWORD0(ADDR(2),
129				KCACHE_BANK0(0),
130				KCACHE_BANK1(0),
131				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
132    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
133				KCACHE_ADDR0(0),
134				KCACHE_ADDR1(0),
135				I_COUNT(4),
136				USES_WATERFALL(0),
137				CF_INST(SQ_CF_INST_ALU),
138				WHOLE_QUAD_MODE(0),
139				BARRIER(1));
140    /* 1 */
141    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
142					  TYPE(SQ_EXPORT_PIXEL),
143					  RW_GPR(0),
144					  RW_REL(ABSOLUTE),
145					  INDEX_GPR(0),
146					  ELEM_SIZE(1));
147    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
148					       SRC_SEL_Y(SQ_SEL_Y),
149					       SRC_SEL_Z(SQ_SEL_Z),
150					       SRC_SEL_W(SQ_SEL_W),
151					       R6xx_ELEM_LOOP(0),
152					       BURST_COUNT(1),
153					       END_OF_PROGRAM(1),
154					       VALID_PIXEL_MODE(0),
155					       CF_INST(SQ_CF_INST_EXPORT_DONE),
156					       WHOLE_QUAD_MODE(0),
157					       BARRIER(1));
158
159    /* 2 */
160    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
161			     SRC0_REL(ABSOLUTE),
162			     SRC0_ELEM(ELEM_X),
163			     SRC0_NEG(0),
164			     SRC1_SEL(0),
165			     SRC1_REL(ABSOLUTE),
166			     SRC1_ELEM(ELEM_X),
167			     SRC1_NEG(0),
168			     INDEX_MODE(SQ_INDEX_AR_X),
169			     PRED_SEL(SQ_PRED_SEL_OFF),
170			     LAST(0));
171    shader[i++] = ALU_DWORD1_OP2(ChipSet,
172				 SRC0_ABS(0),
173				 SRC1_ABS(0),
174				 UPDATE_EXECUTE_MASK(0),
175				 UPDATE_PRED(0),
176				 WRITE_MASK(1),
177				 FOG_MERGE(0),
178				 OMOD(SQ_ALU_OMOD_OFF),
179				 ALU_INST(SQ_OP2_INST_MOV),
180				 BANK_SWIZZLE(SQ_ALU_VEC_012),
181				 DST_GPR(0),
182				 DST_REL(ABSOLUTE),
183				 DST_ELEM(ELEM_X),
184				 CLAMP(1));
185    /* 3 */
186    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
187			     SRC0_REL(ABSOLUTE),
188			     SRC0_ELEM(ELEM_Y),
189			     SRC0_NEG(0),
190			     SRC1_SEL(0),
191			     SRC1_REL(ABSOLUTE),
192			     SRC1_ELEM(ELEM_Y),
193			     SRC1_NEG(0),
194			     INDEX_MODE(SQ_INDEX_AR_X),
195			     PRED_SEL(SQ_PRED_SEL_OFF),
196			     LAST(0));
197    shader[i++] = ALU_DWORD1_OP2(ChipSet,
198				 SRC0_ABS(0),
199				 SRC1_ABS(0),
200				 UPDATE_EXECUTE_MASK(0),
201				 UPDATE_PRED(0),
202				 WRITE_MASK(1),
203				 FOG_MERGE(0),
204				 OMOD(SQ_ALU_OMOD_OFF),
205				 ALU_INST(SQ_OP2_INST_MOV),
206				 BANK_SWIZZLE(SQ_ALU_VEC_012),
207				 DST_GPR(0),
208				 DST_REL(ABSOLUTE),
209				 DST_ELEM(ELEM_Y),
210				 CLAMP(1));
211    /* 4 */
212    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
213			     SRC0_REL(ABSOLUTE),
214			     SRC0_ELEM(ELEM_Z),
215			     SRC0_NEG(0),
216			     SRC1_SEL(0),
217			     SRC1_REL(ABSOLUTE),
218			     SRC1_ELEM(ELEM_Z),
219			     SRC1_NEG(0),
220			     INDEX_MODE(SQ_INDEX_AR_X),
221			     PRED_SEL(SQ_PRED_SEL_OFF),
222			     LAST(0));
223    shader[i++] = ALU_DWORD1_OP2(ChipSet,
224				 SRC0_ABS(0),
225				 SRC1_ABS(0),
226				 UPDATE_EXECUTE_MASK(0),
227				 UPDATE_PRED(0),
228				 WRITE_MASK(1),
229				 FOG_MERGE(0),
230				 OMOD(SQ_ALU_OMOD_OFF),
231				 ALU_INST(SQ_OP2_INST_MOV),
232				 BANK_SWIZZLE(SQ_ALU_VEC_012),
233				 DST_GPR(0),
234				 DST_REL(ABSOLUTE),
235				 DST_ELEM(ELEM_Z),
236				 CLAMP(1));
237    /* 5 */
238    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
239			     SRC0_REL(ABSOLUTE),
240			     SRC0_ELEM(ELEM_W),
241			     SRC0_NEG(0),
242			     SRC1_SEL(0),
243			     SRC1_REL(ABSOLUTE),
244			     SRC1_ELEM(ELEM_W),
245			     SRC1_NEG(0),
246			     INDEX_MODE(SQ_INDEX_AR_X),
247			     PRED_SEL(SQ_PRED_SEL_OFF),
248			     LAST(1));
249    shader[i++] = ALU_DWORD1_OP2(ChipSet,
250				 SRC0_ABS(0),
251				 SRC1_ABS(0),
252				 UPDATE_EXECUTE_MASK(0),
253				 UPDATE_PRED(0),
254				 WRITE_MASK(1),
255				 FOG_MERGE(0),
256				 OMOD(SQ_ALU_OMOD_OFF),
257				 ALU_INST(SQ_OP2_INST_MOV),
258				 BANK_SWIZZLE(SQ_ALU_VEC_012),
259				 DST_GPR(0),
260				 DST_REL(ABSOLUTE),
261				 DST_ELEM(ELEM_W),
262				 CLAMP(1));
263
264    return i;
265}
266
267/* copy vs --------------------------------------- */
268int R600_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader)
269{
270    int i = 0;
271
272    /* 0 */
273    shader[i++] = CF_DWORD0(ADDR(4));
274    shader[i++] = CF_DWORD1(POP_COUNT(0),
275			    CF_CONST(0),
276			    COND(SQ_CF_COND_ACTIVE),
277			    I_COUNT(2),
278			    CALL_COUNT(0),
279			    END_OF_PROGRAM(0),
280			    VALID_PIXEL_MODE(0),
281			    CF_INST(SQ_CF_INST_VTX),
282			    WHOLE_QUAD_MODE(0),
283			    BARRIER(1));
284    /* 1 */
285    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
286					  TYPE(SQ_EXPORT_POS),
287					  RW_GPR(1),
288					  RW_REL(ABSOLUTE),
289					  INDEX_GPR(0),
290					  ELEM_SIZE(0));
291    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
292					       SRC_SEL_Y(SQ_SEL_Y),
293					       SRC_SEL_Z(SQ_SEL_Z),
294					       SRC_SEL_W(SQ_SEL_W),
295					       R6xx_ELEM_LOOP(0),
296					       BURST_COUNT(0),
297					       END_OF_PROGRAM(0),
298					       VALID_PIXEL_MODE(0),
299					       CF_INST(SQ_CF_INST_EXPORT_DONE),
300					       WHOLE_QUAD_MODE(0),
301					       BARRIER(1));
302    /* 2 */
303    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
304					  TYPE(SQ_EXPORT_PARAM),
305					  RW_GPR(0),
306					  RW_REL(ABSOLUTE),
307					  INDEX_GPR(0),
308					  ELEM_SIZE(0));
309    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
310					       SRC_SEL_Y(SQ_SEL_Y),
311					       SRC_SEL_Z(SQ_SEL_Z),
312					       SRC_SEL_W(SQ_SEL_W),
313					       R6xx_ELEM_LOOP(0),
314					       BURST_COUNT(0),
315					       END_OF_PROGRAM(1),
316					       VALID_PIXEL_MODE(0),
317					       CF_INST(SQ_CF_INST_EXPORT_DONE),
318					       WHOLE_QUAD_MODE(0),
319					       BARRIER(0));
320    /* 3 */
321    shader[i++] = 0x00000000;
322    shader[i++] = 0x00000000;
323    /* 4/5 */
324    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
325			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
326			     FETCH_WHOLE_QUAD(0),
327			     BUFFER_ID(0),
328			     SRC_GPR(0),
329			     SRC_REL(ABSOLUTE),
330			     SRC_SEL_X(SQ_SEL_X),
331			     MEGA_FETCH_COUNT(16));
332    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
333				 DST_REL(0),
334				 DST_SEL_X(SQ_SEL_X),
335				 DST_SEL_Y(SQ_SEL_Y),
336				 DST_SEL_Z(SQ_SEL_0),
337				 DST_SEL_W(SQ_SEL_1),
338				 USE_CONST_FIELDS(0),
339				 DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
340				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */
341				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */
342				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
343    shader[i++] = VTX_DWORD2(OFFSET(0),
344			     ENDIAN_SWAP(ENDIAN_NONE),
345			     CONST_BUF_NO_STRIDE(0),
346			     MEGA_FETCH(1));
347    shader[i++] = VTX_DWORD_PAD;
348    /* 6/7 */
349    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
350			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
351			     FETCH_WHOLE_QUAD(0),
352			     BUFFER_ID(0),
353			     SRC_GPR(0),
354			     SRC_REL(ABSOLUTE),
355			     SRC_SEL_X(SQ_SEL_X),
356			     MEGA_FETCH_COUNT(8));
357    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
358				 DST_REL(0),
359				 DST_SEL_X(SQ_SEL_X),
360				 DST_SEL_Y(SQ_SEL_Y),
361				 DST_SEL_Z(SQ_SEL_0),
362				 DST_SEL_W(SQ_SEL_1),
363				 USE_CONST_FIELDS(0),
364				 DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
365				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */
366				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */
367				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
368    shader[i++] = VTX_DWORD2(OFFSET(8),
369			     ENDIAN_SWAP(ENDIAN_NONE),
370			     CONST_BUF_NO_STRIDE(0),
371			     MEGA_FETCH(0));
372    shader[i++] = VTX_DWORD_PAD;
373
374    return i;
375}
376
377/* copy ps --------------------------------------- */
378int R600_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader)
379{
380    int i=0;
381
382    /* CF INST 0 */
383    shader[i++] = CF_DWORD0(ADDR(2));
384    shader[i++] = CF_DWORD1(POP_COUNT(0),
385			    CF_CONST(0),
386			    COND(SQ_CF_COND_ACTIVE),
387			    I_COUNT(1),
388			    CALL_COUNT(0),
389			    END_OF_PROGRAM(0),
390			    VALID_PIXEL_MODE(0),
391			    CF_INST(SQ_CF_INST_TEX),
392			    WHOLE_QUAD_MODE(0),
393			    BARRIER(1));
394    /* CF INST 1 */
395    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
396					  TYPE(SQ_EXPORT_PIXEL),
397					  RW_GPR(0),
398					  RW_REL(ABSOLUTE),
399					  INDEX_GPR(0),
400					  ELEM_SIZE(1));
401    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
402					       SRC_SEL_Y(SQ_SEL_Y),
403					       SRC_SEL_Z(SQ_SEL_Z),
404					       SRC_SEL_W(SQ_SEL_W),
405					       R6xx_ELEM_LOOP(0),
406					       BURST_COUNT(1),
407					       END_OF_PROGRAM(1),
408					       VALID_PIXEL_MODE(0),
409					       CF_INST(SQ_CF_INST_EXPORT_DONE),
410					       WHOLE_QUAD_MODE(0),
411					       BARRIER(1));
412    /* TEX INST 0 */
413    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
414			     BC_FRAC_MODE(0),
415			     FETCH_WHOLE_QUAD(0),
416			     RESOURCE_ID(0),
417			     SRC_GPR(0),
418			     SRC_REL(ABSOLUTE),
419			     R7xx_ALT_CONST(0));
420    shader[i++] = TEX_DWORD1(DST_GPR(0),
421			     DST_REL(ABSOLUTE),
422			     DST_SEL_X(SQ_SEL_X), /* R */
423			     DST_SEL_Y(SQ_SEL_Y), /* G */
424			     DST_SEL_Z(SQ_SEL_Z), /* B */
425			     DST_SEL_W(SQ_SEL_W), /* A */
426			     LOD_BIAS(0),
427			     COORD_TYPE_X(TEX_UNNORMALIZED),
428			     COORD_TYPE_Y(TEX_UNNORMALIZED),
429			     COORD_TYPE_Z(TEX_UNNORMALIZED),
430			     COORD_TYPE_W(TEX_UNNORMALIZED));
431    shader[i++] = TEX_DWORD2(OFFSET_X(0),
432			     OFFSET_Y(0),
433			     OFFSET_Z(0),
434			     SAMPLER_ID(0),
435			     SRC_SEL_X(SQ_SEL_X),
436			     SRC_SEL_Y(SQ_SEL_Y),
437			     SRC_SEL_Z(SQ_SEL_0),
438			     SRC_SEL_W(SQ_SEL_1));
439    shader[i++] = TEX_DWORD_PAD;
440
441    return i;
442}
443
444/*
445 * ; xv vertex shader
446 * 00 VTX: ADDR(4) CNT(2)
447 *       0  VFETCH R1.xy01, R0.x, fc0  MEGA(16) FORMAT(32_32_FLOAT)
448 *          FORMAT_COMP(SIGNED)
449 *       1  VFETCH R0.xy01, R0.x, fc0  MINI(8) OFFSET(8) FORMAT(32_32_FLOAT)
450 *          FORMAT_COMP(SIGNED)
451 * 01 EXP_DONE: POS0, R1
452 * 02 EXP_DONE: PARAM0, R0  NO_BARRIER
453 * END_OF_PROGRAM
454 */
455int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
456{
457    int i = 0;
458
459    /* 0 */
460    shader[i++] = CF_DWORD0(ADDR(4));
461    shader[i++] = CF_DWORD1(POP_COUNT(0),
462                            CF_CONST(0),
463                            COND(SQ_CF_COND_ACTIVE),
464                            I_COUNT(2),
465                            CALL_COUNT(0),
466                            END_OF_PROGRAM(0),
467                            VALID_PIXEL_MODE(0),
468                            CF_INST(SQ_CF_INST_VTX),
469                            WHOLE_QUAD_MODE(0),
470                            BARRIER(1));
471    /* 1 */
472    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
473                                          TYPE(SQ_EXPORT_POS),
474                                          RW_GPR(1),
475                                          RW_REL(ABSOLUTE),
476                                          INDEX_GPR(0),
477                                          ELEM_SIZE(3));
478    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
479                                               SRC_SEL_Y(SQ_SEL_Y),
480                                               SRC_SEL_Z(SQ_SEL_Z),
481                                               SRC_SEL_W(SQ_SEL_W),
482                                               R6xx_ELEM_LOOP(0),
483                                               BURST_COUNT(1),
484                                               END_OF_PROGRAM(0),
485                                               VALID_PIXEL_MODE(0),
486                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
487                                               WHOLE_QUAD_MODE(0),
488                                               BARRIER(1));
489    /* 2 */
490    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
491                                          TYPE(SQ_EXPORT_PARAM),
492                                          RW_GPR(0),
493                                          RW_REL(ABSOLUTE),
494                                          INDEX_GPR(0),
495                                          ELEM_SIZE(3));
496    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
497                                               SRC_SEL_Y(SQ_SEL_Y),
498                                               SRC_SEL_Z(SQ_SEL_Z),
499                                               SRC_SEL_W(SQ_SEL_W),
500                                               R6xx_ELEM_LOOP(0),
501                                               BURST_COUNT(1),
502                                               END_OF_PROGRAM(1),
503                                               VALID_PIXEL_MODE(0),
504                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
505                                               WHOLE_QUAD_MODE(0),
506                                               BARRIER(0));
507    shader[i++] = 0x00000000;
508    shader[i++] = 0x00000000;
509    /* 4/5 */
510    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
511                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
512                             FETCH_WHOLE_QUAD(0),
513                             BUFFER_ID(0),
514                             SRC_GPR(0),
515                             SRC_REL(ABSOLUTE),
516                             SRC_SEL_X(SQ_SEL_X),
517                             MEGA_FETCH_COUNT(16));
518    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
519                                 DST_REL(ABSOLUTE),
520                                 DST_SEL_X(SQ_SEL_X),
521                                 DST_SEL_Y(SQ_SEL_Y),
522                                 DST_SEL_Z(SQ_SEL_0),
523                                 DST_SEL_W(SQ_SEL_1),
524                                 USE_CONST_FIELDS(0),
525                                 DATA_FORMAT(FMT_32_32_FLOAT),
526                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
527                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
528                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
529    shader[i++] = VTX_DWORD2(OFFSET(0),
530                             ENDIAN_SWAP(ENDIAN_NONE),
531                             CONST_BUF_NO_STRIDE(0),
532                             MEGA_FETCH(1));
533    shader[i++] = VTX_DWORD_PAD;
534    /* 6/7 */
535    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
536                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
537                             FETCH_WHOLE_QUAD(0),
538                             BUFFER_ID(0),
539                             SRC_GPR(0),
540                             SRC_REL(ABSOLUTE),
541                             SRC_SEL_X(SQ_SEL_X),
542                             MEGA_FETCH_COUNT(8));
543    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
544                                 DST_REL(ABSOLUTE),
545                                 DST_SEL_X(SQ_SEL_X),
546                                 DST_SEL_Y(SQ_SEL_Y),
547                                 DST_SEL_Z(SQ_SEL_0),
548                                 DST_SEL_W(SQ_SEL_1),
549                                 USE_CONST_FIELDS(0),
550                                 DATA_FORMAT(FMT_32_32_FLOAT),
551                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
552                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
553                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
554    shader[i++] = VTX_DWORD2(OFFSET(8),
555                             ENDIAN_SWAP(ENDIAN_NONE),
556                             CONST_BUF_NO_STRIDE(0),
557                             MEGA_FETCH(0));
558    shader[i++] = VTX_DWORD_PAD;
559
560    return i;
561}
562
563/*
564 * ; xv ps planar
565 * 00 TEX: ADDR(20) CNT(3) NO_BARRIER
566 *       0  SAMPLE R1.x__1, R0.xy01, t0, s0
567 *       1  SAMPLE R1.__x_, R0.xy01, t1, s1
568 *       2  SAMPLE R1._x__, R0.xy01, t2, s2
569 * 01 TEX: ADDR(28) CNT(2) NO_BARRIER
570 *       0  SAMPLE R1.x__1, R0.xy01, t0, s0
571 *       1  SAMPLE R1._xy_, R0.xy01, t1, s1
572 * 02 ALU: ADDR(4) CNT(16)
573 *       3  x: MULADD      R1.x,  R1.x,  C3.x,  C3.y      CLAMP
574 *          y: MULADD      R1.y,  R1.y,  C3.z,  C3.w
575 *          z: MULADD      R1.z,  R1.z,  C3.z,  C3.w
576 *          w: MOV         R1.w,  0.0f
577 *       4  x: DOT4        R2.x,  R1.x,  C0.x      CLAMP VEC_102
578 *          y: DOT4        ____,  R1.y,  C0.y      CLAMP VEC_102
579 *          z: DOT4        ____,  R1.z,  C0.z      CLAMP VEC_102
580 *          w: DOT4        ____,  R1.w,  C0.w      CLAMP VEC_021
581 *       5  x: DOT4        ____,  R1.x,  C1.x      CLAMP VEC_102
582 *          y: DOT4        R2.y,  R1.y,  C1.y      CLAMP VEC_102
583 *          z: DOT4        ____,  R1.z,  C1.z      CLAMP VEC_102
584 *          w: DOT4        ____,  R1.w,  C1.w      CLAMP VEC_021
585 *       6  x: DOT4        ____,  R1.x,  C2.x      CLAMP VEC_102
586 *          y: DOT4        ____,  R1.y,  C2.y      CLAMP VEC_102
587 *          z: DOT4        R2.z,  R1.z,  C2.z      CLAMP VEC_102
588 *          w: DOT4        ____,  R1.w,  C2.w      CLAMP VEC_021
589 * 03 EXP_DONE: PIX0, R2
590 * END_OF_PROGRAM
591 */
592int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
593{
594    int i = 0;
595
596    /* 0 */
597    shader[i++] = CF_DWORD0(ADDR(20));
598    shader[i++] = CF_DWORD1(POP_COUNT(0),
599                            CF_CONST(0),
600                            COND(SQ_CF_COND_BOOL),
601                            I_COUNT(0),
602                            CALL_COUNT(0),
603                            END_OF_PROGRAM(0),
604                            VALID_PIXEL_MODE(0),
605                            CF_INST(SQ_CF_INST_CALL),
606                            WHOLE_QUAD_MODE(0),
607                            BARRIER(0));
608    /* 1 */
609    shader[i++] = CF_DWORD0(ADDR(28));
610    shader[i++] = CF_DWORD1(POP_COUNT(0),
611                            CF_CONST(0),
612                            COND(SQ_CF_COND_NOT_BOOL),
613                            I_COUNT(0),
614                            CALL_COUNT(0),
615                            END_OF_PROGRAM(0),
616                            VALID_PIXEL_MODE(0),
617                            CF_INST(SQ_CF_INST_CALL),
618                            WHOLE_QUAD_MODE(0),
619                            BARRIER(0));
620    /* 2 */
621    shader[i++] = CF_ALU_DWORD0(ADDR(4),
622                                KCACHE_BANK0(0),
623                                KCACHE_BANK1(0),
624                                KCACHE_MODE0(SQ_CF_KCACHE_NOP));
625    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
626                                KCACHE_ADDR0(0),
627                                KCACHE_ADDR1(0),
628                                I_COUNT(16),
629                                USES_WATERFALL(0),
630                                CF_INST(SQ_CF_INST_ALU),
631                                WHOLE_QUAD_MODE(0),
632                                BARRIER(1));
633    /* 3 */
634    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
635                                          TYPE(SQ_EXPORT_PIXEL),
636                                          RW_GPR(2),
637                                          RW_REL(ABSOLUTE),
638                                          INDEX_GPR(0),
639                                          ELEM_SIZE(3));
640    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
641                                               SRC_SEL_Y(SQ_SEL_Y),
642                                               SRC_SEL_Z(SQ_SEL_Z),
643                                               SRC_SEL_W(SQ_SEL_W),
644                                               R6xx_ELEM_LOOP(0),
645                                               BURST_COUNT(1),
646                                               END_OF_PROGRAM(1),
647                                               VALID_PIXEL_MODE(0),
648                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
649                                               WHOLE_QUAD_MODE(0),
650                                               BARRIER(1));
651    /* 4 */
652    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
653                             SRC0_REL(ABSOLUTE),
654                             SRC0_ELEM(ELEM_X),
655                             SRC0_NEG(0),
656                             SRC1_SEL(259),
657                             SRC1_REL(ABSOLUTE),
658                             SRC1_ELEM(ELEM_X),
659                             SRC1_NEG(0),
660                             INDEX_MODE(SQ_INDEX_LOOP),
661                             PRED_SEL(SQ_PRED_SEL_OFF),
662                             LAST(0));
663    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
664                                 SRC2_REL(ABSOLUTE),
665                                 SRC2_ELEM(ELEM_Y),
666                                 SRC2_NEG(0),
667                                 ALU_INST(SQ_OP3_INST_MULADD),
668                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
669                                 DST_GPR(1),
670                                 DST_REL(ABSOLUTE),
671                                 DST_ELEM(ELEM_X),
672                                 CLAMP(1));
673    /* 5 */
674    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
675                             SRC0_REL(ABSOLUTE),
676                             SRC0_ELEM(ELEM_Y),
677                             SRC0_NEG(0),
678                             SRC1_SEL(259),
679                             SRC1_REL(ABSOLUTE),
680                             SRC1_ELEM(ELEM_Z),
681                             SRC1_NEG(0),
682                             INDEX_MODE(SQ_INDEX_LOOP),
683                             PRED_SEL(SQ_PRED_SEL_OFF),
684                             LAST(0));
685    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
686                                 SRC2_REL(ABSOLUTE),
687                                 SRC2_ELEM(ELEM_W),
688                                 SRC2_NEG(0),
689                                 ALU_INST(SQ_OP3_INST_MULADD),
690                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
691                                 DST_GPR(1),
692                                 DST_REL(ABSOLUTE),
693                                 DST_ELEM(ELEM_Y),
694                                 CLAMP(0));
695    /* 6 */
696    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
697                             SRC0_REL(ABSOLUTE),
698                             SRC0_ELEM(ELEM_Z),
699                             SRC0_NEG(0),
700                             SRC1_SEL(259),
701                             SRC1_REL(ABSOLUTE),
702                             SRC1_ELEM(ELEM_Z),
703                             SRC1_NEG(0),
704                             INDEX_MODE(SQ_INDEX_LOOP),
705                             PRED_SEL(SQ_PRED_SEL_OFF),
706                             LAST(0));
707    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
708                                 SRC2_REL(ABSOLUTE),
709                                 SRC2_ELEM(ELEM_W),
710                                 SRC2_NEG(0),
711                                 ALU_INST(SQ_OP3_INST_MULADD),
712                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
713                                 DST_GPR(1),
714                                 DST_REL(ABSOLUTE),
715                                 DST_ELEM(ELEM_Z),
716                                 CLAMP(0));
717    /* 7 */
718    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
719                             SRC0_REL(ABSOLUTE),
720                             SRC0_ELEM(ELEM_X),
721                             SRC0_NEG(0),
722                             SRC1_SEL(SQ_ALU_SRC_0),
723                             SRC1_REL(ABSOLUTE),
724                             SRC1_ELEM(ELEM_X),
725                             SRC1_NEG(0),
726                             INDEX_MODE(SQ_INDEX_LOOP),
727                             PRED_SEL(SQ_PRED_SEL_OFF),
728                             LAST(1));
729    shader[i++] = ALU_DWORD1_OP2(ChipSet,
730                                 SRC0_ABS(0),
731                                 SRC1_ABS(0),
732                                 UPDATE_EXECUTE_MASK(0),
733                                 UPDATE_PRED(0),
734                                 WRITE_MASK(1),
735                                 FOG_MERGE(0),
736                                 OMOD(SQ_ALU_OMOD_OFF),
737                                 ALU_INST(SQ_OP2_INST_MOV),
738                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
739                                 DST_GPR(1),
740                                 DST_REL(ABSOLUTE),
741                                 DST_ELEM(ELEM_W),
742                                 CLAMP(0));
743    /* 8 */
744    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
745                             SRC0_REL(ABSOLUTE),
746                             SRC0_ELEM(ELEM_X),
747                             SRC0_NEG(0),
748                             SRC1_SEL(256),
749                             SRC1_REL(ABSOLUTE),
750                             SRC1_ELEM(ELEM_X),
751                             SRC1_NEG(0),
752                             INDEX_MODE(SQ_INDEX_LOOP),
753                             PRED_SEL(SQ_PRED_SEL_OFF),
754                             LAST(0));
755    shader[i++] = ALU_DWORD1_OP2(ChipSet,
756                                 SRC0_ABS(0),
757                                 SRC1_ABS(0),
758                                 UPDATE_EXECUTE_MASK(0),
759                                 UPDATE_PRED(0),
760                                 WRITE_MASK(1),
761                                 FOG_MERGE(0),
762                                 OMOD(SQ_ALU_OMOD_OFF),
763                                 ALU_INST(SQ_OP2_INST_DOT4),
764                                 BANK_SWIZZLE(SQ_ALU_VEC_102),
765                                 DST_GPR(2),
766                                 DST_REL(ABSOLUTE),
767                                 DST_ELEM(ELEM_X),
768                                 CLAMP(1));
769    /* 9 */
770    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
771                             SRC0_REL(ABSOLUTE),
772                             SRC0_ELEM(ELEM_Y),
773                             SRC0_NEG(0),
774                             SRC1_SEL(256),
775                             SRC1_REL(ABSOLUTE),
776                             SRC1_ELEM(ELEM_Y),
777                             SRC1_NEG(0),
778                             INDEX_MODE(SQ_INDEX_LOOP),
779                             PRED_SEL(SQ_PRED_SEL_OFF),
780                             LAST(0));
781    shader[i++] = ALU_DWORD1_OP2(ChipSet,
782                                 SRC0_ABS(0),
783                                 SRC1_ABS(0),
784                                 UPDATE_EXECUTE_MASK(0),
785                                 UPDATE_PRED(0),
786                                 WRITE_MASK(0),
787                                 FOG_MERGE(0),
788                                 OMOD(SQ_ALU_OMOD_OFF),
789                                 ALU_INST(SQ_OP2_INST_DOT4),
790                                 BANK_SWIZZLE(SQ_ALU_VEC_102),
791                                 DST_GPR(0),
792                                 DST_REL(ABSOLUTE),
793                                 DST_ELEM(ELEM_Y),
794                                 CLAMP(1));
795    /* 10 */
796    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
797                             SRC0_REL(ABSOLUTE),
798                             SRC0_ELEM(ELEM_Z),
799                             SRC0_NEG(0),
800                             SRC1_SEL(256),
801                             SRC1_REL(ABSOLUTE),
802                             SRC1_ELEM(ELEM_Z),
803                             SRC1_NEG(0),
804                             INDEX_MODE(SQ_INDEX_LOOP),
805                             PRED_SEL(SQ_PRED_SEL_OFF),
806                             LAST(0));
807    shader[i++] = ALU_DWORD1_OP2(ChipSet,
808                                 SRC0_ABS(0),
809                                 SRC1_ABS(0),
810                                 UPDATE_EXECUTE_MASK(0),
811                                 UPDATE_PRED(0),
812                                 WRITE_MASK(0),
813                                 FOG_MERGE(0),
814                                 OMOD(SQ_ALU_OMOD_OFF),
815                                 ALU_INST(SQ_OP2_INST_DOT4),
816                                 BANK_SWIZZLE(SQ_ALU_VEC_102),
817                                 DST_GPR(0),
818                                 DST_REL(ABSOLUTE),
819                                 DST_ELEM(ELEM_Z),
820                                 CLAMP(1));
821    /* 11 */
822    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
823                             SRC0_REL(ABSOLUTE),
824                             SRC0_ELEM(ELEM_W),
825                             SRC0_NEG(0),
826                             SRC1_SEL(256),
827                             SRC1_REL(ABSOLUTE),
828                             SRC1_ELEM(ELEM_W),
829                             SRC1_NEG(0),
830                             INDEX_MODE(SQ_INDEX_LOOP),
831                             PRED_SEL(SQ_PRED_SEL_OFF),
832                             LAST(1));
833    shader[i++] = ALU_DWORD1_OP2(ChipSet,
834                                 SRC0_ABS(0),
835                                 SRC1_ABS(0),
836                                 UPDATE_EXECUTE_MASK(0),
837                                 UPDATE_PRED(0),
838                                 WRITE_MASK(0),
839                                 FOG_MERGE(0),
840                                 OMOD(SQ_ALU_OMOD_OFF),
841                                 ALU_INST(SQ_OP2_INST_DOT4),
842                                 BANK_SWIZZLE(SQ_ALU_VEC_021),
843                                 DST_GPR(0),
844                                 DST_REL(ABSOLUTE),
845                                 DST_ELEM(ELEM_W),
846                                 CLAMP(1));
847    /* 12 */
848    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
849                             SRC0_REL(ABSOLUTE),
850                             SRC0_ELEM(ELEM_X),
851                             SRC0_NEG(0),
852                             SRC1_SEL(257),
853                             SRC1_REL(ABSOLUTE),
854                             SRC1_ELEM(ELEM_X),
855                             SRC1_NEG(0),
856                             INDEX_MODE(SQ_INDEX_LOOP),
857                             PRED_SEL(SQ_PRED_SEL_OFF),
858                             LAST(0));
859    shader[i++] = ALU_DWORD1_OP2(ChipSet,
860                                 SRC0_ABS(0),
861                                 SRC1_ABS(0),
862                                 UPDATE_EXECUTE_MASK(0),
863                                 UPDATE_PRED(0),
864                                 WRITE_MASK(0),
865                                 FOG_MERGE(0),
866                                 OMOD(SQ_ALU_OMOD_OFF),
867                                 ALU_INST(SQ_OP2_INST_DOT4),
868                                 BANK_SWIZZLE(SQ_ALU_VEC_102),
869                                 DST_GPR(0),
870                                 DST_REL(ABSOLUTE),
871                                 DST_ELEM(ELEM_X),
872                                 CLAMP(1));
873    /* 13 */
874    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
875                             SRC0_REL(ABSOLUTE),
876                             SRC0_ELEM(ELEM_Y),
877                             SRC0_NEG(0),
878                             SRC1_SEL(257),
879                             SRC1_REL(ABSOLUTE),
880                             SRC1_ELEM(ELEM_Y),
881                             SRC1_NEG(0),
882                             INDEX_MODE(SQ_INDEX_LOOP),
883                             PRED_SEL(SQ_PRED_SEL_OFF),
884                             LAST(0));
885    shader[i++] = ALU_DWORD1_OP2(ChipSet,
886                                 SRC0_ABS(0),
887                                 SRC1_ABS(0),
888                                 UPDATE_EXECUTE_MASK(0),
889                                 UPDATE_PRED(0),
890                                 WRITE_MASK(1),
891                                 FOG_MERGE(0),
892                                 OMOD(SQ_ALU_OMOD_OFF),
893                                 ALU_INST(SQ_OP2_INST_DOT4),
894                                 BANK_SWIZZLE(SQ_ALU_VEC_102),
895                                 DST_GPR(2),
896                                 DST_REL(ABSOLUTE),
897                                 DST_ELEM(ELEM_Y),
898                                 CLAMP(1));
899    /* 14 */
900    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
901                             SRC0_REL(ABSOLUTE),
902                             SRC0_ELEM(ELEM_Z),
903                             SRC0_NEG(0),
904                             SRC1_SEL(257),
905                             SRC1_REL(ABSOLUTE),
906                             SRC1_ELEM(ELEM_Z),
907                             SRC1_NEG(0),
908                             INDEX_MODE(SQ_INDEX_LOOP),
909                             PRED_SEL(SQ_PRED_SEL_OFF),
910                             LAST(0));
911    shader[i++] = ALU_DWORD1_OP2(ChipSet,
912                                 SRC0_ABS(0),
913                                 SRC1_ABS(0),
914                                 UPDATE_EXECUTE_MASK(0),
915                                 UPDATE_PRED(0),
916                                 WRITE_MASK(0),
917                                 FOG_MERGE(0),
918                                 OMOD(SQ_ALU_OMOD_OFF),
919                                 ALU_INST(SQ_OP2_INST_DOT4),
920                                 BANK_SWIZZLE(SQ_ALU_VEC_102),
921                                 DST_GPR(0),
922                                 DST_REL(ABSOLUTE),
923                                 DST_ELEM(ELEM_Z),
924                                 CLAMP(1));
925    /* 15 */
926    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
927                             SRC0_REL(ABSOLUTE),
928                             SRC0_ELEM(ELEM_W),
929                             SRC0_NEG(0),
930                             SRC1_SEL(257),
931                             SRC1_REL(ABSOLUTE),
932                             SRC1_ELEM(ELEM_W),
933                             SRC1_NEG(0),
934                             INDEX_MODE(SQ_INDEX_LOOP),
935                             PRED_SEL(SQ_PRED_SEL_OFF),
936                             LAST(1));
937    shader[i++] = ALU_DWORD1_OP2(ChipSet,
938                                 SRC0_ABS(0),
939                                 SRC1_ABS(0),
940                                 UPDATE_EXECUTE_MASK(0),
941                                 UPDATE_PRED(0),
942                                 WRITE_MASK(0),
943                                 FOG_MERGE(0),
944                                 OMOD(SQ_ALU_OMOD_OFF),
945                                 ALU_INST(SQ_OP2_INST_DOT4),
946                                 BANK_SWIZZLE(SQ_ALU_VEC_021),
947                                 DST_GPR(0),
948                                 DST_REL(ABSOLUTE),
949                                 DST_ELEM(ELEM_W),
950                                 CLAMP(1));
951    /* 16 */
952    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
953                             SRC0_REL(ABSOLUTE),
954                             SRC0_ELEM(ELEM_X),
955                             SRC0_NEG(0),
956                             SRC1_SEL(258),
957                             SRC1_REL(ABSOLUTE),
958                             SRC1_ELEM(ELEM_X),
959                             SRC1_NEG(0),
960                             INDEX_MODE(SQ_INDEX_LOOP),
961                             PRED_SEL(SQ_PRED_SEL_OFF),
962                             LAST(0));
963    shader[i++] = ALU_DWORD1_OP2(ChipSet,
964                                 SRC0_ABS(0),
965                                 SRC1_ABS(0),
966                                 UPDATE_EXECUTE_MASK(0),
967                                 UPDATE_PRED(0),
968                                 WRITE_MASK(0),
969                                 FOG_MERGE(0),
970                                 OMOD(SQ_ALU_OMOD_OFF),
971                                 ALU_INST(SQ_OP2_INST_DOT4),
972                                 BANK_SWIZZLE(SQ_ALU_VEC_102),
973                                 DST_GPR(0),
974                                 DST_REL(ABSOLUTE),
975                                 DST_ELEM(ELEM_X),
976                                 CLAMP(1));
977    /* 17 */
978    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
979                             SRC0_REL(ABSOLUTE),
980                             SRC0_ELEM(ELEM_Y),
981                             SRC0_NEG(0),
982                             SRC1_SEL(258),
983                             SRC1_REL(ABSOLUTE),
984                             SRC1_ELEM(ELEM_Y),
985                             SRC1_NEG(0),
986                             INDEX_MODE(SQ_INDEX_LOOP),
987                             PRED_SEL(SQ_PRED_SEL_OFF),
988                             LAST(0));
989    shader[i++] = ALU_DWORD1_OP2(ChipSet,
990                                 SRC0_ABS(0),
991                                 SRC1_ABS(0),
992                                 UPDATE_EXECUTE_MASK(0),
993                                 UPDATE_PRED(0),
994                                 WRITE_MASK(0),
995                                 FOG_MERGE(0),
996                                 OMOD(SQ_ALU_OMOD_OFF),
997                                 ALU_INST(SQ_OP2_INST_DOT4),
998                                 BANK_SWIZZLE(SQ_ALU_VEC_102),
999                                 DST_GPR(0),
1000                                 DST_REL(ABSOLUTE),
1001                                 DST_ELEM(ELEM_Y),
1002                                 CLAMP(1));
1003    /* 18 */
1004    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
1005                             SRC0_REL(ABSOLUTE),
1006                             SRC0_ELEM(ELEM_Z),
1007                             SRC0_NEG(0),
1008                             SRC1_SEL(258),
1009                             SRC1_REL(ABSOLUTE),
1010                             SRC1_ELEM(ELEM_Z),
1011                             SRC1_NEG(0),
1012                             INDEX_MODE(SQ_INDEX_LOOP),
1013                             PRED_SEL(SQ_PRED_SEL_OFF),
1014                             LAST(0));
1015    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1016                                 SRC0_ABS(0),
1017                                 SRC1_ABS(0),
1018                                 UPDATE_EXECUTE_MASK(0),
1019                                 UPDATE_PRED(0),
1020                                 WRITE_MASK(1),
1021                                 FOG_MERGE(0),
1022                                 OMOD(SQ_ALU_OMOD_OFF),
1023                                 ALU_INST(SQ_OP2_INST_DOT4),
1024                                 BANK_SWIZZLE(SQ_ALU_VEC_102),
1025                                 DST_GPR(2),
1026                                 DST_REL(ABSOLUTE),
1027                                 DST_ELEM(ELEM_Z),
1028                                 CLAMP(1));
1029    /* 19 */
1030    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
1031                             SRC0_REL(ABSOLUTE),
1032                             SRC0_ELEM(ELEM_W),
1033                             SRC0_NEG(0),
1034                             SRC1_SEL(258),
1035                             SRC1_REL(ABSOLUTE),
1036                             SRC1_ELEM(ELEM_W),
1037                             SRC1_NEG(0),
1038                             INDEX_MODE(SQ_INDEX_LOOP),
1039                             PRED_SEL(SQ_PRED_SEL_OFF),
1040                             LAST(1));
1041    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1042                                 SRC0_ABS(0),
1043                                 SRC1_ABS(0),
1044                                 UPDATE_EXECUTE_MASK(0),
1045                                 UPDATE_PRED(0),
1046                                 WRITE_MASK(0),
1047                                 FOG_MERGE(0),
1048                                 OMOD(SQ_ALU_OMOD_OFF),
1049                                 ALU_INST(SQ_OP2_INST_DOT4),
1050                                 BANK_SWIZZLE(SQ_ALU_VEC_021),
1051                                 DST_GPR(0),
1052                                 DST_REL(ABSOLUTE),
1053                                 DST_ELEM(ELEM_W),
1054                                 CLAMP(1));
1055    /* 20 */
1056    shader[i++] = CF_DWORD0(ADDR(22));
1057    shader[i++] = CF_DWORD1(POP_COUNT(0),
1058                            CF_CONST(0),
1059                            COND(SQ_CF_COND_ACTIVE),
1060                            I_COUNT(3),
1061                            CALL_COUNT(0),
1062                            END_OF_PROGRAM(0),
1063                            VALID_PIXEL_MODE(0),
1064                            CF_INST(SQ_CF_INST_TEX),
1065                            WHOLE_QUAD_MODE(0),
1066                            BARRIER(1));
1067    /* 21 */
1068    shader[i++] = CF_DWORD0(ADDR(0));
1069    shader[i++] = CF_DWORD1(POP_COUNT(0),
1070			    CF_CONST(0),
1071			    COND(SQ_CF_COND_ACTIVE),
1072			    I_COUNT(0),
1073			    CALL_COUNT(0),
1074			    END_OF_PROGRAM(0),
1075			    VALID_PIXEL_MODE(0),
1076			    CF_INST(SQ_CF_INST_RETURN),
1077			    WHOLE_QUAD_MODE(0),
1078			    BARRIER(1));
1079    /* 22/23 */
1080    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1081                             BC_FRAC_MODE(0),
1082                             FETCH_WHOLE_QUAD(0),
1083                             RESOURCE_ID(0),
1084                             SRC_GPR(0),
1085                             SRC_REL(ABSOLUTE),
1086                             R7xx_ALT_CONST(0));
1087    shader[i++] = TEX_DWORD1(DST_GPR(1),
1088                             DST_REL(ABSOLUTE),
1089                             DST_SEL_X(SQ_SEL_X),
1090                             DST_SEL_Y(SQ_SEL_MASK),
1091                             DST_SEL_Z(SQ_SEL_MASK),
1092                             DST_SEL_W(SQ_SEL_1),
1093                             LOD_BIAS(0),
1094                             COORD_TYPE_X(TEX_NORMALIZED),
1095                             COORD_TYPE_Y(TEX_NORMALIZED),
1096                             COORD_TYPE_Z(TEX_NORMALIZED),
1097                             COORD_TYPE_W(TEX_NORMALIZED));
1098    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1099                             OFFSET_Y(0),
1100                             OFFSET_Z(0),
1101                             SAMPLER_ID(0),
1102                             SRC_SEL_X(SQ_SEL_X),
1103                             SRC_SEL_Y(SQ_SEL_Y),
1104                             SRC_SEL_Z(SQ_SEL_0),
1105                             SRC_SEL_W(SQ_SEL_1));
1106    shader[i++] = TEX_DWORD_PAD;
1107    /* 24/25 */
1108    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1109                             BC_FRAC_MODE(0),
1110                             FETCH_WHOLE_QUAD(0),
1111                             RESOURCE_ID(1),
1112                             SRC_GPR(0),
1113                             SRC_REL(ABSOLUTE),
1114                             R7xx_ALT_CONST(0));
1115    shader[i++] = TEX_DWORD1(DST_GPR(1),
1116                             DST_REL(ABSOLUTE),
1117                             DST_SEL_X(SQ_SEL_MASK),
1118                             DST_SEL_Y(SQ_SEL_MASK),
1119                             DST_SEL_Z(SQ_SEL_X),
1120                             DST_SEL_W(SQ_SEL_MASK),
1121                             LOD_BIAS(0),
1122                             COORD_TYPE_X(TEX_NORMALIZED),
1123                             COORD_TYPE_Y(TEX_NORMALIZED),
1124                             COORD_TYPE_Z(TEX_NORMALIZED),
1125                             COORD_TYPE_W(TEX_NORMALIZED));
1126    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1127                             OFFSET_Y(0),
1128                             OFFSET_Z(0),
1129                             SAMPLER_ID(1),
1130                             SRC_SEL_X(SQ_SEL_X),
1131                             SRC_SEL_Y(SQ_SEL_Y),
1132                             SRC_SEL_Z(SQ_SEL_0),
1133                             SRC_SEL_W(SQ_SEL_1));
1134    shader[i++] = TEX_DWORD_PAD;
1135    /* 26/27 */
1136    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1137                             BC_FRAC_MODE(0),
1138                             FETCH_WHOLE_QUAD(0),
1139                             RESOURCE_ID(2),
1140                             SRC_GPR(0),
1141                             SRC_REL(ABSOLUTE),
1142                             R7xx_ALT_CONST(0));
1143    shader[i++] = TEX_DWORD1(DST_GPR(1),
1144                             DST_REL(ABSOLUTE),
1145                             DST_SEL_X(SQ_SEL_MASK),
1146                             DST_SEL_Y(SQ_SEL_X),
1147                             DST_SEL_Z(SQ_SEL_MASK),
1148                             DST_SEL_W(SQ_SEL_MASK),
1149                             LOD_BIAS(0),
1150                             COORD_TYPE_X(TEX_NORMALIZED),
1151                             COORD_TYPE_Y(TEX_NORMALIZED),
1152                             COORD_TYPE_Z(TEX_NORMALIZED),
1153                             COORD_TYPE_W(TEX_NORMALIZED));
1154    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1155                             OFFSET_Y(0),
1156                             OFFSET_Z(0),
1157                             SAMPLER_ID(2),
1158                             SRC_SEL_X(SQ_SEL_X),
1159                             SRC_SEL_Y(SQ_SEL_Y),
1160                             SRC_SEL_Z(SQ_SEL_0),
1161                             SRC_SEL_W(SQ_SEL_1));
1162    shader[i++] = TEX_DWORD_PAD;
1163    /* 28 */
1164    shader[i++] = CF_DWORD0(ADDR(30));
1165    shader[i++] = CF_DWORD1(POP_COUNT(0),
1166                            CF_CONST(0),
1167                            COND(SQ_CF_COND_ACTIVE),
1168                            I_COUNT(2),
1169                            CALL_COUNT(0),
1170                            END_OF_PROGRAM(0),
1171                            VALID_PIXEL_MODE(0),
1172                            CF_INST(SQ_CF_INST_TEX),
1173                            WHOLE_QUAD_MODE(0),
1174                            BARRIER(1));
1175    /* 29 */
1176    shader[i++] = CF_DWORD0(ADDR(0));
1177    shader[i++] = CF_DWORD1(POP_COUNT(0),
1178			    CF_CONST(0),
1179			    COND(SQ_CF_COND_ACTIVE),
1180			    I_COUNT(0),
1181			    CALL_COUNT(0),
1182			    END_OF_PROGRAM(0),
1183			    VALID_PIXEL_MODE(0),
1184			    CF_INST(SQ_CF_INST_RETURN),
1185			    WHOLE_QUAD_MODE(0),
1186			    BARRIER(1));
1187    /* 30/31 */
1188    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1189                             BC_FRAC_MODE(0),
1190                             FETCH_WHOLE_QUAD(0),
1191                             RESOURCE_ID(0),
1192                             SRC_GPR(0),
1193                             SRC_REL(ABSOLUTE),
1194                             R7xx_ALT_CONST(0));
1195    shader[i++] = TEX_DWORD1(DST_GPR(1),
1196                             DST_REL(ABSOLUTE),
1197                             DST_SEL_X(SQ_SEL_X),
1198                             DST_SEL_Y(SQ_SEL_MASK),
1199                             DST_SEL_Z(SQ_SEL_MASK),
1200                             DST_SEL_W(SQ_SEL_1),
1201                             LOD_BIAS(0),
1202                             COORD_TYPE_X(TEX_NORMALIZED),
1203                             COORD_TYPE_Y(TEX_NORMALIZED),
1204                             COORD_TYPE_Z(TEX_NORMALIZED),
1205                             COORD_TYPE_W(TEX_NORMALIZED));
1206    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1207                             OFFSET_Y(0),
1208                             OFFSET_Z(0),
1209                             SAMPLER_ID(0),
1210                             SRC_SEL_X(SQ_SEL_X),
1211                             SRC_SEL_Y(SQ_SEL_Y),
1212                             SRC_SEL_Z(SQ_SEL_0),
1213                             SRC_SEL_W(SQ_SEL_1));
1214    shader[i++] = TEX_DWORD_PAD;
1215    /* 32/33 */
1216    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1217                             BC_FRAC_MODE(0),
1218                             FETCH_WHOLE_QUAD(0),
1219                             RESOURCE_ID(1),
1220                             SRC_GPR(0),
1221                             SRC_REL(ABSOLUTE),
1222                             R7xx_ALT_CONST(0));
1223    shader[i++] = TEX_DWORD1(DST_GPR(1),
1224                             DST_REL(ABSOLUTE),
1225                             DST_SEL_X(SQ_SEL_MASK),
1226                             DST_SEL_Y(SQ_SEL_X),
1227                             DST_SEL_Z(SQ_SEL_Y),
1228                             DST_SEL_W(SQ_SEL_MASK),
1229                             LOD_BIAS(0),
1230                             COORD_TYPE_X(TEX_NORMALIZED),
1231                             COORD_TYPE_Y(TEX_NORMALIZED),
1232                             COORD_TYPE_Z(TEX_NORMALIZED),
1233                             COORD_TYPE_W(TEX_NORMALIZED));
1234    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1235                             OFFSET_Y(0),
1236                             OFFSET_Z(0),
1237                             SAMPLER_ID(1),
1238                             SRC_SEL_X(SQ_SEL_X),
1239                             SRC_SEL_Y(SQ_SEL_Y),
1240                             SRC_SEL_Z(SQ_SEL_0),
1241                             SRC_SEL_W(SQ_SEL_1));
1242    shader[i++] = TEX_DWORD_PAD;
1243
1244    return i;
1245}
1246
1247/* comp mask ps --------------------------------------- */
1248int R600_comp_mask_ps(RADEONChipFamily ChipSet, uint32_t* shader)
1249{
1250    int i = 0;
1251
1252    /* 0 */
1253    shader[i++] = CF_DWORD0(ADDR(8));
1254    shader[i++] = CF_DWORD1(POP_COUNT(0),
1255			    CF_CONST(0),
1256			    COND(SQ_CF_COND_ACTIVE),
1257			    I_COUNT(2),
1258			    CALL_COUNT(0),
1259			    END_OF_PROGRAM(0),
1260			    VALID_PIXEL_MODE(0),
1261			    CF_INST(SQ_CF_INST_TEX),
1262			    WHOLE_QUAD_MODE(0),
1263			    BARRIER(1));
1264
1265    /* 1 */
1266    shader[i++] = CF_ALU_DWORD0(ADDR(3),
1267				KCACHE_BANK0(0),
1268				KCACHE_BANK1(0),
1269				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
1270    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1271				KCACHE_ADDR0(0),
1272				KCACHE_ADDR1(0),
1273				I_COUNT(4),
1274				USES_WATERFALL(0),
1275				CF_INST(SQ_CF_INST_ALU),
1276				WHOLE_QUAD_MODE(0),
1277				BARRIER(1));
1278
1279    /* 2 */
1280    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
1281					  TYPE(SQ_EXPORT_PIXEL),
1282					  RW_GPR(2),
1283					  RW_REL(ABSOLUTE),
1284					  INDEX_GPR(0),
1285					  ELEM_SIZE(1));
1286
1287    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1288					       SRC_SEL_Y(SQ_SEL_Y),
1289					       SRC_SEL_Z(SQ_SEL_Z),
1290					       SRC_SEL_W(SQ_SEL_W),
1291					       R6xx_ELEM_LOOP(0),
1292					       BURST_COUNT(1),
1293					       END_OF_PROGRAM(1),
1294					       VALID_PIXEL_MODE(0),
1295					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1296					       WHOLE_QUAD_MODE(0),
1297					       BARRIER(1));
1298
1299    /* 3 - alu 0 */
1300    /* MUL gpr[2].x gpr[1].x gpr[0].x */
1301    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
1302			     SRC0_REL(ABSOLUTE),
1303			     SRC0_ELEM(ELEM_X),
1304			     SRC0_NEG(0),
1305			     SRC1_SEL(0),
1306			     SRC1_REL(ABSOLUTE),
1307			     SRC1_ELEM(ELEM_X),
1308			     SRC1_NEG(0),
1309			     INDEX_MODE(SQ_INDEX_LOOP),
1310			     PRED_SEL(SQ_PRED_SEL_OFF),
1311			     LAST(0));
1312    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1313				 SRC0_ABS(0),
1314				 SRC1_ABS(0),
1315				 UPDATE_EXECUTE_MASK(0),
1316				 UPDATE_PRED(0),
1317				 WRITE_MASK(1),
1318				 FOG_MERGE(0),
1319				 OMOD(SQ_ALU_OMOD_OFF),
1320				 ALU_INST(SQ_OP2_INST_MUL),
1321				 BANK_SWIZZLE(SQ_ALU_VEC_012),
1322				 DST_GPR(2),
1323				 DST_REL(ABSOLUTE),
1324				 DST_ELEM(ELEM_X),
1325				 CLAMP(1));
1326    /* 4 - alu 1 */
1327    /* MUL gpr[2].y gpr[1].y gpr[0].y */
1328    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
1329			     SRC0_REL(ABSOLUTE),
1330			     SRC0_ELEM(ELEM_Y),
1331			     SRC0_NEG(0),
1332			     SRC1_SEL(0),
1333			     SRC1_REL(ABSOLUTE),
1334			     SRC1_ELEM(ELEM_Y),
1335			     SRC1_NEG(0),
1336			     INDEX_MODE(SQ_INDEX_LOOP),
1337			     PRED_SEL(SQ_PRED_SEL_OFF),
1338			     LAST(0));
1339    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1340				 SRC0_ABS(0),
1341				 SRC1_ABS(0),
1342				 UPDATE_EXECUTE_MASK(0),
1343				 UPDATE_PRED(0),
1344				 WRITE_MASK(1),
1345				 FOG_MERGE(0),
1346				 OMOD(SQ_ALU_OMOD_OFF),
1347				 ALU_INST(SQ_OP2_INST_MUL),
1348				 BANK_SWIZZLE(SQ_ALU_VEC_012),
1349				 DST_GPR(2),
1350				 DST_REL(ABSOLUTE),
1351				 DST_ELEM(ELEM_Y),
1352				 CLAMP(1));
1353    /* 5 - alu 2 */
1354    /* MUL gpr[2].z gpr[1].z gpr[0].z */
1355    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
1356			     SRC0_REL(ABSOLUTE),
1357			     SRC0_ELEM(ELEM_Z),
1358			     SRC0_NEG(0),
1359			     SRC1_SEL(0),
1360			     SRC1_REL(ABSOLUTE),
1361			     SRC1_ELEM(ELEM_Z),
1362			     SRC1_NEG(0),
1363			     INDEX_MODE(SQ_INDEX_LOOP),
1364			     PRED_SEL(SQ_PRED_SEL_OFF),
1365			     LAST(0));
1366    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1367				 SRC0_ABS(0),
1368				 SRC1_ABS(0),
1369				 UPDATE_EXECUTE_MASK(0),
1370				 UPDATE_PRED(0),
1371				 WRITE_MASK(1),
1372				 FOG_MERGE(0),
1373				 OMOD(SQ_ALU_OMOD_OFF),
1374				 ALU_INST(SQ_OP2_INST_MUL),
1375				 BANK_SWIZZLE(SQ_ALU_VEC_012),
1376				 DST_GPR(2),
1377				 DST_REL(ABSOLUTE),
1378				 DST_ELEM(ELEM_Z),
1379				 CLAMP(1));
1380    /* 6 - alu 3 */
1381    /* MUL gpr[2].w gpr[1].w gpr[0].w */
1382    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
1383			     SRC0_REL(ABSOLUTE),
1384			     SRC0_ELEM(ELEM_W),
1385			     SRC0_NEG(0),
1386			     SRC1_SEL(0),
1387			     SRC1_REL(ABSOLUTE),
1388			     SRC1_ELEM(ELEM_W),
1389			     SRC1_NEG(0),
1390			     INDEX_MODE(SQ_INDEX_LOOP),
1391			     PRED_SEL(SQ_PRED_SEL_OFF),
1392			     LAST(1));
1393    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1394				 SRC0_ABS(0),
1395				 SRC1_ABS(0),
1396				 UPDATE_EXECUTE_MASK(0),
1397				 UPDATE_PRED(0),
1398				 WRITE_MASK(1),
1399				 FOG_MERGE(0),
1400				 OMOD(SQ_ALU_OMOD_OFF),
1401				 ALU_INST(SQ_OP2_INST_MUL),
1402				 BANK_SWIZZLE(SQ_ALU_VEC_012),
1403				 DST_GPR(2),
1404				 DST_REL(ABSOLUTE),
1405				 DST_ELEM(ELEM_W),
1406				 CLAMP(1));
1407    /* 7 */
1408    shader[i++] = 0x00000000;
1409    shader[i++] = 0x00000000;
1410
1411    /* 8/9 - src */
1412    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1413			     BC_FRAC_MODE(0),
1414			     FETCH_WHOLE_QUAD(0),
1415			     RESOURCE_ID(0),
1416			     SRC_GPR(0),
1417			     SRC_REL(ABSOLUTE),
1418			     R7xx_ALT_CONST(0));
1419    shader[i++] = TEX_DWORD1(DST_GPR(0),
1420			     DST_REL(ABSOLUTE),
1421			     DST_SEL_X(SQ_SEL_X),
1422			     DST_SEL_Y(SQ_SEL_Y),
1423			     DST_SEL_Z(SQ_SEL_Z),
1424			     DST_SEL_W(SQ_SEL_W),
1425			     LOD_BIAS(0),
1426			     COORD_TYPE_X(TEX_NORMALIZED),
1427			     COORD_TYPE_Y(TEX_NORMALIZED),
1428			     COORD_TYPE_Z(TEX_NORMALIZED),
1429			     COORD_TYPE_W(TEX_NORMALIZED));
1430    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1431			     OFFSET_Y(0),
1432			     OFFSET_Z(0),
1433			     SAMPLER_ID(0),
1434			     SRC_SEL_X(SQ_SEL_X),
1435			     SRC_SEL_Y(SQ_SEL_Y),
1436			     SRC_SEL_Z(SQ_SEL_0),
1437			     SRC_SEL_W(SQ_SEL_1));
1438    shader[i++] = TEX_DWORD_PAD;
1439    /* 10/11 - mask */
1440    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1441			     BC_FRAC_MODE(0),
1442			     FETCH_WHOLE_QUAD(0),
1443			     RESOURCE_ID(1),
1444			     SRC_GPR(1),
1445			     SRC_REL(ABSOLUTE),
1446			     R7xx_ALT_CONST(0));
1447    shader[i++] = TEX_DWORD1(DST_GPR(1),
1448			     DST_REL(ABSOLUTE),
1449			     DST_SEL_X(SQ_SEL_X),
1450			     DST_SEL_Y(SQ_SEL_Y),
1451			     DST_SEL_Z(SQ_SEL_Z),
1452			     DST_SEL_W(SQ_SEL_W),
1453			     LOD_BIAS(0),
1454			     COORD_TYPE_X(TEX_NORMALIZED),
1455			     COORD_TYPE_Y(TEX_NORMALIZED),
1456			     COORD_TYPE_Z(TEX_NORMALIZED),
1457			     COORD_TYPE_W(TEX_NORMALIZED));
1458    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1459			     OFFSET_Y(0),
1460			     OFFSET_Z(0),
1461			     SAMPLER_ID(1),
1462			     SRC_SEL_X(SQ_SEL_X),
1463			     SRC_SEL_Y(SQ_SEL_Y),
1464			     SRC_SEL_Z(SQ_SEL_0),
1465			     SRC_SEL_W(SQ_SEL_1));
1466    shader[i++] = TEX_DWORD_PAD;
1467
1468    return i;
1469}
1470
1471/* comp vs --------------------------------------- */
1472int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
1473{
1474    int i = 0;
1475
1476    /* 0 */
1477    shader[i++] = CF_DWORD0(ADDR(3));
1478    shader[i++] = CF_DWORD1(POP_COUNT(0),
1479                            CF_CONST(0),
1480                            COND(SQ_CF_COND_BOOL),
1481                            I_COUNT(0),
1482                            CALL_COUNT(0),
1483                            END_OF_PROGRAM(0),
1484                            VALID_PIXEL_MODE(0),
1485                            CF_INST(SQ_CF_INST_CALL),
1486                            WHOLE_QUAD_MODE(0),
1487                            BARRIER(0));
1488    /* 1 */
1489    shader[i++] = CF_DWORD0(ADDR(14));
1490    shader[i++] = CF_DWORD1(POP_COUNT(0),
1491                            CF_CONST(0),
1492                            COND(SQ_CF_COND_NOT_BOOL),
1493                            I_COUNT(0),
1494                            CALL_COUNT(0),
1495                            END_OF_PROGRAM(0),
1496                            VALID_PIXEL_MODE(0),
1497                            CF_INST(SQ_CF_INST_CALL),
1498                            WHOLE_QUAD_MODE(0),
1499                            BARRIER(0));
1500    /* 2 */
1501    shader[i++] = CF_DWORD0(0);
1502    shader[i++] = CF_DWORD1(POP_COUNT(0),
1503                            CF_CONST(0),
1504                            COND(SQ_CF_COND_ACTIVE),
1505                            I_COUNT(0),
1506                            CALL_COUNT(0),
1507                            END_OF_PROGRAM(1),
1508                            VALID_PIXEL_MODE(0),
1509                            CF_INST(SQ_CF_INST_NOP),
1510                            WHOLE_QUAD_MODE(0),
1511                            BARRIER(1));
1512    /* 3 - mask sub */
1513    shader[i++] = CF_DWORD0(ADDR(8));
1514    shader[i++] = CF_DWORD1(POP_COUNT(0),
1515			    CF_CONST(0),
1516			    COND(SQ_CF_COND_ACTIVE),
1517			    I_COUNT(3),
1518			    CALL_COUNT(0),
1519			    END_OF_PROGRAM(0),
1520			    VALID_PIXEL_MODE(0),
1521			    CF_INST(SQ_CF_INST_VTX),
1522			    WHOLE_QUAD_MODE(0),
1523			    BARRIER(1));
1524    /* 4 - dst */
1525    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1526					  TYPE(SQ_EXPORT_POS),
1527					  RW_GPR(2),
1528					  RW_REL(ABSOLUTE),
1529					  INDEX_GPR(0),
1530					  ELEM_SIZE(0));
1531    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1532					       SRC_SEL_Y(SQ_SEL_Y),
1533					       SRC_SEL_Z(SQ_SEL_Z),
1534					       SRC_SEL_W(SQ_SEL_W),
1535					       R6xx_ELEM_LOOP(0),
1536					       BURST_COUNT(1),
1537					       END_OF_PROGRAM(0),
1538					       VALID_PIXEL_MODE(0),
1539					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1540					       WHOLE_QUAD_MODE(0),
1541					       BARRIER(1));
1542    /* 5 - src */
1543    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1544					  TYPE(SQ_EXPORT_PARAM),
1545					  RW_GPR(1),
1546					  RW_REL(ABSOLUTE),
1547					  INDEX_GPR(0),
1548					  ELEM_SIZE(0));
1549    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1550					       SRC_SEL_Y(SQ_SEL_Y),
1551					       SRC_SEL_Z(SQ_SEL_Z),
1552					       SRC_SEL_W(SQ_SEL_W),
1553					       R6xx_ELEM_LOOP(0),
1554					       BURST_COUNT(1),
1555					       END_OF_PROGRAM(0),
1556					       VALID_PIXEL_MODE(0),
1557					       CF_INST(SQ_CF_INST_EXPORT),
1558					       WHOLE_QUAD_MODE(0),
1559					       BARRIER(0));
1560    /* 6 - mask */
1561    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
1562					  TYPE(SQ_EXPORT_PARAM),
1563					  RW_GPR(0),
1564					  RW_REL(ABSOLUTE),
1565					  INDEX_GPR(0),
1566					  ELEM_SIZE(0));
1567    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1568					       SRC_SEL_Y(SQ_SEL_Y),
1569					       SRC_SEL_Z(SQ_SEL_Z),
1570					       SRC_SEL_W(SQ_SEL_W),
1571					       R6xx_ELEM_LOOP(0),
1572					       BURST_COUNT(1),
1573					       END_OF_PROGRAM(0),
1574					       VALID_PIXEL_MODE(0),
1575					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1576					       WHOLE_QUAD_MODE(0),
1577					       BARRIER(0));
1578    /* 7 */
1579    shader[i++] = CF_DWORD0(ADDR(0));
1580    shader[i++] = CF_DWORD1(POP_COUNT(0),
1581			    CF_CONST(0),
1582			    COND(SQ_CF_COND_ACTIVE),
1583			    I_COUNT(0),
1584			    CALL_COUNT(0),
1585			    END_OF_PROGRAM(0),
1586			    VALID_PIXEL_MODE(0),
1587			    CF_INST(SQ_CF_INST_RETURN),
1588			    WHOLE_QUAD_MODE(0),
1589			    BARRIER(1));
1590    /* 8/9 - dst */
1591    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
1592			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
1593			     FETCH_WHOLE_QUAD(0),
1594			     BUFFER_ID(0),
1595			     SRC_GPR(0),
1596			     SRC_REL(ABSOLUTE),
1597			     SRC_SEL_X(SQ_SEL_X),
1598			     MEGA_FETCH_COUNT(24));
1599    shader[i++] = VTX_DWORD1_GPR(DST_GPR(2),
1600				 DST_REL(0),
1601				 DST_SEL_X(SQ_SEL_X),
1602				 DST_SEL_Y(SQ_SEL_Y),
1603				 DST_SEL_Z(SQ_SEL_0),
1604				 DST_SEL_W(SQ_SEL_1),
1605				 USE_CONST_FIELDS(0),
1606				 DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
1607				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */
1608				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */
1609				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
1610    shader[i++] = VTX_DWORD2(OFFSET(0),
1611			     ENDIAN_SWAP(ENDIAN_NONE),
1612			     CONST_BUF_NO_STRIDE(0),
1613			     MEGA_FETCH(1));
1614    shader[i++] = VTX_DWORD_PAD;
1615    /* 10/11 - src */
1616    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
1617			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
1618			     FETCH_WHOLE_QUAD(0),
1619			     BUFFER_ID(0),
1620			     SRC_GPR(0),
1621			     SRC_REL(ABSOLUTE),
1622			     SRC_SEL_X(SQ_SEL_X),
1623			     MEGA_FETCH_COUNT(8));
1624    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
1625				 DST_REL(0),
1626				 DST_SEL_X(SQ_SEL_X),
1627				 DST_SEL_Y(SQ_SEL_Y),
1628				 DST_SEL_Z(SQ_SEL_0),
1629				 DST_SEL_W(SQ_SEL_1),
1630				 USE_CONST_FIELDS(0),
1631				 DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
1632				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */
1633				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */
1634				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
1635    shader[i++] = VTX_DWORD2(OFFSET(8),
1636			     ENDIAN_SWAP(ENDIAN_NONE),
1637			     CONST_BUF_NO_STRIDE(0),
1638			     MEGA_FETCH(0));
1639    shader[i++] = VTX_DWORD_PAD;
1640    /* 12/13 - mask */
1641    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
1642			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
1643			     FETCH_WHOLE_QUAD(0),
1644			     BUFFER_ID(0),
1645			     SRC_GPR(0),
1646			     SRC_REL(ABSOLUTE),
1647			     SRC_SEL_X(SQ_SEL_X),
1648			     MEGA_FETCH_COUNT(8));
1649    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
1650				 DST_REL(0),
1651				 DST_SEL_X(SQ_SEL_X),
1652				 DST_SEL_Y(SQ_SEL_Y),
1653				 DST_SEL_Z(SQ_SEL_0),
1654				 DST_SEL_W(SQ_SEL_1),
1655				 USE_CONST_FIELDS(0),
1656				 DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
1657				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */
1658				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */
1659				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
1660    shader[i++] = VTX_DWORD2(OFFSET(16),
1661			     ENDIAN_SWAP(ENDIAN_NONE),
1662			     CONST_BUF_NO_STRIDE(0),
1663			     MEGA_FETCH(0));
1664    shader[i++] = VTX_DWORD_PAD;
1665
1666    /* 14 - non-mask sub */
1667    shader[i++] = CF_DWORD0(ADDR(18));
1668    shader[i++] = CF_DWORD1(POP_COUNT(0),
1669			    CF_CONST(0),
1670			    COND(SQ_CF_COND_ACTIVE),
1671			    I_COUNT(2),
1672			    CALL_COUNT(0),
1673			    END_OF_PROGRAM(0),
1674			    VALID_PIXEL_MODE(0),
1675			    CF_INST(SQ_CF_INST_VTX),
1676			    WHOLE_QUAD_MODE(0),
1677			    BARRIER(1));
1678    /* 15 - dst */
1679    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1680					  TYPE(SQ_EXPORT_POS),
1681					  RW_GPR(1),
1682					  RW_REL(ABSOLUTE),
1683					  INDEX_GPR(0),
1684					  ELEM_SIZE(0));
1685    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1686					       SRC_SEL_Y(SQ_SEL_Y),
1687					       SRC_SEL_Z(SQ_SEL_Z),
1688					       SRC_SEL_W(SQ_SEL_W),
1689					       R6xx_ELEM_LOOP(0),
1690					       BURST_COUNT(0),
1691					       END_OF_PROGRAM(0),
1692					       VALID_PIXEL_MODE(0),
1693					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1694					       WHOLE_QUAD_MODE(0),
1695					       BARRIER(1));
1696    /* 16 - src */
1697    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1698					  TYPE(SQ_EXPORT_PARAM),
1699					  RW_GPR(0),
1700					  RW_REL(ABSOLUTE),
1701					  INDEX_GPR(0),
1702					  ELEM_SIZE(0));
1703    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1704					       SRC_SEL_Y(SQ_SEL_Y),
1705					       SRC_SEL_Z(SQ_SEL_Z),
1706					       SRC_SEL_W(SQ_SEL_W),
1707					       R6xx_ELEM_LOOP(0),
1708					       BURST_COUNT(0),
1709					       END_OF_PROGRAM(0),
1710					       VALID_PIXEL_MODE(0),
1711					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1712					       WHOLE_QUAD_MODE(0),
1713					       BARRIER(0));
1714    /* 17 */
1715    shader[i++] = CF_DWORD0(ADDR(0));
1716    shader[i++] = CF_DWORD1(POP_COUNT(0),
1717			    CF_CONST(0),
1718			    COND(SQ_CF_COND_ACTIVE),
1719			    I_COUNT(0),
1720			    CALL_COUNT(0),
1721			    END_OF_PROGRAM(0),
1722			    VALID_PIXEL_MODE(0),
1723			    CF_INST(SQ_CF_INST_RETURN),
1724			    WHOLE_QUAD_MODE(0),
1725			    BARRIER(1));
1726    /* 18/19 - dst */
1727    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
1728			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
1729			     FETCH_WHOLE_QUAD(0),
1730			     BUFFER_ID(0),
1731			     SRC_GPR(0),
1732			     SRC_REL(ABSOLUTE),
1733			     SRC_SEL_X(SQ_SEL_X),
1734			     MEGA_FETCH_COUNT(16));
1735    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
1736				 DST_REL(0),
1737				 DST_SEL_X(SQ_SEL_X),
1738				 DST_SEL_Y(SQ_SEL_Y),
1739				 DST_SEL_Z(SQ_SEL_0),
1740				 DST_SEL_W(SQ_SEL_1),
1741				 USE_CONST_FIELDS(0),
1742				 DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
1743				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */
1744				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */
1745				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
1746    shader[i++] = VTX_DWORD2(OFFSET(0),
1747			     ENDIAN_SWAP(ENDIAN_NONE),
1748			     CONST_BUF_NO_STRIDE(0),
1749			     MEGA_FETCH(1));
1750    shader[i++] = VTX_DWORD_PAD;
1751    /* 20/21 - src */
1752    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
1753			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
1754			     FETCH_WHOLE_QUAD(0),
1755			     BUFFER_ID(0),
1756			     SRC_GPR(0),
1757			     SRC_REL(ABSOLUTE),
1758			     SRC_SEL_X(SQ_SEL_X),
1759			     MEGA_FETCH_COUNT(8));
1760    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
1761				 DST_REL(0),
1762				 DST_SEL_X(SQ_SEL_X),
1763				 DST_SEL_Y(SQ_SEL_Y),
1764				 DST_SEL_Z(SQ_SEL_0),
1765				 DST_SEL_W(SQ_SEL_1),
1766				 USE_CONST_FIELDS(0),
1767				 DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
1768				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */
1769				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */
1770				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
1771    shader[i++] = VTX_DWORD2(OFFSET(8),
1772			     ENDIAN_SWAP(ENDIAN_NONE),
1773			     CONST_BUF_NO_STRIDE(0),
1774			     MEGA_FETCH(0));
1775    shader[i++] = VTX_DWORD_PAD;
1776
1777    return i;
1778}
1779
1780/* comp ps --------------------------------------- */
1781int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
1782{
1783    int i = 0;
1784
1785    /* 0 */
1786    shader[i++] = CF_DWORD0(ADDR(2));
1787    shader[i++] = CF_DWORD1(POP_COUNT(0),
1788			    CF_CONST(0),
1789			    COND(SQ_CF_COND_ACTIVE),
1790			    I_COUNT(1),
1791			    CALL_COUNT(0),
1792			    END_OF_PROGRAM(0),
1793			    VALID_PIXEL_MODE(0),
1794			    CF_INST(SQ_CF_INST_TEX),
1795			    WHOLE_QUAD_MODE(0),
1796			    BARRIER(1));
1797    /* 1 */
1798    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
1799					  TYPE(SQ_EXPORT_PIXEL),
1800					  RW_GPR(0),
1801					  RW_REL(ABSOLUTE),
1802					  INDEX_GPR(0),
1803					  ELEM_SIZE(1));
1804
1805    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1806					       SRC_SEL_Y(SQ_SEL_Y),
1807					       SRC_SEL_Z(SQ_SEL_Z),
1808					       SRC_SEL_W(SQ_SEL_W),
1809					       R6xx_ELEM_LOOP(0),
1810					       BURST_COUNT(1),
1811					       END_OF_PROGRAM(1),
1812					       VALID_PIXEL_MODE(0),
1813					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1814					       WHOLE_QUAD_MODE(0),
1815					       BARRIER(1));
1816
1817
1818    /* 2/3 - src */
1819    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1820			     BC_FRAC_MODE(0),
1821			     FETCH_WHOLE_QUAD(0),
1822			     RESOURCE_ID(0),
1823			     SRC_GPR(0),
1824			     SRC_REL(ABSOLUTE),
1825			     R7xx_ALT_CONST(0));
1826    shader[i++] = TEX_DWORD1(DST_GPR(0),
1827			     DST_REL(ABSOLUTE),
1828			     DST_SEL_X(SQ_SEL_X),
1829			     DST_SEL_Y(SQ_SEL_Y),
1830			     DST_SEL_Z(SQ_SEL_Z),
1831			     DST_SEL_W(SQ_SEL_W),
1832			     LOD_BIAS(0),
1833			     COORD_TYPE_X(TEX_NORMALIZED),
1834			     COORD_TYPE_Y(TEX_NORMALIZED),
1835			     COORD_TYPE_Z(TEX_NORMALIZED),
1836			     COORD_TYPE_W(TEX_NORMALIZED));
1837    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1838			     OFFSET_Y(0),
1839			     OFFSET_Z(0),
1840			     SAMPLER_ID(0),
1841			     SRC_SEL_X(SQ_SEL_X),
1842			     SRC_SEL_Y(SQ_SEL_Y),
1843			     SRC_SEL_Z(SQ_SEL_0),
1844			     SRC_SEL_W(SQ_SEL_1));
1845    shader[i++] = TEX_DWORD_PAD;
1846
1847    return i;
1848}
1849