r600_shader.c revision 0974d292
1b7e1c893Smrg/*
2b7e1c893Smrg * Copyright 2008 Advanced Micro Devices, Inc.
3b7e1c893Smrg *
4b7e1c893Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b7e1c893Smrg * copy of this software and associated documentation files (the "Software"),
6b7e1c893Smrg * to deal in the Software without restriction, including without limitation
7b7e1c893Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b7e1c893Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b7e1c893Smrg * Software is furnished to do so, subject to the following conditions:
10b7e1c893Smrg *
11b7e1c893Smrg * The above copyright notice and this permission notice (including the next
12b7e1c893Smrg * paragraph) shall be included in all copies or substantial portions of the
13b7e1c893Smrg * Software.
14b7e1c893Smrg *
15b7e1c893Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b7e1c893Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b7e1c893Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b7e1c893Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b7e1c893Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20b7e1c893Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21b7e1c893Smrg * SOFTWARE.
22b7e1c893Smrg *
23b7e1c893Smrg * Author: Alex Deucher <alexander.deucher@amd.com>
24b7e1c893Smrg *
25b7e1c893Smrg */
26b7e1c893Smrg
27b7e1c893Smrg#ifdef HAVE_CONFIG_H
28b7e1c893Smrg#include "config.h"
29b7e1c893Smrg#endif
30b7e1c893Smrg
31b7e1c893Smrg#include "xf86.h"
32b7e1c893Smrg
33b7e1c893Smrg#include "radeon.h"
34b7e1c893Smrg#include "r600_shader.h"
35b7e1c893Smrg#include "r600_reg.h"
36b7e1c893Smrg
37b7e1c893Smrg/* solid vs --------------------------------------- */
/*
 * Emit the "solid" vertex shader into shader[] and return the number of
 * 32-bit words written (12 as written here).
 *
 * shader  - caller-provided output buffer; it must have room for every
 *           word stored below (no bounds checking is done here).
 * ChipSet - not referenced by this function.
 *
 * Layout, counted in two-dword slots (the numbers in the comments below):
 *   0    CF VTX instruction, ADDR(4), I_COUNT(1)
 *   1    export POS0 from GPR 1
 *   2    export PARAM 0 from GPR 0, END_OF_PROGRAM(1)
 *   3    two zero dwords of padding
 *   4/5  vertex fetch: FMT_32_32_FLOAT into GPR 1, destination X, Y, 0, 1
 *
 * The fetch is stored at shader[8..11], i.e. slot 4, which is the value
 * given to ADDR() in slot 0.
 * NOTE(review): the padding in slot 3 presumably exists so that the
 * four-dword fetch starts on an even slot -- confirm against the ISA docs.
 */
38b7e1c893Smrgint R600_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader)
39b7e1c893Smrg{
40b7e1c893Smrg    int i = 0;
41b7e1c893Smrg
42b7e1c893Smrg    /* 0 */
43b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(4));
44b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
45b7e1c893Smrg			    CF_CONST(0),
46b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
47b7e1c893Smrg			    I_COUNT(1),
48b7e1c893Smrg			    CALL_COUNT(0),
49b7e1c893Smrg			    END_OF_PROGRAM(0),
50b7e1c893Smrg			    VALID_PIXEL_MODE(0),
51b7e1c893Smrg			    CF_INST(SQ_CF_INST_VTX),
52b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
53b7e1c893Smrg			    BARRIER(1));
54b7e1c893Smrg    /* 1 */
55b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
56b7e1c893Smrg					  TYPE(SQ_EXPORT_POS),
57b7e1c893Smrg					  RW_GPR(1),
58b7e1c893Smrg					  RW_REL(ABSOLUTE),
59b7e1c893Smrg					  INDEX_GPR(0),
60b7e1c893Smrg					  ELEM_SIZE(0));
61b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
62b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
63b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
64b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
65b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
66b7e1c893Smrg					       BURST_COUNT(1),
67b7e1c893Smrg					       END_OF_PROGRAM(0),
68b7e1c893Smrg					       VALID_PIXEL_MODE(0),
69b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
70b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
71b7e1c893Smrg					       BARRIER(1));
72b7e1c893Smrg    /* 2 - always export a param whether it's used or not */
73b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
74b7e1c893Smrg					  TYPE(SQ_EXPORT_PARAM),
75b7e1c893Smrg					  RW_GPR(0),
76b7e1c893Smrg					  RW_REL(ABSOLUTE),
77b7e1c893Smrg					  INDEX_GPR(0),
78b7e1c893Smrg					  ELEM_SIZE(0));
79b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
80b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
81b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
82b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
83b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
84b7e1c893Smrg					       BURST_COUNT(0),
85b7e1c893Smrg					       END_OF_PROGRAM(1),
86b7e1c893Smrg					       VALID_PIXEL_MODE(0),
87b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
88b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
89b7e1c893Smrg					       BARRIER(0));
90b7e1c893Smrg    /* 3 - padding */
91b7e1c893Smrg    shader[i++] = 0x00000000;
92b7e1c893Smrg    shader[i++] = 0x00000000;
93b7e1c893Smrg    /* 4/5 */
94b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
95b7e1c893Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
96b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
97b7e1c893Smrg			     BUFFER_ID(0),
98b7e1c893Smrg			     SRC_GPR(0),
99b7e1c893Smrg			     SRC_REL(ABSOLUTE),
100b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
101b7e1c893Smrg			     MEGA_FETCH_COUNT(8));
102b7e1c893Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
103b7e1c893Smrg				 DST_REL(0),
104b7e1c893Smrg				 DST_SEL_X(SQ_SEL_X),
105b7e1c893Smrg				 DST_SEL_Y(SQ_SEL_Y),
106b7e1c893Smrg				 DST_SEL_Z(SQ_SEL_0),
107b7e1c893Smrg				 DST_SEL_W(SQ_SEL_1),
108b7e1c893Smrg				 USE_CONST_FIELDS(0),
109ad43ddacSmrg				 DATA_FORMAT(FMT_32_32_FLOAT),
110ad43ddacSmrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
111ad43ddacSmrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
112b7e1c893Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
113b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
114b7e1c893Smrg			     ENDIAN_SWAP(ENDIAN_NONE),
115b7e1c893Smrg			     CONST_BUF_NO_STRIDE(0),
116b7e1c893Smrg			     MEGA_FETCH(1));
117b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
118b7e1c893Smrg
    /* i now holds the number of dwords stored in shader[] */
119b7e1c893Smrg    return i;
120b7e1c893Smrg}
121b7e1c893Smrg
122b7e1c893Smrg/* solid ps --------------------------------------- */
/*
 * Emit the "solid" pixel shader into shader[] and return the number of
 * 32-bit words written (12 as written here).
 *
 * shader  - caller-provided output buffer; no bounds checking is done.
 * ChipSet - forwarded to ALU_DWORD1_OP2() for every ALU instruction.
 *
 * Layout, counted in two-dword slots:
 *   0    CF ALU instruction, ADDR(2), I_COUNT(4)
 *   1    export to CF_PIXEL_MRT0 from GPR 0, END_OF_PROGRAM(1)
 *   2-5  four MOVs with CLAMP(1), one per element X/Y/Z/W, each copying
 *        source select 256 into the same element of GPR 0; only the
 *        fourth sets LAST(1)
 *
 * NOTE(review): source select 256 presumably addresses the first constant
 * register (holding the fill colour) -- confirm against the R600 ISA
 * documentation and the code that uploads the constants.
 */
123b7e1c893Smrgint R600_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader)
124b7e1c893Smrg{
125b7e1c893Smrg    int i = 0;
126b7e1c893Smrg
127b7e1c893Smrg    /* 0 */
128b7e1c893Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(2),
129b7e1c893Smrg				KCACHE_BANK0(0),
130b7e1c893Smrg				KCACHE_BANK1(0),
131b7e1c893Smrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
132b7e1c893Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
133b7e1c893Smrg				KCACHE_ADDR0(0),
134b7e1c893Smrg				KCACHE_ADDR1(0),
135b7e1c893Smrg				I_COUNT(4),
136b7e1c893Smrg				USES_WATERFALL(0),
137b7e1c893Smrg				CF_INST(SQ_CF_INST_ALU),
138b7e1c893Smrg				WHOLE_QUAD_MODE(0),
139b7e1c893Smrg				BARRIER(1));
140b7e1c893Smrg    /* 1 */
141b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
142b7e1c893Smrg					  TYPE(SQ_EXPORT_PIXEL),
143b7e1c893Smrg					  RW_GPR(0),
144b7e1c893Smrg					  RW_REL(ABSOLUTE),
145b7e1c893Smrg					  INDEX_GPR(0),
146b7e1c893Smrg					  ELEM_SIZE(1));
147b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
148b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
149b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
150b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
151b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
152b7e1c893Smrg					       BURST_COUNT(1),
153b7e1c893Smrg					       END_OF_PROGRAM(1),
154b7e1c893Smrg					       VALID_PIXEL_MODE(0),
155b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
156b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
157b7e1c893Smrg					       BARRIER(1));
158b7e1c893Smrg
159b7e1c893Smrg    /* 2 */
    /* MOV GPR0.x <- select 256, element X (SRC1 fields are filled in but MOV takes one source) */
160b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
161b7e1c893Smrg			     SRC0_REL(ABSOLUTE),
162b7e1c893Smrg			     SRC0_ELEM(ELEM_X),
163b7e1c893Smrg			     SRC0_NEG(0),
164b7e1c893Smrg			     SRC1_SEL(0),
165b7e1c893Smrg			     SRC1_REL(ABSOLUTE),
166b7e1c893Smrg			     SRC1_ELEM(ELEM_X),
167b7e1c893Smrg			     SRC1_NEG(0),
168b7e1c893Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
169b7e1c893Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
170b7e1c893Smrg			     LAST(0));
171b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
172b7e1c893Smrg				 SRC0_ABS(0),
173b7e1c893Smrg				 SRC1_ABS(0),
174b7e1c893Smrg				 UPDATE_EXECUTE_MASK(0),
175b7e1c893Smrg				 UPDATE_PRED(0),
176b7e1c893Smrg				 WRITE_MASK(1),
177b7e1c893Smrg				 FOG_MERGE(0),
178b7e1c893Smrg				 OMOD(SQ_ALU_OMOD_OFF),
179b7e1c893Smrg				 ALU_INST(SQ_OP2_INST_MOV),
180b7e1c893Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
181b7e1c893Smrg				 DST_GPR(0),
182b7e1c893Smrg				 DST_REL(ABSOLUTE),
183b7e1c893Smrg				 DST_ELEM(ELEM_X),
184b7e1c893Smrg				 CLAMP(1));
185b7e1c893Smrg    /* 3 */
    /* MOV GPR0.y <- select 256, element Y */
186b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
187b7e1c893Smrg			     SRC0_REL(ABSOLUTE),
188b7e1c893Smrg			     SRC0_ELEM(ELEM_Y),
189b7e1c893Smrg			     SRC0_NEG(0),
190b7e1c893Smrg			     SRC1_SEL(0),
191b7e1c893Smrg			     SRC1_REL(ABSOLUTE),
192b7e1c893Smrg			     SRC1_ELEM(ELEM_Y),
193b7e1c893Smrg			     SRC1_NEG(0),
194b7e1c893Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
195b7e1c893Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
196b7e1c893Smrg			     LAST(0));
197b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
198b7e1c893Smrg				 SRC0_ABS(0),
199b7e1c893Smrg				 SRC1_ABS(0),
200b7e1c893Smrg				 UPDATE_EXECUTE_MASK(0),
201b7e1c893Smrg				 UPDATE_PRED(0),
202b7e1c893Smrg				 WRITE_MASK(1),
203b7e1c893Smrg				 FOG_MERGE(0),
204b7e1c893Smrg				 OMOD(SQ_ALU_OMOD_OFF),
205b7e1c893Smrg				 ALU_INST(SQ_OP2_INST_MOV),
206b7e1c893Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
207b7e1c893Smrg				 DST_GPR(0),
208b7e1c893Smrg				 DST_REL(ABSOLUTE),
209b7e1c893Smrg				 DST_ELEM(ELEM_Y),
210b7e1c893Smrg				 CLAMP(1));
211b7e1c893Smrg    /* 4 */
    /* MOV GPR0.z <- select 256, element Z */
212b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
213b7e1c893Smrg			     SRC0_REL(ABSOLUTE),
214b7e1c893Smrg			     SRC0_ELEM(ELEM_Z),
215b7e1c893Smrg			     SRC0_NEG(0),
216b7e1c893Smrg			     SRC1_SEL(0),
217b7e1c893Smrg			     SRC1_REL(ABSOLUTE),
218b7e1c893Smrg			     SRC1_ELEM(ELEM_Z),
219b7e1c893Smrg			     SRC1_NEG(0),
220b7e1c893Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
221b7e1c893Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
222b7e1c893Smrg			     LAST(0));
223b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
224b7e1c893Smrg				 SRC0_ABS(0),
225b7e1c893Smrg				 SRC1_ABS(0),
226b7e1c893Smrg				 UPDATE_EXECUTE_MASK(0),
227b7e1c893Smrg				 UPDATE_PRED(0),
228b7e1c893Smrg				 WRITE_MASK(1),
229b7e1c893Smrg				 FOG_MERGE(0),
230b7e1c893Smrg				 OMOD(SQ_ALU_OMOD_OFF),
231b7e1c893Smrg				 ALU_INST(SQ_OP2_INST_MOV),
232b7e1c893Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
233b7e1c893Smrg				 DST_GPR(0),
234b7e1c893Smrg				 DST_REL(ABSOLUTE),
235b7e1c893Smrg				 DST_ELEM(ELEM_Z),
236b7e1c893Smrg				 CLAMP(1));
237b7e1c893Smrg    /* 5 */
    /* MOV GPR0.w <- select 256, element W; LAST(1) marks the final ALU instruction emitted */
238b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
239b7e1c893Smrg			     SRC0_REL(ABSOLUTE),
240b7e1c893Smrg			     SRC0_ELEM(ELEM_W),
241b7e1c893Smrg			     SRC0_NEG(0),
242b7e1c893Smrg			     SRC1_SEL(0),
243b7e1c893Smrg			     SRC1_REL(ABSOLUTE),
244b7e1c893Smrg			     SRC1_ELEM(ELEM_W),
245b7e1c893Smrg			     SRC1_NEG(0),
246b7e1c893Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
247b7e1c893Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
248b7e1c893Smrg			     LAST(1));
249b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
250b7e1c893Smrg				 SRC0_ABS(0),
251b7e1c893Smrg				 SRC1_ABS(0),
252b7e1c893Smrg				 UPDATE_EXECUTE_MASK(0),
253b7e1c893Smrg				 UPDATE_PRED(0),
254b7e1c893Smrg				 WRITE_MASK(1),
255b7e1c893Smrg				 FOG_MERGE(0),
256b7e1c893Smrg				 OMOD(SQ_ALU_OMOD_OFF),
257b7e1c893Smrg				 ALU_INST(SQ_OP2_INST_MOV),
258b7e1c893Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
259b7e1c893Smrg				 DST_GPR(0),
260b7e1c893Smrg				 DST_REL(ABSOLUTE),
261b7e1c893Smrg				 DST_ELEM(ELEM_W),
262b7e1c893Smrg				 CLAMP(1));
263b7e1c893Smrg
    /* i now holds the number of dwords stored in shader[] */
264b7e1c893Smrg    return i;
265b7e1c893Smrg}
266b7e1c893Smrg
267b7e1c893Smrg/* copy vs --------------------------------------- */
/*
 * Emit the "copy" vertex shader into shader[] and return the number of
 * 32-bit words written (16 as written here).
 *
 * shader  - caller-provided output buffer; no bounds checking is done.
 * ChipSet - not referenced by this function.
 *
 * Layout, counted in two-dword slots:
 *   0    CF VTX instruction, ADDR(4), I_COUNT(2)
 *   1    export POS0 from GPR 1
 *   2    export PARAM 0 from GPR 0, END_OF_PROGRAM(1)
 *   3    two zero dwords of padding
 *   4/5  vertex fetch, OFFSET(0), MEGA_FETCH(1), MEGA_FETCH_COUNT(16),
 *        FMT_32_32_FLOAT into GPR 1, destination X, Y, 0, 1
 *   6/7  vertex fetch, OFFSET(8), MEGA_FETCH(0), MEGA_FETCH_COUNT(8),
 *        FMT_32_32_FLOAT into GPR 0, destination X, Y, 0, 1
 *
 * NOTE(review): the two fetches presumably read position (bytes 0-7) and
 * texture coordinate (bytes 8-15) of one 16-byte vertex -- confirm against
 * the code that fills the vertex buffer.
 */
268b7e1c893Smrgint R600_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader)
269b7e1c893Smrg{
270b7e1c893Smrg    int i = 0;
271b7e1c893Smrg
272b7e1c893Smrg    /* 0 */
273b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(4));
274b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
275b7e1c893Smrg			    CF_CONST(0),
276b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
277b7e1c893Smrg			    I_COUNT(2),
278b7e1c893Smrg			    CALL_COUNT(0),
279b7e1c893Smrg			    END_OF_PROGRAM(0),
280b7e1c893Smrg			    VALID_PIXEL_MODE(0),
281b7e1c893Smrg			    CF_INST(SQ_CF_INST_VTX),
282b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
283b7e1c893Smrg			    BARRIER(1));
284b7e1c893Smrg    /* 1 */
285b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
286b7e1c893Smrg					  TYPE(SQ_EXPORT_POS),
287b7e1c893Smrg					  RW_GPR(1),
288b7e1c893Smrg					  RW_REL(ABSOLUTE),
289b7e1c893Smrg					  INDEX_GPR(0),
290b7e1c893Smrg					  ELEM_SIZE(0));
291b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
292b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
293b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
294b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
295b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
296b7e1c893Smrg					       BURST_COUNT(0),
297b7e1c893Smrg					       END_OF_PROGRAM(0),
298b7e1c893Smrg					       VALID_PIXEL_MODE(0),
299b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
300b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
301b7e1c893Smrg					       BARRIER(1));
302b7e1c893Smrg    /* 2 */
303b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
304b7e1c893Smrg					  TYPE(SQ_EXPORT_PARAM),
305b7e1c893Smrg					  RW_GPR(0),
306b7e1c893Smrg					  RW_REL(ABSOLUTE),
307b7e1c893Smrg					  INDEX_GPR(0),
308b7e1c893Smrg					  ELEM_SIZE(0));
309b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
310b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
311b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
312b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
313b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
314b7e1c893Smrg					       BURST_COUNT(0),
315b7e1c893Smrg					       END_OF_PROGRAM(1),
316b7e1c893Smrg					       VALID_PIXEL_MODE(0),
317b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
318b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
319b7e1c893Smrg					       BARRIER(0));
320b7e1c893Smrg    /* 3 - padding */
321b7e1c893Smrg    shader[i++] = 0x00000000;
322b7e1c893Smrg    shader[i++] = 0x00000000;
323b7e1c893Smrg    /* 4/5 */
324b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
325b7e1c893Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
326b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
327b7e1c893Smrg			     BUFFER_ID(0),
328b7e1c893Smrg			     SRC_GPR(0),
329b7e1c893Smrg			     SRC_REL(ABSOLUTE),
330b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
331b7e1c893Smrg			     MEGA_FETCH_COUNT(16));
332b7e1c893Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
333b7e1c893Smrg				 DST_REL(0),
334b7e1c893Smrg				 DST_SEL_X(SQ_SEL_X),
335b7e1c893Smrg				 DST_SEL_Y(SQ_SEL_Y),
336b7e1c893Smrg				 DST_SEL_Z(SQ_SEL_0),
337b7e1c893Smrg				 DST_SEL_W(SQ_SEL_1),
338b7e1c893Smrg				 USE_CONST_FIELDS(0),
339ad43ddacSmrg				 DATA_FORMAT(FMT_32_32_FLOAT),
340ad43ddacSmrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
341ad43ddacSmrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
342b7e1c893Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
343b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
344b7e1c893Smrg			     ENDIAN_SWAP(ENDIAN_NONE),
345b7e1c893Smrg			     CONST_BUF_NO_STRIDE(0),
346b7e1c893Smrg			     MEGA_FETCH(1));
347b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
348b7e1c893Smrg    /* 6/7 */
349b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
350b7e1c893Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
351b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
352b7e1c893Smrg			     BUFFER_ID(0),
353b7e1c893Smrg			     SRC_GPR(0),
354b7e1c893Smrg			     SRC_REL(ABSOLUTE),
355b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
356b7e1c893Smrg			     MEGA_FETCH_COUNT(8));
357b7e1c893Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
358b7e1c893Smrg				 DST_REL(0),
359b7e1c893Smrg				 DST_SEL_X(SQ_SEL_X),
360b7e1c893Smrg				 DST_SEL_Y(SQ_SEL_Y),
361b7e1c893Smrg				 DST_SEL_Z(SQ_SEL_0),
362b7e1c893Smrg				 DST_SEL_W(SQ_SEL_1),
363b7e1c893Smrg				 USE_CONST_FIELDS(0),
364ad43ddacSmrg				 DATA_FORMAT(FMT_32_32_FLOAT),
365ad43ddacSmrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
366ad43ddacSmrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
367b7e1c893Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
368b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
369b7e1c893Smrg			     ENDIAN_SWAP(ENDIAN_NONE),
370b7e1c893Smrg			     CONST_BUF_NO_STRIDE(0),
371b7e1c893Smrg			     MEGA_FETCH(0));
372b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
373b7e1c893Smrg
    /* i now holds the number of dwords stored in shader[] */
374b7e1c893Smrg    return i;
375b7e1c893Smrg}
376b7e1c893Smrg
377b7e1c893Smrg/* copy ps --------------------------------------- */
/*
 * Emit the "copy" pixel shader into shader[] and return the number of
 * 32-bit words written (8 as written here).
 *
 * shader  - caller-provided output buffer; no bounds checking is done.
 * ChipSet - not referenced by this function.
 *
 * Layout, counted in two-dword slots:
 *   0    CF TEX instruction, ADDR(2), I_COUNT(1)
 *   1    export to CF_PIXEL_MRT0 from GPR 0, END_OF_PROGRAM(1)
 *   2/3  SQ_TEX_INST_SAMPLE with RESOURCE_ID(0) and SAMPLER_ID(0):
 *        coordinates taken from GPR 0 as X, Y, 0, 1 with every
 *        COORD_TYPE set to TEX_UNNORMALIZED; result written to GPR 0
 *        as X, Y, Z, W
 */
378b7e1c893Smrgint R600_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader)
379b7e1c893Smrg{
380b7e1c893Smrg    int i=0;
381b7e1c893Smrg
382b7e1c893Smrg    /* CF INST 0 */
383b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(2));
384b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
385b7e1c893Smrg			    CF_CONST(0),
386b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
387b7e1c893Smrg			    I_COUNT(1),
388b7e1c893Smrg			    CALL_COUNT(0),
389b7e1c893Smrg			    END_OF_PROGRAM(0),
390b7e1c893Smrg			    VALID_PIXEL_MODE(0),
391b7e1c893Smrg			    CF_INST(SQ_CF_INST_TEX),
392b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
393b7e1c893Smrg			    BARRIER(1));
394b7e1c893Smrg    /* CF INST 1 */
395b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
396b7e1c893Smrg					  TYPE(SQ_EXPORT_PIXEL),
397b7e1c893Smrg					  RW_GPR(0),
398b7e1c893Smrg					  RW_REL(ABSOLUTE),
399b7e1c893Smrg					  INDEX_GPR(0),
400b7e1c893Smrg					  ELEM_SIZE(1));
401b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
402b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
403b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
404b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
405b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
406b7e1c893Smrg					       BURST_COUNT(1),
407b7e1c893Smrg					       END_OF_PROGRAM(1),
408b7e1c893Smrg					       VALID_PIXEL_MODE(0),
409b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
410b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
411b7e1c893Smrg					       BARRIER(1));
412b7e1c893Smrg    /* TEX INST 0 */
413b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
414b7e1c893Smrg			     BC_FRAC_MODE(0),
415b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
416b7e1c893Smrg			     RESOURCE_ID(0),
417b7e1c893Smrg			     SRC_GPR(0),
418b7e1c893Smrg			     SRC_REL(ABSOLUTE),
419b7e1c893Smrg			     R7xx_ALT_CONST(0));
420b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(0),
421b7e1c893Smrg			     DST_REL(ABSOLUTE),
422b7e1c893Smrg			     DST_SEL_X(SQ_SEL_X), /* R */
423b7e1c893Smrg			     DST_SEL_Y(SQ_SEL_Y), /* G */
424b7e1c893Smrg			     DST_SEL_Z(SQ_SEL_Z), /* B */
425b7e1c893Smrg			     DST_SEL_W(SQ_SEL_W), /* A */
426b7e1c893Smrg			     LOD_BIAS(0),
427b7e1c893Smrg			     COORD_TYPE_X(TEX_UNNORMALIZED),
428b7e1c893Smrg			     COORD_TYPE_Y(TEX_UNNORMALIZED),
429b7e1c893Smrg			     COORD_TYPE_Z(TEX_UNNORMALIZED),
430b7e1c893Smrg			     COORD_TYPE_W(TEX_UNNORMALIZED));
431b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
432b7e1c893Smrg			     OFFSET_Y(0),
433b7e1c893Smrg			     OFFSET_Z(0),
434b7e1c893Smrg			     SAMPLER_ID(0),
435b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
436b7e1c893Smrg			     SRC_SEL_Y(SQ_SEL_Y),
437b7e1c893Smrg			     SRC_SEL_Z(SQ_SEL_0),
438b7e1c893Smrg			     SRC_SEL_W(SQ_SEL_1));
439b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
440b7e1c893Smrg
    /* i now holds the number of dwords stored in shader[] */
441b7e1c893Smrg    return i;
442b7e1c893Smrg}
443b7e1c893Smrg
444b7e1c893Smrg/*
445b7e1c893Smrg * ; xv vertex shader
446b7e1c893Smrg * 00 VTX: ADDR(6) CNT(2)
447b7e1c893Smrg *       0  VFETCH R1.xy01, R0.x, fc0  MEGA(16) FORMAT(32_32_FLOAT)
448b7e1c893Smrg *          FORMAT_COMP(SIGNED)
449b7e1c893Smrg *       1  VFETCH R0.xy01, R0.x, fc0  MINI(8) OFFSET(8) FORMAT(32_32_FLOAT)
450b7e1c893Smrg *          FORMAT_COMP(SIGNED)
 * 01 ALU: ADDR(4) CNT(2)
 *       2  x: MUL R0.x, R0.x, C0.x
 *          y: MUL R0.y, R0.y, C0.y
 *          (C0 here stands for source select 256 as used in the code)
451b7e1c893Smrg * 02 EXP_DONE: POS0, R1
452b7e1c893Smrg * 03 EXP_DONE: PARAM0, R0  NO_BARRIER
453b7e1c893Smrg * END_OF_PROGRAM
454b7e1c893Smrg */
/*
 * Emit the Xv vertex shader into shader[] and return the number of
 * 32-bit words written (20 as written here).
 *
 * shader  - caller-provided output buffer; no bounds checking is done.
 * ChipSet - forwarded to ALU_DWORD1_OP2() for the two ALU instructions.
 *
 * Layout, counted in two-dword slots.  Note that the storage order is not
 * the execution order: CF 0 points at the fetches stored last, CF 1 points
 * at the ALU instructions stored before them.
 *   0    CF VTX instruction, ADDR(6), I_COUNT(2)
 *   1    CF ALU instruction, ADDR(4), I_COUNT(2)
 *   2    export POS0 from GPR 1
 *   3    export PARAM 0 from GPR 0, END_OF_PROGRAM(1)
 *   4    MUL GPR0.x = GPR0.x * (source select 256).x
 *   5    MUL GPR0.y = GPR0.y * (source select 256).y, LAST(1)
 *   6/7  vertex fetch, OFFSET(0), FMT_32_32_FLOAT into GPR 1 (X, Y, 0, 1)
 *   8/9  vertex fetch, OFFSET(8), FMT_32_32_FLOAT into GPR 0 (X, Y, 0, 1)
 *
 * NOTE(review): source select 256 presumably addresses the first constant
 * register, expected to hold the reciprocal texture width and height given
 * the "texX / w" and "texY / h" comments on a MUL -- confirm against the
 * code that uploads the constants.
 */
455b7e1c893Smrgint R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
456b7e1c893Smrg{
457b7e1c893Smrg    int i = 0;
458b7e1c893Smrg
459b7e1c893Smrg    /* 0 */
460ad43ddacSmrg    shader[i++] = CF_DWORD0(ADDR(6));
461b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
462b7e1c893Smrg                            CF_CONST(0),
463b7e1c893Smrg                            COND(SQ_CF_COND_ACTIVE),
464b7e1c893Smrg                            I_COUNT(2),
465b7e1c893Smrg                            CALL_COUNT(0),
466b7e1c893Smrg                            END_OF_PROGRAM(0),
467b7e1c893Smrg                            VALID_PIXEL_MODE(0),
468b7e1c893Smrg                            CF_INST(SQ_CF_INST_VTX),
469b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
470b7e1c893Smrg                            BARRIER(1));
471ad43ddacSmrg
472ad43ddacSmrg    /* 1 - ALU */
473ad43ddacSmrg    shader[i++] = CF_ALU_DWORD0(ADDR(4),
474ad43ddacSmrg				KCACHE_BANK0(0),
475ad43ddacSmrg				KCACHE_BANK1(0),
476ad43ddacSmrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
477ad43ddacSmrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
478ad43ddacSmrg				KCACHE_ADDR0(0),
479ad43ddacSmrg				KCACHE_ADDR1(0),
480ad43ddacSmrg				I_COUNT(2),
481ad43ddacSmrg				USES_WATERFALL(0),
482ad43ddacSmrg				CF_INST(SQ_CF_INST_ALU),
483ad43ddacSmrg				WHOLE_QUAD_MODE(0),
484ad43ddacSmrg				BARRIER(1));
485ad43ddacSmrg
486ad43ddacSmrg    /* 2 */
487b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
488b7e1c893Smrg                                          TYPE(SQ_EXPORT_POS),
489b7e1c893Smrg                                          RW_GPR(1),
490b7e1c893Smrg                                          RW_REL(ABSOLUTE),
491b7e1c893Smrg                                          INDEX_GPR(0),
492b7e1c893Smrg                                          ELEM_SIZE(3));
493b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
494b7e1c893Smrg                                               SRC_SEL_Y(SQ_SEL_Y),
495b7e1c893Smrg                                               SRC_SEL_Z(SQ_SEL_Z),
496b7e1c893Smrg                                               SRC_SEL_W(SQ_SEL_W),
497b7e1c893Smrg                                               R6xx_ELEM_LOOP(0),
498b7e1c893Smrg                                               BURST_COUNT(1),
499b7e1c893Smrg                                               END_OF_PROGRAM(0),
500b7e1c893Smrg                                               VALID_PIXEL_MODE(0),
501b7e1c893Smrg                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
502b7e1c893Smrg                                               WHOLE_QUAD_MODE(0),
503b7e1c893Smrg                                               BARRIER(1));
504ad43ddacSmrg    /* 3 */
505b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
506b7e1c893Smrg                                          TYPE(SQ_EXPORT_PARAM),
507b7e1c893Smrg                                          RW_GPR(0),
508b7e1c893Smrg                                          RW_REL(ABSOLUTE),
509b7e1c893Smrg                                          INDEX_GPR(0),
510b7e1c893Smrg                                          ELEM_SIZE(3));
511b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
512b7e1c893Smrg                                               SRC_SEL_Y(SQ_SEL_Y),
513b7e1c893Smrg                                               SRC_SEL_Z(SQ_SEL_Z),
514b7e1c893Smrg                                               SRC_SEL_W(SQ_SEL_W),
515b7e1c893Smrg                                               R6xx_ELEM_LOOP(0),
516b7e1c893Smrg                                               BURST_COUNT(1),
517b7e1c893Smrg                                               END_OF_PROGRAM(1),
518b7e1c893Smrg                                               VALID_PIXEL_MODE(0),
519b7e1c893Smrg                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
520b7e1c893Smrg                                               WHOLE_QUAD_MODE(0),
521b7e1c893Smrg                                               BARRIER(0));
522ad43ddacSmrg
523ad43ddacSmrg
524ad43ddacSmrg    /* 4 texX / w */
525ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(0),
526ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
527ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
528ad43ddacSmrg                             SRC0_NEG(0),
529ad43ddacSmrg                             SRC1_SEL(256),
530ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
531ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
532ad43ddacSmrg                             SRC1_NEG(0),
533ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
534ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
535ad43ddacSmrg                             LAST(0));
536ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
537ad43ddacSmrg                                 SRC0_ABS(0),
538ad43ddacSmrg                                 SRC1_ABS(0),
539ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
540ad43ddacSmrg                                 UPDATE_PRED(0),
541ad43ddacSmrg                                 WRITE_MASK(1),
542ad43ddacSmrg                                 FOG_MERGE(0),
543ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
544ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
545ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
546ad43ddacSmrg                                 DST_GPR(0),
547ad43ddacSmrg                                 DST_REL(ABSOLUTE),
548ad43ddacSmrg                                 DST_ELEM(ELEM_X),
549ad43ddacSmrg                                 CLAMP(0));
550ad43ddacSmrg
551ad43ddacSmrg    /* 5 texY / h */
552ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(0),
553ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
554ad43ddacSmrg                             SRC0_ELEM(ELEM_Y),
555ad43ddacSmrg                             SRC0_NEG(0),
556ad43ddacSmrg                             SRC1_SEL(256),
557ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
558ad43ddacSmrg                             SRC1_ELEM(ELEM_Y),
559ad43ddacSmrg                             SRC1_NEG(0),
560ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
561ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
562ad43ddacSmrg                             LAST(1));
563ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
564ad43ddacSmrg                                 SRC0_ABS(0),
565ad43ddacSmrg                                 SRC1_ABS(0),
566ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
567ad43ddacSmrg                                 UPDATE_PRED(0),
568ad43ddacSmrg                                 WRITE_MASK(1),
569ad43ddacSmrg                                 FOG_MERGE(0),
570ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
571ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
572ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
573ad43ddacSmrg                                 DST_GPR(0),
574ad43ddacSmrg                                 DST_REL(ABSOLUTE),
575ad43ddacSmrg                                 DST_ELEM(ELEM_Y),
576ad43ddacSmrg                                 CLAMP(0));
577ad43ddacSmrg
578ad43ddacSmrg    /* 6/7 */
579b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
580b7e1c893Smrg                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
581b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
582b7e1c893Smrg                             BUFFER_ID(0),
583b7e1c893Smrg                             SRC_GPR(0),
584b7e1c893Smrg                             SRC_REL(ABSOLUTE),
585b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
586b7e1c893Smrg                             MEGA_FETCH_COUNT(16));
587b7e1c893Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
588b7e1c893Smrg                                 DST_REL(ABSOLUTE),
589b7e1c893Smrg                                 DST_SEL_X(SQ_SEL_X),
590b7e1c893Smrg                                 DST_SEL_Y(SQ_SEL_Y),
591b7e1c893Smrg                                 DST_SEL_Z(SQ_SEL_0),
592b7e1c893Smrg                                 DST_SEL_W(SQ_SEL_1),
593b7e1c893Smrg                                 USE_CONST_FIELDS(0),
594b7e1c893Smrg                                 DATA_FORMAT(FMT_32_32_FLOAT),
595ad43ddacSmrg                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
596b7e1c893Smrg                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
597b7e1c893Smrg                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
598b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
599b7e1c893Smrg                             ENDIAN_SWAP(ENDIAN_NONE),
600b7e1c893Smrg                             CONST_BUF_NO_STRIDE(0),
601b7e1c893Smrg                             MEGA_FETCH(1));
602b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
603ad43ddacSmrg    /* 8/9 */
604b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
605b7e1c893Smrg                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
606b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
607b7e1c893Smrg                             BUFFER_ID(0),
608b7e1c893Smrg                             SRC_GPR(0),
609b7e1c893Smrg                             SRC_REL(ABSOLUTE),
610b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
611b7e1c893Smrg                             MEGA_FETCH_COUNT(8));
612b7e1c893Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
613b7e1c893Smrg                                 DST_REL(ABSOLUTE),
614b7e1c893Smrg                                 DST_SEL_X(SQ_SEL_X),
615b7e1c893Smrg                                 DST_SEL_Y(SQ_SEL_Y),
616b7e1c893Smrg                                 DST_SEL_Z(SQ_SEL_0),
617b7e1c893Smrg                                 DST_SEL_W(SQ_SEL_1),
618b7e1c893Smrg                                 USE_CONST_FIELDS(0),
619b7e1c893Smrg                                 DATA_FORMAT(FMT_32_32_FLOAT),
620ad43ddacSmrg                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
621b7e1c893Smrg                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
622b7e1c893Smrg                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
623b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
624b7e1c893Smrg                             ENDIAN_SWAP(ENDIAN_NONE),
625b7e1c893Smrg                             CONST_BUF_NO_STRIDE(0),
626b7e1c893Smrg                             MEGA_FETCH(0));
627b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
628b7e1c893Smrg
    /* i now holds the number of dwords stored in shader[] */
629b7e1c893Smrg    return i;
630b7e1c893Smrg}
631b7e1c893Smrg
/*
 * R600_xv_ps - emit the Xv pixel shader into a dword buffer.
 *
 * Stores the shader as consecutive 32-bit words in shader[], starting at
 * index 0, and returns the number of words stored (60 as laid out below).
 * No bounds check is done here, so the caller has to supply a buffer that
 * is large enough.
 *
 * ChipSet is not referenced by name in this body.
 * NOTE(review): the R6xx_/R7xx_-prefixed field macros presumably consult
 * it -- confirm in r600_shader.h.
 *
 * The numeric labels below are instruction slots of two dwords each, and
 * the ADDR() operands of the CF instructions refer to those slots:
 *   0-3    control flow: two conditional CALLs, one ALU clause, one export
 *   4-15   ALU clause: three groups of four MULADDs building r2 from r1
 *   16-17  subroutine A: TEX clause (slots 18-23, three samples) + RETURN
 *   24-25  subroutine B: TEX clause (slots 26-29, two samples) + RETURN
 * Both subroutines fill r1 from texture resources, using GPR 0 (x, y) as
 * the coordinate source.  Presumably A serves three-plane and B two-plane
 * video formats and r2 is the colour-converted pixel -- TODO confirm
 * against the caller.
 */
632b7e1c893Smrgint R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
633b7e1c893Smrg{
634b7e1c893Smrg    int i = 0;
635b7e1c893Smrg
636b7e1c893Smrg    /* 0: CALL slot 16 (subroutine A), conditional on SQ_CF_COND_BOOL / CF_CONST(0) */
637ad43ddacSmrg    shader[i++] = CF_DWORD0(ADDR(16));
638b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
639b7e1c893Smrg                            CF_CONST(0),
640b7e1c893Smrg                            COND(SQ_CF_COND_BOOL),
641b7e1c893Smrg                            I_COUNT(0),
642b7e1c893Smrg                            CALL_COUNT(0),
643b7e1c893Smrg                            END_OF_PROGRAM(0),
644b7e1c893Smrg                            VALID_PIXEL_MODE(0),
645b7e1c893Smrg                            CF_INST(SQ_CF_INST_CALL),
646b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
647b7e1c893Smrg                            BARRIER(0));
648b7e1c893Smrg    /* 1: CALL slot 24 (subroutine B) under the opposite condition
     *    (SQ_CF_COND_NOT_BOOL on the same CF_CONST(0)); presumably exactly
     *    one of the two subroutines runs -- confirm against the ISA docs. */
649ad43ddacSmrg    shader[i++] = CF_DWORD0(ADDR(24));
650b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
651b7e1c893Smrg                            CF_CONST(0),
652b7e1c893Smrg                            COND(SQ_CF_COND_NOT_BOOL),
653b7e1c893Smrg                            I_COUNT(0),
654b7e1c893Smrg                            CALL_COUNT(0),
655b7e1c893Smrg                            END_OF_PROGRAM(0),
656b7e1c893Smrg                            VALID_PIXEL_MODE(0),
657b7e1c893Smrg                            CF_INST(SQ_CF_INST_CALL),
658b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
659b7e1c893Smrg                            BARRIER(0));
660b7e1c893Smrg    /* 2: ALU clause, 12 instructions starting at slot 4 */
661b7e1c893Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(4),
662b7e1c893Smrg                                KCACHE_BANK0(0),
663b7e1c893Smrg                                KCACHE_BANK1(0),
664b7e1c893Smrg                                KCACHE_MODE0(SQ_CF_KCACHE_NOP));
665b7e1c893Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
666b7e1c893Smrg                                KCACHE_ADDR0(0),
667b7e1c893Smrg                                KCACHE_ADDR1(0),
668ad43ddacSmrg                                I_COUNT(12),
669b7e1c893Smrg                                USES_WATERFALL(0),
670b7e1c893Smrg                                CF_INST(SQ_CF_INST_ALU),
671b7e1c893Smrg                                WHOLE_QUAD_MODE(0),
672b7e1c893Smrg                                BARRIER(1));
673b7e1c893Smrg    /* 3: export GPR 2 (unswizzled x,y,z,w) to pixel target MRT0; this
     *    is the instruction flagged END_OF_PROGRAM(1) */
674b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
675b7e1c893Smrg                                          TYPE(SQ_EXPORT_PIXEL),
676b7e1c893Smrg                                          RW_GPR(2),
677b7e1c893Smrg                                          RW_REL(ABSOLUTE),
678b7e1c893Smrg                                          INDEX_GPR(0),
679b7e1c893Smrg                                          ELEM_SIZE(3));
680b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
681b7e1c893Smrg                                               SRC_SEL_Y(SQ_SEL_Y),
682b7e1c893Smrg                                               SRC_SEL_Z(SQ_SEL_Z),
683b7e1c893Smrg                                               SRC_SEL_W(SQ_SEL_W),
684b7e1c893Smrg                                               R6xx_ELEM_LOOP(0),
685b7e1c893Smrg                                               BURST_COUNT(1),
686b7e1c893Smrg                                               END_OF_PROGRAM(1),
687b7e1c893Smrg                                               VALID_PIXEL_MODE(0),
688b7e1c893Smrg                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
689b7e1c893Smrg                                               WHOLE_QUAD_MODE(0),
690b7e1c893Smrg                                               BARRIER(1));
    /*
     * ALU clause body.  Source selects 256/257/258 are the constants the
     * per-instruction comments call c0/c1/c2; select 1 is GPR r1.  In each
     * group of four only the fourth instruction has LAST(1).
     * Group 1 seeds r2.xyz with c0.w * r1.x + c0.xyz and sets r2.w to 1.
     */
691ad43ddacSmrg    /* 4,5,6,7 */
692ad43ddacSmrg    /* r2.x = MAD(c0.w, r1.x, c0.x) */
693ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
694b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
695ad43ddacSmrg                             SRC0_ELEM(ELEM_W),
696b7e1c893Smrg                             SRC0_NEG(0),
697ad43ddacSmrg                             SRC1_SEL(1),
698b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
699b7e1c893Smrg                             SRC1_ELEM(ELEM_X),
700b7e1c893Smrg                             SRC1_NEG(0),
701b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
702b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
703b7e1c893Smrg                             LAST(0));
704ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256),
705b7e1c893Smrg                                 SRC2_REL(ABSOLUTE),
706ad43ddacSmrg                                 SRC2_ELEM(ELEM_X),
707b7e1c893Smrg                                 SRC2_NEG(0),
708b7e1c893Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
709b7e1c893Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
710ad43ddacSmrg                                 DST_GPR(2),
711b7e1c893Smrg                                 DST_REL(ABSOLUTE),
712b7e1c893Smrg                                 DST_ELEM(ELEM_X),
713ad43ddacSmrg                                 CLAMP(0));
714ad43ddacSmrg    /* r2.y = MAD(c0.w, r1.x, c0.y) */
715ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
716b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
717ad43ddacSmrg                             SRC0_ELEM(ELEM_W),
718b7e1c893Smrg                             SRC0_NEG(0),
719ad43ddacSmrg                             SRC1_SEL(1),
720b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
721ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
722b7e1c893Smrg                             SRC1_NEG(0),
723b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
724b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
725b7e1c893Smrg                             LAST(0));
726ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256),
727b7e1c893Smrg                                 SRC2_REL(ABSOLUTE),
728ad43ddacSmrg                                 SRC2_ELEM(ELEM_Y),
729b7e1c893Smrg                                 SRC2_NEG(0),
730b7e1c893Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
731b7e1c893Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
732ad43ddacSmrg                                 DST_GPR(2),
733b7e1c893Smrg                                 DST_REL(ABSOLUTE),
734b7e1c893Smrg                                 DST_ELEM(ELEM_Y),
735b7e1c893Smrg                                 CLAMP(0));
736ad43ddacSmrg    /* r2.z = MAD(c0.w, r1.x, c0.z) */
737ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
738b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
739ad43ddacSmrg                             SRC0_ELEM(ELEM_W),
740b7e1c893Smrg                             SRC0_NEG(0),
741ad43ddacSmrg                             SRC1_SEL(1),
742b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
743ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
744b7e1c893Smrg                             SRC1_NEG(0),
745b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
746b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
747b7e1c893Smrg                             LAST(0));
748ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256),
749b7e1c893Smrg                                 SRC2_REL(ABSOLUTE),
750ad43ddacSmrg                                 SRC2_ELEM(ELEM_Z),
751b7e1c893Smrg                                 SRC2_NEG(0),
752b7e1c893Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
753b7e1c893Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
754ad43ddacSmrg                                 DST_GPR(2),
755b7e1c893Smrg                                 DST_REL(ABSOLUTE),
756b7e1c893Smrg                                 DST_ELEM(ELEM_Z),
757b7e1c893Smrg                                 CLAMP(0));
758ad43ddacSmrg    /* r2.w = MAD(0, 0, 1) */
759b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
760b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
761b7e1c893Smrg                             SRC0_ELEM(ELEM_X),
762b7e1c893Smrg                             SRC0_NEG(0),
763b7e1c893Smrg                             SRC1_SEL(SQ_ALU_SRC_0),
764b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
765b7e1c893Smrg                             SRC1_ELEM(ELEM_X),
766b7e1c893Smrg                             SRC1_NEG(0),
767b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
768b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
769b7e1c893Smrg                             LAST(1));
770ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
771ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
772ad43ddacSmrg                                 SRC2_ELEM(ELEM_X),
773ad43ddacSmrg                                 SRC2_NEG(0),
774ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
775b7e1c893Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
776ad43ddacSmrg                                 DST_GPR(2),
777b7e1c893Smrg                                 DST_REL(ABSOLUTE),
778b7e1c893Smrg                                 DST_ELEM(ELEM_W),
779b7e1c893Smrg                                 CLAMP(0));
780ad43ddacSmrg
    /*
     * Group 2 adds c1.xyz * r1.y to the running value.  The addend comes
     * from SQ_ALU_SRC_PV ("pv" in the comments), presumably the result of
     * the previous instruction group -- confirm against the ISA docs.
     */
781ad43ddacSmrg    /* 8,9,10,11 */
782ad43ddacSmrg    /* r2.x = MAD(c1.x, r1.y, pv.x) */
783ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(257),
784b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
785b7e1c893Smrg                             SRC0_ELEM(ELEM_X),
786b7e1c893Smrg                             SRC0_NEG(0),
787ad43ddacSmrg                             SRC1_SEL(1),
788b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
789ad43ddacSmrg                             SRC1_ELEM(ELEM_Y),
790b7e1c893Smrg                             SRC1_NEG(0),
791b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
792b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
793b7e1c893Smrg                             LAST(0));
794ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
795ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
796ad43ddacSmrg                                 SRC2_ELEM(ELEM_X),
797ad43ddacSmrg                                 SRC2_NEG(0),
798ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
799ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
800b7e1c893Smrg                                 DST_GPR(2),
801b7e1c893Smrg                                 DST_REL(ABSOLUTE),
802b7e1c893Smrg                                 DST_ELEM(ELEM_X),
803ad43ddacSmrg                                 CLAMP(0));
804ad43ddacSmrg    /* r2.y = MAD(c1.y, r1.y, pv.y) */
805ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(257),
806b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
807b7e1c893Smrg                             SRC0_ELEM(ELEM_Y),
808b7e1c893Smrg                             SRC0_NEG(0),
809ad43ddacSmrg                             SRC1_SEL(1),
810b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
811b7e1c893Smrg                             SRC1_ELEM(ELEM_Y),
812b7e1c893Smrg                             SRC1_NEG(0),
813b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
814b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
815b7e1c893Smrg                             LAST(0));
816ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
817ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
818ad43ddacSmrg                                 SRC2_ELEM(ELEM_Y),
819ad43ddacSmrg                                 SRC2_NEG(0),
820ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
821ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
822ad43ddacSmrg                                 DST_GPR(2),
823b7e1c893Smrg                                 DST_REL(ABSOLUTE),
824b7e1c893Smrg                                 DST_ELEM(ELEM_Y),
825ad43ddacSmrg                                 CLAMP(0));
826ad43ddacSmrg    /* r2.z = MAD(c1.z, r1.y, pv.z) */
827ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(257),
828b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
829b7e1c893Smrg                             SRC0_ELEM(ELEM_Z),
830b7e1c893Smrg                             SRC0_NEG(0),
831ad43ddacSmrg                             SRC1_SEL(1),
832b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
833ad43ddacSmrg                             SRC1_ELEM(ELEM_Y),
834b7e1c893Smrg                             SRC1_NEG(0),
835b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
836b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
837b7e1c893Smrg                             LAST(0));
838ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
839ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
840ad43ddacSmrg                                 SRC2_ELEM(ELEM_Z),
841ad43ddacSmrg                                 SRC2_NEG(0),
842ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
843ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
844ad43ddacSmrg                                 DST_GPR(2),
845b7e1c893Smrg                                 DST_REL(ABSOLUTE),
846b7e1c893Smrg                                 DST_ELEM(ELEM_Z),
847ad43ddacSmrg                                 CLAMP(0));
848ad43ddacSmrg    /* r2.w = MAD(0, 0, 1) */
    /* NOTE(review): SRC2_ELEM is ELEM_W here but ELEM_X in the other two
     * r2.w instructions.  The source is the inline constant SQ_ALU_SRC_1,
     * so the element select presumably has no effect -- confirm. */
849ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
850b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
851ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
852b7e1c893Smrg                             SRC0_NEG(0),
853ad43ddacSmrg                             SRC1_SEL(SQ_ALU_SRC_0),
854b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
855ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
856b7e1c893Smrg                             SRC1_NEG(0),
857b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
858b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
859b7e1c893Smrg                             LAST(1));
860ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
861ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
862ad43ddacSmrg                                 SRC2_ELEM(ELEM_W),
863ad43ddacSmrg                                 SRC2_NEG(0),
864ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
865ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
866ad43ddacSmrg                                 DST_GPR(2),
867b7e1c893Smrg                                 DST_REL(ABSOLUTE),
868b7e1c893Smrg                                 DST_ELEM(ELEM_W),
869ad43ddacSmrg                                 CLAMP(0));
    /*
     * Group 3 adds c2.xyz * r1.z.  Unlike groups 1 and 2, all four
     * instructions here are emitted with CLAMP(1).
     */
870ad43ddacSmrg    /* 12,13,14,15 */
871ad43ddacSmrg    /* r2.x = MAD(c2.x, r1.z, pv.x) */
872ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(258),
873b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
874b7e1c893Smrg                             SRC0_ELEM(ELEM_X),
875b7e1c893Smrg                             SRC0_NEG(0),
876ad43ddacSmrg                             SRC1_SEL(1),
877b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
878ad43ddacSmrg                             SRC1_ELEM(ELEM_Z),
879b7e1c893Smrg                             SRC1_NEG(0),
880b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
881b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
882b7e1c893Smrg                             LAST(0));
883ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
884ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
885ad43ddacSmrg                                 SRC2_ELEM(ELEM_X),
886ad43ddacSmrg                                 SRC2_NEG(0),
887ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
888ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
889ad43ddacSmrg                                 DST_GPR(2),
890b7e1c893Smrg                                 DST_REL(ABSOLUTE),
891b7e1c893Smrg                                 DST_ELEM(ELEM_X),
892b7e1c893Smrg                                 CLAMP(1));
893ad43ddacSmrg    /* r2.y = MAD(c2.y, r1.z, pv.y) */
894ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(258),
895b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
896b7e1c893Smrg                             SRC0_ELEM(ELEM_Y),
897b7e1c893Smrg                             SRC0_NEG(0),
898ad43ddacSmrg                             SRC1_SEL(1),
899b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
900ad43ddacSmrg                             SRC1_ELEM(ELEM_Z),
901b7e1c893Smrg                             SRC1_NEG(0),
902b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
903b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
904b7e1c893Smrg                             LAST(0));
905ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
906ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
907ad43ddacSmrg                                 SRC2_ELEM(ELEM_Y),
908ad43ddacSmrg                                 SRC2_NEG(0),
909ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
910ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
911b7e1c893Smrg                                 DST_GPR(2),
912b7e1c893Smrg                                 DST_REL(ABSOLUTE),
913b7e1c893Smrg                                 DST_ELEM(ELEM_Y),
914b7e1c893Smrg                                 CLAMP(1));
915ad43ddacSmrg    /* r2.z = MAD(c2.z, r1.z, pv.z) */
916ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(258),
917b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
918b7e1c893Smrg                             SRC0_ELEM(ELEM_Z),
919b7e1c893Smrg                             SRC0_NEG(0),
920ad43ddacSmrg                             SRC1_SEL(1),
921b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
922b7e1c893Smrg                             SRC1_ELEM(ELEM_Z),
923b7e1c893Smrg                             SRC1_NEG(0),
924b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
925b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
926b7e1c893Smrg                             LAST(0));
927ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
928ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
929ad43ddacSmrg                                 SRC2_ELEM(ELEM_Z),
930ad43ddacSmrg                                 SRC2_NEG(0),
931ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
932ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
933ad43ddacSmrg                                 DST_GPR(2),
934b7e1c893Smrg                                 DST_REL(ABSOLUTE),
935b7e1c893Smrg                                 DST_ELEM(ELEM_Z),
936b7e1c893Smrg                                 CLAMP(1));
937ad43ddacSmrg    /* r2.w = MAD(0, 0, 1) */
938ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
939b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
940b7e1c893Smrg                             SRC0_ELEM(ELEM_X),
941b7e1c893Smrg                             SRC0_NEG(0),
942ad43ddacSmrg                             SRC1_SEL(SQ_ALU_SRC_0),
943b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
944b7e1c893Smrg                             SRC1_ELEM(ELEM_X),
945b7e1c893Smrg                             SRC1_NEG(0),
946b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
947b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
948b7e1c893Smrg                             LAST(1));
949ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
950ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
951ad43ddacSmrg                                 SRC2_ELEM(ELEM_X),
952ad43ddacSmrg                                 SRC2_NEG(0),
953ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
954ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
955ad43ddacSmrg                                 DST_GPR(2),
956b7e1c893Smrg                                 DST_REL(ABSOLUTE),
957b7e1c893Smrg                                 DST_ELEM(ELEM_W),
958b7e1c893Smrg                                 CLAMP(1));
959ad43ddacSmrg
    /*
     * Subroutine A (target of the CALL in slot 0): one TEX clause of three
     * samples followed by RETURN.  Each TEX instruction takes four dwords
     * (three fields plus TEX_DWORD_PAD), i.e. two slots.
     */
960ad43ddacSmrg    /* 16: TEX clause, 3 instructions starting at slot 18 */
961ad43ddacSmrg    shader[i++] = CF_DWORD0(ADDR(18));
962b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
963b7e1c893Smrg                            CF_CONST(0),
964b7e1c893Smrg                            COND(SQ_CF_COND_ACTIVE),
965b7e1c893Smrg                            I_COUNT(3),
966b7e1c893Smrg                            CALL_COUNT(0),
967b7e1c893Smrg                            END_OF_PROGRAM(0),
968b7e1c893Smrg                            VALID_PIXEL_MODE(0),
969b7e1c893Smrg                            CF_INST(SQ_CF_INST_TEX),
970b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
971b7e1c893Smrg                            BARRIER(1));
972ad43ddacSmrg    /* 17: RETURN from subroutine A */
973b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(0));
974b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
975b7e1c893Smrg			    CF_CONST(0),
976b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
977b7e1c893Smrg			    I_COUNT(0),
978b7e1c893Smrg			    CALL_COUNT(0),
979b7e1c893Smrg			    END_OF_PROGRAM(0),
980b7e1c893Smrg			    VALID_PIXEL_MODE(0),
981b7e1c893Smrg			    CF_INST(SQ_CF_INST_RETURN),
982b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
983b7e1c893Smrg			    BARRIER(1));
984ad43ddacSmrg    /* 18/19: resource 0 / sampler 0: sampled x -> r1.x, r1.w = 1.
     *        Components given SQ_SEL_MASK are presumably left unwritten. */
985b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
986b7e1c893Smrg                             BC_FRAC_MODE(0),
987b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
988b7e1c893Smrg                             RESOURCE_ID(0),
989b7e1c893Smrg                             SRC_GPR(0),
990b7e1c893Smrg                             SRC_REL(ABSOLUTE),
991b7e1c893Smrg                             R7xx_ALT_CONST(0));
992b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
993b7e1c893Smrg                             DST_REL(ABSOLUTE),
994b7e1c893Smrg                             DST_SEL_X(SQ_SEL_X),
995b7e1c893Smrg                             DST_SEL_Y(SQ_SEL_MASK),
996b7e1c893Smrg                             DST_SEL_Z(SQ_SEL_MASK),
997b7e1c893Smrg                             DST_SEL_W(SQ_SEL_1),
998b7e1c893Smrg                             LOD_BIAS(0),
999b7e1c893Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1000b7e1c893Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1001b7e1c893Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1002b7e1c893Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1003b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1004b7e1c893Smrg                             OFFSET_Y(0),
1005b7e1c893Smrg                             OFFSET_Z(0),
1006b7e1c893Smrg                             SAMPLER_ID(0),
1007b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
1008b7e1c893Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1009b7e1c893Smrg                             SRC_SEL_Z(SQ_SEL_0),
1010b7e1c893Smrg                             SRC_SEL_W(SQ_SEL_1));
1011b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
1012ad43ddacSmrg    /* 20/21: resource 1 / sampler 1: sampled x -> r1.z */
1013b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1014b7e1c893Smrg                             BC_FRAC_MODE(0),
1015b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
1016b7e1c893Smrg                             RESOURCE_ID(1),
1017b7e1c893Smrg                             SRC_GPR(0),
1018b7e1c893Smrg                             SRC_REL(ABSOLUTE),
1019b7e1c893Smrg                             R7xx_ALT_CONST(0));
1020b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1021b7e1c893Smrg                             DST_REL(ABSOLUTE),
1022b7e1c893Smrg                             DST_SEL_X(SQ_SEL_MASK),
1023b7e1c893Smrg                             DST_SEL_Y(SQ_SEL_MASK),
1024b7e1c893Smrg                             DST_SEL_Z(SQ_SEL_X),
1025b7e1c893Smrg                             DST_SEL_W(SQ_SEL_MASK),
1026b7e1c893Smrg                             LOD_BIAS(0),
1027b7e1c893Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1028b7e1c893Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1029b7e1c893Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1030b7e1c893Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1031b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1032b7e1c893Smrg                             OFFSET_Y(0),
1033b7e1c893Smrg                             OFFSET_Z(0),
1034b7e1c893Smrg                             SAMPLER_ID(1),
1035b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
1036b7e1c893Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1037b7e1c893Smrg                             SRC_SEL_Z(SQ_SEL_0),
1038b7e1c893Smrg                             SRC_SEL_W(SQ_SEL_1));
1039b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
1040ad43ddacSmrg    /* 22/23: resource 2 / sampler 2: sampled x -> r1.y */
1041b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1042b7e1c893Smrg                             BC_FRAC_MODE(0),
1043b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
1044b7e1c893Smrg                             RESOURCE_ID(2),
1045b7e1c893Smrg                             SRC_GPR(0),
1046b7e1c893Smrg                             SRC_REL(ABSOLUTE),
1047b7e1c893Smrg                             R7xx_ALT_CONST(0));
1048b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1049b7e1c893Smrg                             DST_REL(ABSOLUTE),
1050b7e1c893Smrg                             DST_SEL_X(SQ_SEL_MASK),
1051b7e1c893Smrg                             DST_SEL_Y(SQ_SEL_X),
1052b7e1c893Smrg                             DST_SEL_Z(SQ_SEL_MASK),
1053b7e1c893Smrg                             DST_SEL_W(SQ_SEL_MASK),
1054b7e1c893Smrg                             LOD_BIAS(0),
1055b7e1c893Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1056b7e1c893Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1057b7e1c893Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1058b7e1c893Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1059b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1060b7e1c893Smrg                             OFFSET_Y(0),
1061b7e1c893Smrg                             OFFSET_Z(0),
1062b7e1c893Smrg                             SAMPLER_ID(2),
1063b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
1064b7e1c893Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1065b7e1c893Smrg                             SRC_SEL_Z(SQ_SEL_0),
1066b7e1c893Smrg                             SRC_SEL_W(SQ_SEL_1));
1067b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
    /*
     * Subroutine B (target of the CALL in slot 1): one TEX clause of two
     * samples followed by RETURN.
     */
1068ad43ddacSmrg    /* 24: TEX clause, 2 instructions starting at slot 26 */
1069ad43ddacSmrg    shader[i++] = CF_DWORD0(ADDR(26));
1070b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1071b7e1c893Smrg                            CF_CONST(0),
1072b7e1c893Smrg                            COND(SQ_CF_COND_ACTIVE),
1073b7e1c893Smrg                            I_COUNT(2),
1074b7e1c893Smrg                            CALL_COUNT(0),
1075b7e1c893Smrg                            END_OF_PROGRAM(0),
1076b7e1c893Smrg                            VALID_PIXEL_MODE(0),
1077b7e1c893Smrg                            CF_INST(SQ_CF_INST_TEX),
1078b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
1079b7e1c893Smrg                            BARRIER(1));
1080ad43ddacSmrg    /* 25: RETURN from subroutine B */
1081b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(0));
1082b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1083b7e1c893Smrg			    CF_CONST(0),
1084b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
1085b7e1c893Smrg			    I_COUNT(0),
1086b7e1c893Smrg			    CALL_COUNT(0),
1087b7e1c893Smrg			    END_OF_PROGRAM(0),
1088b7e1c893Smrg			    VALID_PIXEL_MODE(0),
1089b7e1c893Smrg			    CF_INST(SQ_CF_INST_RETURN),
1090b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
1091b7e1c893Smrg			    BARRIER(1));
1092ad43ddacSmrg    /* 26/27: resource 0 / sampler 0: sampled x -> r1.x, r1.w = 1 */
1093b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1094b7e1c893Smrg                             BC_FRAC_MODE(0),
1095b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
1096b7e1c893Smrg                             RESOURCE_ID(0),
1097b7e1c893Smrg                             SRC_GPR(0),
1098b7e1c893Smrg                             SRC_REL(ABSOLUTE),
1099b7e1c893Smrg                             R7xx_ALT_CONST(0));
1100b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1101b7e1c893Smrg                             DST_REL(ABSOLUTE),
1102b7e1c893Smrg                             DST_SEL_X(SQ_SEL_X),
1103b7e1c893Smrg                             DST_SEL_Y(SQ_SEL_MASK),
1104b7e1c893Smrg                             DST_SEL_Z(SQ_SEL_MASK),
1105b7e1c893Smrg                             DST_SEL_W(SQ_SEL_1),
1106b7e1c893Smrg                             LOD_BIAS(0),
1107b7e1c893Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1108b7e1c893Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1109b7e1c893Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1110b7e1c893Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1111b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1112b7e1c893Smrg                             OFFSET_Y(0),
1113b7e1c893Smrg                             OFFSET_Z(0),
1114b7e1c893Smrg                             SAMPLER_ID(0),
1115b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
1116b7e1c893Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1117b7e1c893Smrg                             SRC_SEL_Z(SQ_SEL_0),
1118b7e1c893Smrg                             SRC_SEL_W(SQ_SEL_1));
1119b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
1120ad43ddacSmrg    /* 28/29: resource 1 / sampler 1: sampled x -> r1.y, sampled y -> r1.z */
1121b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1122b7e1c893Smrg                             BC_FRAC_MODE(0),
1123b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
1124b7e1c893Smrg                             RESOURCE_ID(1),
1125b7e1c893Smrg                             SRC_GPR(0),
1126b7e1c893Smrg                             SRC_REL(ABSOLUTE),
1127b7e1c893Smrg                             R7xx_ALT_CONST(0));
1128b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1129b7e1c893Smrg                             DST_REL(ABSOLUTE),
1130b7e1c893Smrg                             DST_SEL_X(SQ_SEL_MASK),
1131b7e1c893Smrg                             DST_SEL_Y(SQ_SEL_X),
1132b7e1c893Smrg                             DST_SEL_Z(SQ_SEL_Y),
1133b7e1c893Smrg                             DST_SEL_W(SQ_SEL_MASK),
1134b7e1c893Smrg                             LOD_BIAS(0),
1135b7e1c893Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1136b7e1c893Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1137b7e1c893Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1138b7e1c893Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1139b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1140b7e1c893Smrg                             OFFSET_Y(0),
1141b7e1c893Smrg                             OFFSET_Z(0),
1142b7e1c893Smrg                             SAMPLER_ID(1),
1143b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
1144b7e1c893Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1145b7e1c893Smrg                             SRC_SEL_Z(SQ_SEL_0),
1146b7e1c893Smrg                             SRC_SEL_W(SQ_SEL_1));
1147b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
1148b7e1c893Smrg
    /* i now equals the number of dwords stored in shader[] */
1149b7e1c893Smrg    return i;
1150b7e1c893Smrg}
1151b7e1c893Smrg
1152b7e1c893Smrg/* comp vs --------------------------------------- */
1153b7e1c893Smrgint R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
1154b7e1c893Smrg{
1155b7e1c893Smrg    int i = 0;
1156b7e1c893Smrg
1157b7e1c893Smrg    /* 0 */
1158b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(3));
1159b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1160b7e1c893Smrg                            CF_CONST(0),
1161b7e1c893Smrg                            COND(SQ_CF_COND_BOOL),
1162b7e1c893Smrg                            I_COUNT(0),
1163b7e1c893Smrg                            CALL_COUNT(0),
1164b7e1c893Smrg                            END_OF_PROGRAM(0),
1165b7e1c893Smrg                            VALID_PIXEL_MODE(0),
1166b7e1c893Smrg                            CF_INST(SQ_CF_INST_CALL),
1167b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
1168b7e1c893Smrg                            BARRIER(0));
1169b7e1c893Smrg    /* 1 */
11700974d292Smrg    shader[i++] = CF_DWORD0(ADDR(9));
1171b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1172b7e1c893Smrg                            CF_CONST(0),
1173b7e1c893Smrg                            COND(SQ_CF_COND_NOT_BOOL),
1174b7e1c893Smrg                            I_COUNT(0),
1175b7e1c893Smrg                            CALL_COUNT(0),
1176b7e1c893Smrg                            END_OF_PROGRAM(0),
1177b7e1c893Smrg                            VALID_PIXEL_MODE(0),
1178b7e1c893Smrg                            CF_INST(SQ_CF_INST_CALL),
1179b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
1180b7e1c893Smrg                            BARRIER(0));
1181b7e1c893Smrg    /* 2 */
11822f39173dSmrg    shader[i++] = CF_DWORD0(ADDR(0));
1183b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1184b7e1c893Smrg                            CF_CONST(0),
1185b7e1c893Smrg                            COND(SQ_CF_COND_ACTIVE),
1186b7e1c893Smrg                            I_COUNT(0),
1187b7e1c893Smrg                            CALL_COUNT(0),
1188b7e1c893Smrg                            END_OF_PROGRAM(1),
1189b7e1c893Smrg                            VALID_PIXEL_MODE(0),
1190b7e1c893Smrg                            CF_INST(SQ_CF_INST_NOP),
1191b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
1192b7e1c893Smrg                            BARRIER(1));
1193b7e1c893Smrg    /* 3 - mask sub */
11940974d292Smrg    shader[i++] = CF_DWORD0(ADDR(32));
1195b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1196b7e1c893Smrg			    CF_CONST(0),
1197b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
1198b7e1c893Smrg			    I_COUNT(3),
1199b7e1c893Smrg			    CALL_COUNT(0),
1200b7e1c893Smrg			    END_OF_PROGRAM(0),
1201b7e1c893Smrg			    VALID_PIXEL_MODE(0),
1202b7e1c893Smrg			    CF_INST(SQ_CF_INST_VTX),
1203b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
1204b7e1c893Smrg			    BARRIER(1));
1205ad43ddacSmrg
1206ad43ddacSmrg    /* 4 - ALU */
12070974d292Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(14),
1208ad43ddacSmrg				KCACHE_BANK0(0),
1209ad43ddacSmrg				KCACHE_BANK1(0),
1210ad43ddacSmrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
1211ad43ddacSmrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1212ad43ddacSmrg				KCACHE_ADDR0(0),
1213ad43ddacSmrg				KCACHE_ADDR1(0),
1214ad43ddacSmrg				I_COUNT(12),
1215ad43ddacSmrg				USES_WATERFALL(0),
1216ad43ddacSmrg				CF_INST(SQ_CF_INST_ALU),
1217ad43ddacSmrg				WHOLE_QUAD_MODE(0),
1218ad43ddacSmrg				BARRIER(1));
1219ad43ddacSmrg
1220ad43ddacSmrg    /* 5 - dst */
1221b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1222b7e1c893Smrg					  TYPE(SQ_EXPORT_POS),
1223b7e1c893Smrg					  RW_GPR(2),
1224b7e1c893Smrg					  RW_REL(ABSOLUTE),
1225b7e1c893Smrg					  INDEX_GPR(0),
1226b7e1c893Smrg					  ELEM_SIZE(0));
1227b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1228b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1229ad43ddacSmrg					       SRC_SEL_Z(SQ_SEL_0),
1230ad43ddacSmrg					       SRC_SEL_W(SQ_SEL_1),
1231b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
1232b7e1c893Smrg					       BURST_COUNT(1),
1233b7e1c893Smrg					       END_OF_PROGRAM(0),
1234b7e1c893Smrg					       VALID_PIXEL_MODE(0),
1235b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1236b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
1237b7e1c893Smrg					       BARRIER(1));
1238ad43ddacSmrg    /* 6 - src */
1239b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1240b7e1c893Smrg					  TYPE(SQ_EXPORT_PARAM),
1241b7e1c893Smrg					  RW_GPR(1),
1242b7e1c893Smrg					  RW_REL(ABSOLUTE),
1243b7e1c893Smrg					  INDEX_GPR(0),
1244b7e1c893Smrg					  ELEM_SIZE(0));
1245b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1246b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1247ad43ddacSmrg					       SRC_SEL_Z(SQ_SEL_0),
1248ad43ddacSmrg					       SRC_SEL_W(SQ_SEL_1),
1249b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
1250b7e1c893Smrg					       BURST_COUNT(1),
1251b7e1c893Smrg					       END_OF_PROGRAM(0),
1252b7e1c893Smrg					       VALID_PIXEL_MODE(0),
1253b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT),
1254b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
1255b7e1c893Smrg					       BARRIER(0));
1256ad43ddacSmrg    /* 7 - mask */
1257b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
1258b7e1c893Smrg					  TYPE(SQ_EXPORT_PARAM),
1259b7e1c893Smrg					  RW_GPR(0),
1260b7e1c893Smrg					  RW_REL(ABSOLUTE),
1261b7e1c893Smrg					  INDEX_GPR(0),
1262b7e1c893Smrg					  ELEM_SIZE(0));
1263b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1264b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1265ad43ddacSmrg					       SRC_SEL_Z(SQ_SEL_0),
1266ad43ddacSmrg					       SRC_SEL_W(SQ_SEL_1),
1267b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
1268b7e1c893Smrg					       BURST_COUNT(1),
1269b7e1c893Smrg					       END_OF_PROGRAM(0),
1270b7e1c893Smrg					       VALID_PIXEL_MODE(0),
1271b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1272b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
1273b7e1c893Smrg					       BARRIER(0));
1274ad43ddacSmrg    /* 8 */
1275b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(0));
1276b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1277b7e1c893Smrg			    CF_CONST(0),
1278b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
1279b7e1c893Smrg			    I_COUNT(0),
1280b7e1c893Smrg			    CALL_COUNT(0),
1281b7e1c893Smrg			    END_OF_PROGRAM(0),
1282b7e1c893Smrg			    VALID_PIXEL_MODE(0),
1283b7e1c893Smrg			    CF_INST(SQ_CF_INST_RETURN),
1284b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
1285b7e1c893Smrg			    BARRIER(1));
12860974d292Smrg    /* 9 - non-mask sub */
12870974d292Smrg    shader[i++] = CF_DWORD0(ADDR(38));
12880974d292Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
12890974d292Smrg			    CF_CONST(0),
12900974d292Smrg			    COND(SQ_CF_COND_ACTIVE),
12910974d292Smrg			    I_COUNT(2),
12920974d292Smrg			    CALL_COUNT(0),
12930974d292Smrg			    END_OF_PROGRAM(0),
12940974d292Smrg			    VALID_PIXEL_MODE(0),
12950974d292Smrg			    CF_INST(SQ_CF_INST_VTX),
12960974d292Smrg			    WHOLE_QUAD_MODE(0),
12970974d292Smrg			    BARRIER(1));
1298b7e1c893Smrg
12990974d292Smrg    /* 10 - ALU */
13000974d292Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(26),
13010974d292Smrg				KCACHE_BANK0(0),
13020974d292Smrg				KCACHE_BANK1(0),
13030974d292Smrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
13040974d292Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
13050974d292Smrg				KCACHE_ADDR0(0),
13060974d292Smrg				KCACHE_ADDR1(0),
13070974d292Smrg				I_COUNT(6),
13080974d292Smrg				USES_WATERFALL(0),
13090974d292Smrg				CF_INST(SQ_CF_INST_ALU),
13100974d292Smrg				WHOLE_QUAD_MODE(0),
13110974d292Smrg				BARRIER(1));
1312ad43ddacSmrg
13130974d292Smrg    /* 11 - dst */
13140974d292Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
13150974d292Smrg					  TYPE(SQ_EXPORT_POS),
13160974d292Smrg					  RW_GPR(1),
13170974d292Smrg					  RW_REL(ABSOLUTE),
13180974d292Smrg					  INDEX_GPR(0),
13190974d292Smrg					  ELEM_SIZE(0));
13200974d292Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
13210974d292Smrg					       SRC_SEL_Y(SQ_SEL_Y),
13220974d292Smrg					       SRC_SEL_Z(SQ_SEL_0),
13230974d292Smrg					       SRC_SEL_W(SQ_SEL_1),
13240974d292Smrg					       R6xx_ELEM_LOOP(0),
13250974d292Smrg					       BURST_COUNT(0),
13260974d292Smrg					       END_OF_PROGRAM(0),
13270974d292Smrg					       VALID_PIXEL_MODE(0),
13280974d292Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
13290974d292Smrg					       WHOLE_QUAD_MODE(0),
13300974d292Smrg					       BARRIER(1));
13310974d292Smrg    /* 12 - src */
13320974d292Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
13330974d292Smrg					  TYPE(SQ_EXPORT_PARAM),
13340974d292Smrg					  RW_GPR(0),
13350974d292Smrg					  RW_REL(ABSOLUTE),
13360974d292Smrg					  INDEX_GPR(0),
13370974d292Smrg					  ELEM_SIZE(0));
13380974d292Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
13390974d292Smrg					       SRC_SEL_Y(SQ_SEL_Y),
13400974d292Smrg					       SRC_SEL_Z(SQ_SEL_0),
13410974d292Smrg					       SRC_SEL_W(SQ_SEL_1),
13420974d292Smrg					       R6xx_ELEM_LOOP(0),
13430974d292Smrg					       BURST_COUNT(0),
13440974d292Smrg					       END_OF_PROGRAM(0),
13450974d292Smrg					       VALID_PIXEL_MODE(0),
13460974d292Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
13470974d292Smrg					       WHOLE_QUAD_MODE(0),
13480974d292Smrg					       BARRIER(0));
13490974d292Smrg    /* 13 */
13500974d292Smrg    shader[i++] = CF_DWORD0(ADDR(0));
13510974d292Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
13520974d292Smrg			    CF_CONST(0),
13530974d292Smrg			    COND(SQ_CF_COND_ACTIVE),
13540974d292Smrg			    I_COUNT(0),
13550974d292Smrg			    CALL_COUNT(0),
13560974d292Smrg			    END_OF_PROGRAM(0),
13570974d292Smrg			    VALID_PIXEL_MODE(0),
13580974d292Smrg			    CF_INST(SQ_CF_INST_RETURN),
13590974d292Smrg			    WHOLE_QUAD_MODE(0),
13600974d292Smrg			    BARRIER(1));
13610974d292Smrg
13620974d292Smrg
13630974d292Smrg    /* 14 srcX MAD - mask */
13640974d292Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
13650974d292Smrg                             SRC0_REL(ABSOLUTE),
13660974d292Smrg                             SRC0_ELEM(ELEM_Y),
13670974d292Smrg                             SRC0_NEG(0),
13680974d292Smrg                             SRC1_SEL(1),
13690974d292Smrg                             SRC1_REL(ABSOLUTE),
13700974d292Smrg                             SRC1_ELEM(ELEM_Y),
13710974d292Smrg                             SRC1_NEG(0),
13720974d292Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
13730974d292Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
13740974d292Smrg                             LAST(1));
13750974d292Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256),
13760974d292Smrg                                 SRC2_REL(ABSOLUTE),
13770974d292Smrg                                 SRC2_ELEM(ELEM_Z),
13780974d292Smrg                                 SRC2_NEG(0),
13790974d292Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
13800974d292Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
13810974d292Smrg                                 DST_GPR(1),
13820974d292Smrg                                 DST_REL(ABSOLUTE),
13830974d292Smrg                                 DST_ELEM(ELEM_Z),
13840974d292Smrg                                 CLAMP(0));
13850974d292Smrg    /* 15 srcY MAD */
13860974d292Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(257),
1387ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1388ad43ddacSmrg                             SRC0_ELEM(ELEM_Y),
1389ad43ddacSmrg                             SRC0_NEG(0),
1390ad43ddacSmrg                             SRC1_SEL(1),
1391ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1392ad43ddacSmrg                             SRC1_ELEM(ELEM_Y),
1393ad43ddacSmrg                             SRC1_NEG(0),
1394ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1395ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1396ad43ddacSmrg                             LAST(1));
1397ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(257),
1398ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
1399ad43ddacSmrg                                 SRC2_ELEM(ELEM_Z),
1400ad43ddacSmrg                                 SRC2_NEG(0),
1401ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1402ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1403ad43ddacSmrg                                 DST_GPR(1),
1404ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1405ad43ddacSmrg                                 DST_ELEM(ELEM_W),
1406ad43ddacSmrg                                 CLAMP(0));
1407ad43ddacSmrg
14080974d292Smrg    /* 16 srcX MAD */
1409ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
1410ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1411ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
1412ad43ddacSmrg                             SRC0_NEG(0),
1413ad43ddacSmrg                             SRC1_SEL(1),
1414ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1415ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
1416ad43ddacSmrg                             SRC1_NEG(0),
1417ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1418ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1419ad43ddacSmrg                             LAST(0));
1420ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(1),
1421ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
1422ad43ddacSmrg                                 SRC2_ELEM(ELEM_Z),
1423ad43ddacSmrg                                 SRC2_NEG(0),
1424ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1425ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1426ad43ddacSmrg                                 DST_GPR(1),
1427ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1428ad43ddacSmrg                                 DST_ELEM(ELEM_X),
1429ad43ddacSmrg                                 CLAMP(0));
14300974d292Smrg    /* 17 srcY MAD */
1431ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(257),
1432ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1433ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
1434ad43ddacSmrg                             SRC0_NEG(0),
1435ad43ddacSmrg                             SRC1_SEL(1),
1436ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1437ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
1438ad43ddacSmrg                             SRC1_NEG(0),
1439ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1440ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1441ad43ddacSmrg                             LAST(1));
1442ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(1),
1443ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
1444ad43ddacSmrg                                 SRC2_ELEM(ELEM_W),
1445ad43ddacSmrg                                 SRC2_NEG(0),
1446ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1447ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1448ad43ddacSmrg                                 DST_GPR(1),
1449ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1450ad43ddacSmrg                                 DST_ELEM(ELEM_Y),
1451ad43ddacSmrg                                 CLAMP(0));
1452ad43ddacSmrg
14530974d292Smrg    /* 18 maskX MAD */
1454ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(258),
1455ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1456ad43ddacSmrg                             SRC0_ELEM(ELEM_Y),
1457ad43ddacSmrg                             SRC0_NEG(0),
1458ad43ddacSmrg                             SRC1_SEL(0),
1459ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1460ad43ddacSmrg                             SRC1_ELEM(ELEM_Y),
1461ad43ddacSmrg                             SRC1_NEG(0),
1462ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1463ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1464ad43ddacSmrg                             LAST(1));
1465ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(258),
1466ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
1467ad43ddacSmrg                                 SRC2_ELEM(ELEM_Z),
1468ad43ddacSmrg                                 SRC2_NEG(0),
1469ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1470ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1471ad43ddacSmrg                                 DST_GPR(0),
1472ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1473ad43ddacSmrg                                 DST_ELEM(ELEM_Z),
1474ad43ddacSmrg                                 CLAMP(0));
1475ad43ddacSmrg
14760974d292Smrg    /* 19 maskY MAD */
1477ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(259),
1478ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1479ad43ddacSmrg                             SRC0_ELEM(ELEM_Y),
1480ad43ddacSmrg                             SRC0_NEG(0),
1481ad43ddacSmrg                             SRC1_SEL(0),
1482ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1483ad43ddacSmrg                             SRC1_ELEM(ELEM_Y),
1484ad43ddacSmrg                             SRC1_NEG(0),
1485ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1486ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1487ad43ddacSmrg                             LAST(1));
1488ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
1489ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
1490ad43ddacSmrg                                 SRC2_ELEM(ELEM_Z),
1491ad43ddacSmrg                                 SRC2_NEG(0),
1492ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1493ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1494ad43ddacSmrg                                 DST_GPR(0),
1495ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1496ad43ddacSmrg                                 DST_ELEM(ELEM_W),
1497ad43ddacSmrg                                 CLAMP(0));
1498ad43ddacSmrg
14990974d292Smrg    /* 20 maskX MAD */
1500ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(258),
1501ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1502ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
1503ad43ddacSmrg                             SRC0_NEG(0),
1504ad43ddacSmrg                             SRC1_SEL(0),
1505ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1506ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
1507ad43ddacSmrg                             SRC1_NEG(0),
1508ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1509ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1510ad43ddacSmrg                             LAST(0));
1511ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0),
1512ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
1513ad43ddacSmrg                                 SRC2_ELEM(ELEM_Z),
1514ad43ddacSmrg                                 SRC2_NEG(0),
1515ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1516ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1517ad43ddacSmrg                                 DST_GPR(0),
1518ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1519ad43ddacSmrg                                 DST_ELEM(ELEM_X),
1520ad43ddacSmrg                                 CLAMP(0));
15210974d292Smrg    /* 21 maskY MAD */
1522ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(259),
1523ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1524ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
1525ad43ddacSmrg                             SRC0_NEG(0),
1526ad43ddacSmrg                             SRC1_SEL(0),
1527ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1528ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
1529ad43ddacSmrg                             SRC1_NEG(0),
1530ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1531ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1532ad43ddacSmrg                             LAST(1));
1533ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0),
1534ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
1535ad43ddacSmrg                                 SRC2_ELEM(ELEM_W),
1536ad43ddacSmrg                                 SRC2_NEG(0),
1537ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1538ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1539ad43ddacSmrg                                 DST_GPR(0),
1540ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1541ad43ddacSmrg                                 DST_ELEM(ELEM_Y),
1542ad43ddacSmrg                                 CLAMP(0));
1543ad43ddacSmrg
15440974d292Smrg    /* 22 srcX / w */
1545ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
1546ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1547ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
1548ad43ddacSmrg                             SRC0_NEG(0),
1549ad43ddacSmrg                             SRC1_SEL(256),
1550ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1551ad43ddacSmrg                             SRC1_ELEM(ELEM_W),
1552ad43ddacSmrg                             SRC1_NEG(0),
1553ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
1554ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1555ad43ddacSmrg                             LAST(1));
1556ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1557ad43ddacSmrg                                 SRC0_ABS(0),
1558ad43ddacSmrg                                 SRC1_ABS(0),
1559ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
1560ad43ddacSmrg                                 UPDATE_PRED(0),
1561ad43ddacSmrg                                 WRITE_MASK(1),
1562ad43ddacSmrg                                 FOG_MERGE(0),
1563ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
1564ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
1565ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1566ad43ddacSmrg                                 DST_GPR(1),
1567ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1568ad43ddacSmrg                                 DST_ELEM(ELEM_X),
1569ad43ddacSmrg                                 CLAMP(0));
1570ad43ddacSmrg
15710974d292Smrg    /* 23 srcY / h */
1572ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
1573ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1574ad43ddacSmrg                             SRC0_ELEM(ELEM_Y),
1575ad43ddacSmrg                             SRC0_NEG(0),
1576ad43ddacSmrg                             SRC1_SEL(257),
1577ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1578ad43ddacSmrg                             SRC1_ELEM(ELEM_W),
1579ad43ddacSmrg                             SRC1_NEG(0),
1580ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
1581ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1582ad43ddacSmrg                             LAST(1));
1583ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1584ad43ddacSmrg                                 SRC0_ABS(0),
1585ad43ddacSmrg                                 SRC1_ABS(0),
1586ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
1587ad43ddacSmrg                                 UPDATE_PRED(0),
1588ad43ddacSmrg                                 WRITE_MASK(1),
1589ad43ddacSmrg                                 FOG_MERGE(0),
1590ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
1591ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
1592ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1593ad43ddacSmrg                                 DST_GPR(1),
1594ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1595ad43ddacSmrg                                 DST_ELEM(ELEM_Y),
1596ad43ddacSmrg                                 CLAMP(0));
1597ad43ddacSmrg
15980974d292Smrg    /* 24 maskX / w */
1599ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(0),
1600ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1601ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
1602ad43ddacSmrg                             SRC0_NEG(0),
1603ad43ddacSmrg                             SRC1_SEL(258),
1604ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1605ad43ddacSmrg                             SRC1_ELEM(ELEM_W),
1606ad43ddacSmrg                             SRC1_NEG(0),
1607ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
1608ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1609ad43ddacSmrg                             LAST(1));
1610ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1611ad43ddacSmrg                                 SRC0_ABS(0),
1612ad43ddacSmrg                                 SRC1_ABS(0),
1613ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
1614ad43ddacSmrg                                 UPDATE_PRED(0),
1615ad43ddacSmrg                                 WRITE_MASK(1),
1616ad43ddacSmrg                                 FOG_MERGE(0),
1617ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
1618ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
1619ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1620ad43ddacSmrg                                 DST_GPR(0),
1621ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1622ad43ddacSmrg                                 DST_ELEM(ELEM_X),
1623ad43ddacSmrg                                 CLAMP(0));
1624ad43ddacSmrg
16250974d292Smrg    /* 25 maskY / h */
1626ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(0),
1627ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1628ad43ddacSmrg                             SRC0_ELEM(ELEM_Y),
1629ad43ddacSmrg                             SRC0_NEG(0),
1630ad43ddacSmrg                             SRC1_SEL(259),
1631ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1632ad43ddacSmrg                             SRC1_ELEM(ELEM_W),
1633ad43ddacSmrg                             SRC1_NEG(0),
1634ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
1635ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1636ad43ddacSmrg                             LAST(1));
1637ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1638ad43ddacSmrg                                 SRC0_ABS(0),
1639ad43ddacSmrg                                 SRC1_ABS(0),
1640ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
1641ad43ddacSmrg                                 UPDATE_PRED(0),
1642ad43ddacSmrg                                 WRITE_MASK(1),
1643ad43ddacSmrg                                 FOG_MERGE(0),
1644ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
1645ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
1646ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1647ad43ddacSmrg                                 DST_GPR(0),
1648ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1649ad43ddacSmrg                                 DST_ELEM(ELEM_Y),
1650ad43ddacSmrg                                 CLAMP(0));
1651ad43ddacSmrg
16520974d292Smrg    /* 26 srcX MAD - non-mask */
1653ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
1654ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1655ad43ddacSmrg                             SRC0_ELEM(ELEM_Y),
1656ad43ddacSmrg                             SRC0_NEG(0),
1657ad43ddacSmrg                             SRC1_SEL(0),
1658ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1659ad43ddacSmrg                             SRC1_ELEM(ELEM_Y),
1660ad43ddacSmrg                             SRC1_NEG(0),
1661ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1662ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1663ad43ddacSmrg                             LAST(1));
1664ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256),
1665ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
1666ad43ddacSmrg                                 SRC2_ELEM(ELEM_Z),
1667ad43ddacSmrg                                 SRC2_NEG(0),
1668ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1669ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1670ad43ddacSmrg                                 DST_GPR(0),
1671ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1672ad43ddacSmrg                                 DST_ELEM(ELEM_Z),
1673ad43ddacSmrg                                 CLAMP(0));
16740974d292Smrg    /* 27 srcY MAD */
1675ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(257),
1676ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1677ad43ddacSmrg                             SRC0_ELEM(ELEM_Y),
1678ad43ddacSmrg                             SRC0_NEG(0),
1679ad43ddacSmrg                             SRC1_SEL(0),
1680ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1681ad43ddacSmrg                             SRC1_ELEM(ELEM_Y),
1682ad43ddacSmrg                             SRC1_NEG(0),
1683ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1684ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1685ad43ddacSmrg                             LAST(1));
1686ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(257),
1687ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
1688ad43ddacSmrg                                 SRC2_ELEM(ELEM_Z),
1689ad43ddacSmrg                                 SRC2_NEG(0),
1690ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1691ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1692ad43ddacSmrg                                 DST_GPR(0),
1693ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1694ad43ddacSmrg                                 DST_ELEM(ELEM_W),
1695ad43ddacSmrg                                 CLAMP(0));
1696ad43ddacSmrg
16970974d292Smrg    /* 28 srcX MAD */
1698ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
1699ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1700ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
1701ad43ddacSmrg                             SRC0_NEG(0),
1702ad43ddacSmrg                             SRC1_SEL(0),
1703ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1704ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
1705ad43ddacSmrg                             SRC1_NEG(0),
1706ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1707ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1708ad43ddacSmrg                             LAST(0));
1709ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0),
1710ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
1711ad43ddacSmrg                                 SRC2_ELEM(ELEM_Z),
1712ad43ddacSmrg                                 SRC2_NEG(0),
1713ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1714ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1715ad43ddacSmrg                                 DST_GPR(0),
1716ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1717ad43ddacSmrg                                 DST_ELEM(ELEM_X),
1718ad43ddacSmrg                                 CLAMP(0));
17190974d292Smrg    /* 29 srcY MAD */
1720ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(257),
1721ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1722ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
1723ad43ddacSmrg                             SRC0_NEG(0),
1724ad43ddacSmrg                             SRC1_SEL(0),
1725ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1726ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
1727ad43ddacSmrg                             SRC1_NEG(0),
1728ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1729ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1730ad43ddacSmrg                             LAST(1));
1731ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0),
1732ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
1733ad43ddacSmrg                                 SRC2_ELEM(ELEM_W),
1734ad43ddacSmrg                                 SRC2_NEG(0),
1735ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1736ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1737ad43ddacSmrg                                 DST_GPR(0),
1738ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1739ad43ddacSmrg                                 DST_ELEM(ELEM_Y),
1740ad43ddacSmrg                                 CLAMP(0));
17410974d292Smrg    /* 30 srcX / w */
1742ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(0),
1743ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1744ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
1745ad43ddacSmrg                             SRC0_NEG(0),
1746ad43ddacSmrg                             SRC1_SEL(256),
1747ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1748ad43ddacSmrg                             SRC1_ELEM(ELEM_W),
1749ad43ddacSmrg                             SRC1_NEG(0),
1750ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
1751ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1752ad43ddacSmrg                             LAST(1));
1753ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1754ad43ddacSmrg                                 SRC0_ABS(0),
1755ad43ddacSmrg                                 SRC1_ABS(0),
1756ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
1757ad43ddacSmrg                                 UPDATE_PRED(0),
1758ad43ddacSmrg                                 WRITE_MASK(1),
1759ad43ddacSmrg                                 FOG_MERGE(0),
1760ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
1761ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
1762ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1763ad43ddacSmrg                                 DST_GPR(0),
1764ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1765ad43ddacSmrg                                 DST_ELEM(ELEM_X),
1766ad43ddacSmrg                                 CLAMP(0));
1767ad43ddacSmrg
17680974d292Smrg    /* 31 srcY / h */
1769ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(0),
1770ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1771ad43ddacSmrg                             SRC0_ELEM(ELEM_Y),
1772ad43ddacSmrg                             SRC0_NEG(0),
1773ad43ddacSmrg                             SRC1_SEL(257),
1774ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1775ad43ddacSmrg                             SRC1_ELEM(ELEM_W),
1776ad43ddacSmrg                             SRC1_NEG(0),
1777ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
1778ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1779ad43ddacSmrg                             LAST(1));
1780ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1781ad43ddacSmrg                                 SRC0_ABS(0),
1782ad43ddacSmrg                                 SRC1_ABS(0),
1783ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
1784ad43ddacSmrg                                 UPDATE_PRED(0),
1785ad43ddacSmrg                                 WRITE_MASK(1),
1786ad43ddacSmrg                                 FOG_MERGE(0),
1787ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
1788ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
1789ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1790ad43ddacSmrg                                 DST_GPR(0),
1791ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1792ad43ddacSmrg                                 DST_ELEM(ELEM_Y),
1793ad43ddacSmrg                                 CLAMP(0));
1794ad43ddacSmrg
17950974d292Smrg    /* 32/33 - dst - mask */
1796b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
1797b7e1c893Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
1798b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
1799b7e1c893Smrg			     BUFFER_ID(0),
1800b7e1c893Smrg			     SRC_GPR(0),
1801b7e1c893Smrg			     SRC_REL(ABSOLUTE),
1802b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
18030974d292Smrg			     MEGA_FETCH_COUNT(24));
18040974d292Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(2),
1805b7e1c893Smrg				 DST_REL(0),
1806b7e1c893Smrg				 DST_SEL_X(SQ_SEL_X),
1807b7e1c893Smrg				 DST_SEL_Y(SQ_SEL_Y),
1808b7e1c893Smrg				 DST_SEL_Z(SQ_SEL_0),
1809b7e1c893Smrg				 DST_SEL_W(SQ_SEL_1),
1810b7e1c893Smrg				 USE_CONST_FIELDS(0),
1811ad43ddacSmrg				 DATA_FORMAT(FMT_32_32_FLOAT),
1812ad43ddacSmrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
1813ad43ddacSmrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
1814b7e1c893Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
1815b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
1816b7e1c893Smrg			     ENDIAN_SWAP(ENDIAN_NONE),
1817b7e1c893Smrg			     CONST_BUF_NO_STRIDE(0),
1818b7e1c893Smrg			     MEGA_FETCH(1));
1819b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
18200974d292Smrg    /* 34/35 - src */
1821b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
1822b7e1c893Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
1823b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
1824b7e1c893Smrg			     BUFFER_ID(0),
1825b7e1c893Smrg			     SRC_GPR(0),
1826b7e1c893Smrg			     SRC_REL(ABSOLUTE),
1827b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
1828b7e1c893Smrg			     MEGA_FETCH_COUNT(8));
18290974d292Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
1830b7e1c893Smrg				 DST_REL(0),
1831b7e1c893Smrg				 DST_SEL_X(SQ_SEL_X),
1832b7e1c893Smrg				 DST_SEL_Y(SQ_SEL_Y),
1833ad43ddacSmrg				 DST_SEL_Z(SQ_SEL_1),
1834ad43ddacSmrg				 DST_SEL_W(SQ_SEL_0),
1835b7e1c893Smrg				 USE_CONST_FIELDS(0),
1836ad43ddacSmrg				 DATA_FORMAT(FMT_32_32_FLOAT),
1837ad43ddacSmrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
1838ad43ddacSmrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
1839b7e1c893Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
1840b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
1841b7e1c893Smrg			     ENDIAN_SWAP(ENDIAN_NONE),
1842b7e1c893Smrg			     CONST_BUF_NO_STRIDE(0),
1843b7e1c893Smrg			     MEGA_FETCH(0));
1844b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
18450974d292Smrg    /* 36/37 - mask */
18460974d292Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
18470974d292Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
18480974d292Smrg			     FETCH_WHOLE_QUAD(0),
18490974d292Smrg			     BUFFER_ID(0),
18500974d292Smrg			     SRC_GPR(0),
18510974d292Smrg			     SRC_REL(ABSOLUTE),
18520974d292Smrg			     SRC_SEL_X(SQ_SEL_X),
18530974d292Smrg			     MEGA_FETCH_COUNT(8));
18540974d292Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
18550974d292Smrg				 DST_REL(0),
18560974d292Smrg				 DST_SEL_X(SQ_SEL_X),
18570974d292Smrg				 DST_SEL_Y(SQ_SEL_Y),
18580974d292Smrg				 DST_SEL_Z(SQ_SEL_1),
18590974d292Smrg				 DST_SEL_W(SQ_SEL_0),
18600974d292Smrg				 USE_CONST_FIELDS(0),
18610974d292Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
18620974d292Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
18630974d292Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
18640974d292Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
18650974d292Smrg    shader[i++] = VTX_DWORD2(OFFSET(16),
18660974d292Smrg			     ENDIAN_SWAP(ENDIAN_NONE),
18670974d292Smrg			     CONST_BUF_NO_STRIDE(0),
18680974d292Smrg			     MEGA_FETCH(0));
18690974d292Smrg    shader[i++] = VTX_DWORD_PAD;
1870b7e1c893Smrg
18710974d292Smrg    /* 38/39 - dst - non-mask */
18720974d292Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
18730974d292Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
18740974d292Smrg			     FETCH_WHOLE_QUAD(0),
18750974d292Smrg			     BUFFER_ID(0),
18760974d292Smrg			     SRC_GPR(0),
18770974d292Smrg			     SRC_REL(ABSOLUTE),
18780974d292Smrg			     SRC_SEL_X(SQ_SEL_X),
18790974d292Smrg			     MEGA_FETCH_COUNT(16));
18800974d292Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
18810974d292Smrg				 DST_REL(0),
18820974d292Smrg				 DST_SEL_X(SQ_SEL_X),
18830974d292Smrg				 DST_SEL_Y(SQ_SEL_Y),
18840974d292Smrg				 DST_SEL_Z(SQ_SEL_0),
18850974d292Smrg				 DST_SEL_W(SQ_SEL_1),
18860974d292Smrg				 USE_CONST_FIELDS(0),
18870974d292Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
18880974d292Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
18890974d292Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
18900974d292Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
18910974d292Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
18920974d292Smrg			     ENDIAN_SWAP(ENDIAN_NONE),
18930974d292Smrg			     CONST_BUF_NO_STRIDE(0),
18940974d292Smrg			     MEGA_FETCH(1));
18950974d292Smrg    shader[i++] = VTX_DWORD_PAD;
18960974d292Smrg    /* 40/41 - src */
18970974d292Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
18980974d292Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
18990974d292Smrg			     FETCH_WHOLE_QUAD(0),
19000974d292Smrg			     BUFFER_ID(0),
19010974d292Smrg			     SRC_GPR(0),
19020974d292Smrg			     SRC_REL(ABSOLUTE),
19030974d292Smrg			     SRC_SEL_X(SQ_SEL_X),
19040974d292Smrg			     MEGA_FETCH_COUNT(8));
19050974d292Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
19060974d292Smrg				 DST_REL(0),
19070974d292Smrg				 DST_SEL_X(SQ_SEL_X),
19080974d292Smrg				 DST_SEL_Y(SQ_SEL_Y),
19090974d292Smrg				 DST_SEL_Z(SQ_SEL_1),
19100974d292Smrg				 DST_SEL_W(SQ_SEL_0),
19110974d292Smrg				 USE_CONST_FIELDS(0),
19120974d292Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
19130974d292Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
19140974d292Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
19150974d292Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
19160974d292Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
19170974d292Smrg			     ENDIAN_SWAP(ENDIAN_NONE),
19180974d292Smrg			     CONST_BUF_NO_STRIDE(0),
19190974d292Smrg			     MEGA_FETCH(0));
19200974d292Smrg    shader[i++] = VTX_DWORD_PAD;
19210974d292Smrg
19220974d292Smrg    return i;
19230974d292Smrg}
19240974d292Smrg
19250974d292Smrg/* comp ps: emits the "comp" pixel-shader microcode into shader[], one dword per shader[i++] store, and returns the number of dwords written (40 as written here). ChipSet is only forwarded to ALU_DWORD1_OP2. Numbers in the comments below count two-dword slots: CF entries 0-9, ALU ops 10-13, TEX fetches 14-19 (each fetch is three dwords plus a pad dword, i.e. two slots). */
19260974d292Smrgint R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
19270974d292Smrg{
19280974d292Smrg    int i = 0; /* running dword index into shader[]; also the return value */
19290974d292Smrg
19300974d292Smrg    /* 0 - CALL slot 3 (mask sub), conditional on SQ_CF_COND_BOOL */
19310974d292Smrg    shader[i++] = CF_DWORD0(ADDR(3));
19320974d292Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
19330974d292Smrg                            CF_CONST(0),
19340974d292Smrg                            COND(SQ_CF_COND_BOOL),
19350974d292Smrg                            I_COUNT(0),
19360974d292Smrg                            CALL_COUNT(0),
19370974d292Smrg                            END_OF_PROGRAM(0),
19380974d292Smrg                            VALID_PIXEL_MODE(0),
19390974d292Smrg                            CF_INST(SQ_CF_INST_CALL),
19400974d292Smrg                            WHOLE_QUAD_MODE(0),
19410974d292Smrg                            BARRIER(0));
19420974d292Smrg    /* 1 - CALL slot 7 (non-mask sub), conditional on SQ_CF_COND_NOT_BOOL (same CF_CONST(0) as slot 0) */
19430974d292Smrg    shader[i++] = CF_DWORD0(ADDR(7));
19440974d292Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
19450974d292Smrg                            CF_CONST(0),
19460974d292Smrg                            COND(SQ_CF_COND_NOT_BOOL),
19470974d292Smrg                            I_COUNT(0),
19480974d292Smrg                            CALL_COUNT(0),
19490974d292Smrg                            END_OF_PROGRAM(0),
19500974d292Smrg                            VALID_PIXEL_MODE(0),
19510974d292Smrg                            CF_INST(SQ_CF_INST_CALL),
19520974d292Smrg                            WHOLE_QUAD_MODE(0),
19530974d292Smrg                            BARRIER(0));
19540974d292Smrg    /* 2 - NOP carrying END_OF_PROGRAM(1); the only entry in this shader with that flag set */
19550974d292Smrg    shader[i++] = CF_DWORD0(ADDR(0));
19560974d292Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
19570974d292Smrg                            CF_CONST(0),
19580974d292Smrg                            COND(SQ_CF_COND_ACTIVE),
19590974d292Smrg                            I_COUNT(0),
19600974d292Smrg                            CALL_COUNT(0),
19610974d292Smrg                            END_OF_PROGRAM(1),
19620974d292Smrg                            VALID_PIXEL_MODE(0),
19630974d292Smrg                            CF_INST(SQ_CF_INST_NOP),
19640974d292Smrg                            WHOLE_QUAD_MODE(0),
19650974d292Smrg                            BARRIER(1));
19660974d292Smrg
19670974d292Smrg    /* 3 - mask sub: TEX clause at slot 14 with I_COUNT(2), i.e. the src fetch (14/15) and the mask fetch (16/17) */
19680974d292Smrg    shader[i++] = CF_DWORD0(ADDR(14));
19690974d292Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
19700974d292Smrg			    CF_CONST(0),
19710974d292Smrg			    COND(SQ_CF_COND_ACTIVE),
19720974d292Smrg			    I_COUNT(2),
19730974d292Smrg			    CALL_COUNT(0),
19740974d292Smrg			    END_OF_PROGRAM(0),
19750974d292Smrg			    VALID_PIXEL_MODE(0),
19760974d292Smrg			    CF_INST(SQ_CF_INST_TEX),
19770974d292Smrg			    WHOLE_QUAD_MODE(0),
19780974d292Smrg			    BARRIER(1));
19790974d292Smrg
19800974d292Smrg    /* 4 - ALU clause at slot 10 with I_COUNT(4): the four MULs in slots 10-13 */
19810974d292Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(10),
19820974d292Smrg				KCACHE_BANK0(0),
19830974d292Smrg				KCACHE_BANK1(0),
19840974d292Smrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
19850974d292Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
19860974d292Smrg				KCACHE_ADDR0(0),
19870974d292Smrg				KCACHE_ADDR1(0),
19880974d292Smrg				I_COUNT(4),
19890974d292Smrg				USES_WATERFALL(0),
19900974d292Smrg				CF_INST(SQ_CF_INST_ALU),
19910974d292Smrg				WHOLE_QUAD_MODE(0),
19920974d292Smrg				BARRIER(1));
19930974d292Smrg
19940974d292Smrg    /* 5 - export gpr[2] (destination of the MULs in slots 10-13) to CF_PIXEL_MRT0 with EXPORT_DONE */
19950974d292Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
19960974d292Smrg					  TYPE(SQ_EXPORT_PIXEL),
19970974d292Smrg					  RW_GPR(2),
19980974d292Smrg					  RW_REL(ABSOLUTE),
19990974d292Smrg					  INDEX_GPR(0),
20000974d292Smrg					  ELEM_SIZE(1));
20010974d292Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
20020974d292Smrg					       SRC_SEL_Y(SQ_SEL_Y),
20030974d292Smrg					       SRC_SEL_Z(SQ_SEL_Z),
20040974d292Smrg					       SRC_SEL_W(SQ_SEL_W),
20050974d292Smrg					       R6xx_ELEM_LOOP(0),
20060974d292Smrg					       BURST_COUNT(1),
20070974d292Smrg					       END_OF_PROGRAM(0),
20080974d292Smrg					       VALID_PIXEL_MODE(0),
20090974d292Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
20100974d292Smrg					       WHOLE_QUAD_MODE(0),
20110974d292Smrg					       BARRIER(1));
20120974d292Smrg    /* 6 - RETURN: last entry of the mask sub that starts at slot 3 */
20130974d292Smrg    shader[i++] = CF_DWORD0(ADDR(0));
20140974d292Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
20150974d292Smrg			    CF_CONST(0),
20160974d292Smrg			    COND(SQ_CF_COND_ACTIVE),
20170974d292Smrg			    I_COUNT(0),
20180974d292Smrg			    CALL_COUNT(0),
20190974d292Smrg			    END_OF_PROGRAM(0),
20200974d292Smrg			    VALID_PIXEL_MODE(0),
20210974d292Smrg			    CF_INST(SQ_CF_INST_RETURN),
20220974d292Smrg			    WHOLE_QUAD_MODE(0),
20230974d292Smrg			    BARRIER(1));
20240974d292Smrg
20250974d292Smrg    /* 7 - non-mask sub: TEX clause at slot 18 with I_COUNT(1), i.e. the single src fetch (18/19) */
20260974d292Smrg    shader[i++] = CF_DWORD0(ADDR(18));
2027b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
2028b7e1c893Smrg			    CF_CONST(0),
2029b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
2030b7e1c893Smrg			    I_COUNT(1),
2031b7e1c893Smrg			    CALL_COUNT(0),
2032b7e1c893Smrg			    END_OF_PROGRAM(0),
2033b7e1c893Smrg			    VALID_PIXEL_MODE(0),
2034b7e1c893Smrg			    CF_INST(SQ_CF_INST_TEX),
2035b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
2036b7e1c893Smrg			    BARRIER(1));
20370974d292Smrg    /* 8 - export gpr[0] (destination of the fetch in slots 18/19) to CF_PIXEL_MRT0 with EXPORT_DONE; no ALU step on this path */
2038b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
2039b7e1c893Smrg					  TYPE(SQ_EXPORT_PIXEL),
2040b7e1c893Smrg					  RW_GPR(0),
2041b7e1c893Smrg					  RW_REL(ABSOLUTE),
2042b7e1c893Smrg					  INDEX_GPR(0),
2043b7e1c893Smrg					  ELEM_SIZE(1));
2044b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
2045b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
2046b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
2047b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
2048b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
2049b7e1c893Smrg					       BURST_COUNT(1),
20500974d292Smrg					       END_OF_PROGRAM(0),
2051b7e1c893Smrg					       VALID_PIXEL_MODE(0),
2052b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
2053b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
2054b7e1c893Smrg					       BARRIER(1));
20550974d292Smrg    /* 9 - RETURN: last entry of the non-mask sub that starts at slot 7 */
20560974d292Smrg    shader[i++] = CF_DWORD0(ADDR(0));
20570974d292Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
20580974d292Smrg			    CF_CONST(0),
20590974d292Smrg			    COND(SQ_CF_COND_ACTIVE),
20600974d292Smrg			    I_COUNT(0),
20610974d292Smrg			    CALL_COUNT(0),
20620974d292Smrg			    END_OF_PROGRAM(0),
20630974d292Smrg			    VALID_PIXEL_MODE(0),
20640974d292Smrg			    CF_INST(SQ_CF_INST_RETURN),
20650974d292Smrg			    WHOLE_QUAD_MODE(0),
20660974d292Smrg			    BARRIER(1));
20670974d292Smrg
20680974d292Smrg    /* 10 - alu 0 (first op of the ALU clause referenced by CF slot 4) */
20690974d292Smrg    /* MUL gpr[2].x gpr[1].x gpr[0].x  -- mask fetch result * src fetch result, CLAMP(1) on the destination */
20700974d292Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
20710974d292Smrg			     SRC0_REL(ABSOLUTE),
20720974d292Smrg			     SRC0_ELEM(ELEM_X),
20730974d292Smrg			     SRC0_NEG(0),
20740974d292Smrg			     SRC1_SEL(0),
20750974d292Smrg			     SRC1_REL(ABSOLUTE),
20760974d292Smrg			     SRC1_ELEM(ELEM_X),
20770974d292Smrg			     SRC1_NEG(0),
20780974d292Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
20790974d292Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
20800974d292Smrg			     LAST(0));
20810974d292Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
20820974d292Smrg				 SRC0_ABS(0),
20830974d292Smrg				 SRC1_ABS(0),
20840974d292Smrg				 UPDATE_EXECUTE_MASK(0),
20850974d292Smrg				 UPDATE_PRED(0),
20860974d292Smrg				 WRITE_MASK(1),
20870974d292Smrg				 FOG_MERGE(0),
20880974d292Smrg				 OMOD(SQ_ALU_OMOD_OFF),
20890974d292Smrg				 ALU_INST(SQ_OP2_INST_MUL),
20900974d292Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
20910974d292Smrg				 DST_GPR(2),
20920974d292Smrg				 DST_REL(ABSOLUTE),
20930974d292Smrg				 DST_ELEM(ELEM_X),
20940974d292Smrg				 CLAMP(1));
20950974d292Smrg    /* 11 - alu 1 */
20960974d292Smrg    /* MUL gpr[2].y gpr[1].y gpr[0].y  -- same as alu 0 for the Y element */
20970974d292Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
20980974d292Smrg			     SRC0_REL(ABSOLUTE),
20990974d292Smrg			     SRC0_ELEM(ELEM_Y),
21000974d292Smrg			     SRC0_NEG(0),
21010974d292Smrg			     SRC1_SEL(0),
21020974d292Smrg			     SRC1_REL(ABSOLUTE),
21030974d292Smrg			     SRC1_ELEM(ELEM_Y),
21040974d292Smrg			     SRC1_NEG(0),
21050974d292Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
21060974d292Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
21070974d292Smrg			     LAST(0));
21080974d292Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
21090974d292Smrg				 SRC0_ABS(0),
21100974d292Smrg				 SRC1_ABS(0),
21110974d292Smrg				 UPDATE_EXECUTE_MASK(0),
21120974d292Smrg				 UPDATE_PRED(0),
21130974d292Smrg				 WRITE_MASK(1),
21140974d292Smrg				 FOG_MERGE(0),
21150974d292Smrg				 OMOD(SQ_ALU_OMOD_OFF),
21160974d292Smrg				 ALU_INST(SQ_OP2_INST_MUL),
21170974d292Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
21180974d292Smrg				 DST_GPR(2),
21190974d292Smrg				 DST_REL(ABSOLUTE),
21200974d292Smrg				 DST_ELEM(ELEM_Y),
21210974d292Smrg				 CLAMP(1));
21220974d292Smrg    /* 12 - alu 2 */
21230974d292Smrg    /* MUL gpr[2].z gpr[1].z gpr[0].z  -- same as alu 0 for the Z element */
21240974d292Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
21250974d292Smrg			     SRC0_REL(ABSOLUTE),
21260974d292Smrg			     SRC0_ELEM(ELEM_Z),
21270974d292Smrg			     SRC0_NEG(0),
21280974d292Smrg			     SRC1_SEL(0),
21290974d292Smrg			     SRC1_REL(ABSOLUTE),
21300974d292Smrg			     SRC1_ELEM(ELEM_Z),
21310974d292Smrg			     SRC1_NEG(0),
21320974d292Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
21330974d292Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
21340974d292Smrg			     LAST(0));
21350974d292Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
21360974d292Smrg				 SRC0_ABS(0),
21370974d292Smrg				 SRC1_ABS(0),
21380974d292Smrg				 UPDATE_EXECUTE_MASK(0),
21390974d292Smrg				 UPDATE_PRED(0),
21400974d292Smrg				 WRITE_MASK(1),
21410974d292Smrg				 FOG_MERGE(0),
21420974d292Smrg				 OMOD(SQ_ALU_OMOD_OFF),
21430974d292Smrg				 ALU_INST(SQ_OP2_INST_MUL),
21440974d292Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
21450974d292Smrg				 DST_GPR(2),
21460974d292Smrg				 DST_REL(ABSOLUTE),
21470974d292Smrg				 DST_ELEM(ELEM_Z),
21480974d292Smrg				 CLAMP(1));
21490974d292Smrg    /* 13 - alu 3 */
21500974d292Smrg    /* MUL gpr[2].w gpr[1].w gpr[0].w  -- same as alu 0 for the W element; the only one of the four with LAST(1) */
21510974d292Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
21520974d292Smrg			     SRC0_REL(ABSOLUTE),
21530974d292Smrg			     SRC0_ELEM(ELEM_W),
21540974d292Smrg			     SRC0_NEG(0),
21550974d292Smrg			     SRC1_SEL(0),
21560974d292Smrg			     SRC1_REL(ABSOLUTE),
21570974d292Smrg			     SRC1_ELEM(ELEM_W),
21580974d292Smrg			     SRC1_NEG(0),
21590974d292Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
21600974d292Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
21610974d292Smrg			     LAST(1));
21620974d292Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
21630974d292Smrg				 SRC0_ABS(0),
21640974d292Smrg				 SRC1_ABS(0),
21650974d292Smrg				 UPDATE_EXECUTE_MASK(0),
21660974d292Smrg				 UPDATE_PRED(0),
21670974d292Smrg				 WRITE_MASK(1),
21680974d292Smrg				 FOG_MERGE(0),
21690974d292Smrg				 OMOD(SQ_ALU_OMOD_OFF),
21700974d292Smrg				 ALU_INST(SQ_OP2_INST_MUL),
21710974d292Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
21720974d292Smrg				 DST_GPR(2),
21730974d292Smrg				 DST_REL(ABSOLUTE),
21740974d292Smrg				 DST_ELEM(ELEM_W),
21750974d292Smrg				 CLAMP(1));
2176b7e1c893Smrg
21770974d292Smrg    /* 14/15 - src - mask: SAMPLE with RESOURCE_ID(0)/SAMPLER_ID(0), SRC_GPR(0) -> DST_GPR(0); first fetch of the clause in CF slot 3 */
21780974d292Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
21790974d292Smrg			     BC_FRAC_MODE(0),
21800974d292Smrg			     FETCH_WHOLE_QUAD(0),
21810974d292Smrg			     RESOURCE_ID(0),
21820974d292Smrg			     SRC_GPR(0),
21830974d292Smrg			     SRC_REL(ABSOLUTE),
21840974d292Smrg			     R7xx_ALT_CONST(0));
21850974d292Smrg    shader[i++] = TEX_DWORD1(DST_GPR(0),
21860974d292Smrg			     DST_REL(ABSOLUTE),
21870974d292Smrg			     DST_SEL_X(SQ_SEL_X),
21880974d292Smrg			     DST_SEL_Y(SQ_SEL_Y),
21890974d292Smrg			     DST_SEL_Z(SQ_SEL_Z),
21900974d292Smrg			     DST_SEL_W(SQ_SEL_W),
21910974d292Smrg			     LOD_BIAS(0),
21920974d292Smrg			     COORD_TYPE_X(TEX_NORMALIZED),
21930974d292Smrg			     COORD_TYPE_Y(TEX_NORMALIZED),
21940974d292Smrg			     COORD_TYPE_Z(TEX_NORMALIZED),
21950974d292Smrg			     COORD_TYPE_W(TEX_NORMALIZED));
21960974d292Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
21970974d292Smrg			     OFFSET_Y(0),
21980974d292Smrg			     OFFSET_Z(0),
21990974d292Smrg			     SAMPLER_ID(0),
22000974d292Smrg			     SRC_SEL_X(SQ_SEL_X),
22010974d292Smrg			     SRC_SEL_Y(SQ_SEL_Y),
22020974d292Smrg			     SRC_SEL_Z(SQ_SEL_0),
22030974d292Smrg			     SRC_SEL_W(SQ_SEL_1));
22040974d292Smrg    shader[i++] = TEX_DWORD_PAD;
22050974d292Smrg    /* 16/17 - mask: SAMPLE with RESOURCE_ID(1)/SAMPLER_ID(1), SRC_GPR(1) -> DST_GPR(1); second fetch of the clause in CF slot 3 */
22060974d292Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
22070974d292Smrg			     BC_FRAC_MODE(0),
22080974d292Smrg			     FETCH_WHOLE_QUAD(0),
22090974d292Smrg			     RESOURCE_ID(1),
22100974d292Smrg			     SRC_GPR(1),
22110974d292Smrg			     SRC_REL(ABSOLUTE),
22120974d292Smrg			     R7xx_ALT_CONST(0));
22130974d292Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
22140974d292Smrg			     DST_REL(ABSOLUTE),
22150974d292Smrg			     DST_SEL_X(SQ_SEL_X),
22160974d292Smrg			     DST_SEL_Y(SQ_SEL_Y),
22170974d292Smrg			     DST_SEL_Z(SQ_SEL_Z),
22180974d292Smrg			     DST_SEL_W(SQ_SEL_W),
22190974d292Smrg			     LOD_BIAS(0),
22200974d292Smrg			     COORD_TYPE_X(TEX_NORMALIZED),
22210974d292Smrg			     COORD_TYPE_Y(TEX_NORMALIZED),
22220974d292Smrg			     COORD_TYPE_Z(TEX_NORMALIZED),
22230974d292Smrg			     COORD_TYPE_W(TEX_NORMALIZED));
22240974d292Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
22250974d292Smrg			     OFFSET_Y(0),
22260974d292Smrg			     OFFSET_Z(0),
22270974d292Smrg			     SAMPLER_ID(1),
22280974d292Smrg			     SRC_SEL_X(SQ_SEL_X),
22290974d292Smrg			     SRC_SEL_Y(SQ_SEL_Y),
22300974d292Smrg			     SRC_SEL_Z(SQ_SEL_0),
22310974d292Smrg			     SRC_SEL_W(SQ_SEL_1));
22320974d292Smrg    shader[i++] = TEX_DWORD_PAD;
2233b7e1c893Smrg
22340974d292Smrg    /* 18/19 - src - non-mask: SAMPLE with RESOURCE_ID(0)/SAMPLER_ID(0), SRC_GPR(0) -> DST_GPR(0); the single fetch of the clause in CF slot 7 */
2235b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
2236b7e1c893Smrg			     BC_FRAC_MODE(0),
2237b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
2238b7e1c893Smrg			     RESOURCE_ID(0),
2239b7e1c893Smrg			     SRC_GPR(0),
2240b7e1c893Smrg			     SRC_REL(ABSOLUTE),
2241b7e1c893Smrg			     R7xx_ALT_CONST(0));
2242b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(0),
2243b7e1c893Smrg			     DST_REL(ABSOLUTE),
2244b7e1c893Smrg			     DST_SEL_X(SQ_SEL_X),
2245b7e1c893Smrg			     DST_SEL_Y(SQ_SEL_Y),
2246b7e1c893Smrg			     DST_SEL_Z(SQ_SEL_Z),
2247b7e1c893Smrg			     DST_SEL_W(SQ_SEL_W),
2248b7e1c893Smrg			     LOD_BIAS(0),
2249b7e1c893Smrg			     COORD_TYPE_X(TEX_NORMALIZED),
2250b7e1c893Smrg			     COORD_TYPE_Y(TEX_NORMALIZED),
2251b7e1c893Smrg			     COORD_TYPE_Z(TEX_NORMALIZED),
2252b7e1c893Smrg			     COORD_TYPE_W(TEX_NORMALIZED));
2253b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
2254b7e1c893Smrg			     OFFSET_Y(0),
2255b7e1c893Smrg			     OFFSET_Z(0),
2256b7e1c893Smrg			     SAMPLER_ID(0),
2257b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
2258b7e1c893Smrg			     SRC_SEL_Y(SQ_SEL_Y),
2259b7e1c893Smrg			     SRC_SEL_Z(SQ_SEL_0),
2260b7e1c893Smrg			     SRC_SEL_W(SQ_SEL_1));
2261b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
2262b7e1c893Smrg
2263b7e1c893Smrg    return i; /* total number of dwords stored into shader[] */
2264b7e1c893Smrg}
2265