r600_shader.c revision 921a55d8
1b7e1c893Smrg/*
2b7e1c893Smrg * Copyright 2008 Advanced Micro Devices, Inc.
3b7e1c893Smrg *
4b7e1c893Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b7e1c893Smrg * copy of this software and associated documentation files (the "Software"),
6b7e1c893Smrg * to deal in the Software without restriction, including without limitation
7b7e1c893Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b7e1c893Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b7e1c893Smrg * Software is furnished to do so, subject to the following conditions:
10b7e1c893Smrg *
11b7e1c893Smrg * The above copyright notice and this permission notice (including the next
12b7e1c893Smrg * paragraph) shall be included in all copies or substantial portions of the
13b7e1c893Smrg * Software.
14b7e1c893Smrg *
15b7e1c893Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b7e1c893Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b7e1c893Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b7e1c893Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b7e1c893Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20b7e1c893Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21b7e1c893Smrg * SOFTWARE.
22b7e1c893Smrg *
23b7e1c893Smrg * Author: Alex Deucher <alexander.deucher@amd.com>
24b7e1c893Smrg *
25b7e1c893Smrg */
26b7e1c893Smrg
27b7e1c893Smrg#ifdef HAVE_CONFIG_H
28b7e1c893Smrg#include "config.h"
29b7e1c893Smrg#endif
30b7e1c893Smrg
31b7e1c893Smrg#include "xf86.h"
32b7e1c893Smrg
33b7e1c893Smrg#include "radeon.h"
34b7e1c893Smrg#include "r600_shader.h"
35b7e1c893Smrg#include "r600_reg.h"
36b7e1c893Smrg
37b7e1c893Smrg/* solid vs --------------------------------------- */
38b7e1c893Smrgint R600_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader)
39b7e1c893Smrg{
40b7e1c893Smrg    int i = 0;
41b7e1c893Smrg
42b7e1c893Smrg    /* 0 */
43b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(4));
44b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
45b7e1c893Smrg			    CF_CONST(0),
46b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
47b7e1c893Smrg			    I_COUNT(1),
48b7e1c893Smrg			    CALL_COUNT(0),
49b7e1c893Smrg			    END_OF_PROGRAM(0),
50b7e1c893Smrg			    VALID_PIXEL_MODE(0),
51b7e1c893Smrg			    CF_INST(SQ_CF_INST_VTX),
52b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
53b7e1c893Smrg			    BARRIER(1));
54b7e1c893Smrg    /* 1 */
55b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
56b7e1c893Smrg					  TYPE(SQ_EXPORT_POS),
57b7e1c893Smrg					  RW_GPR(1),
58b7e1c893Smrg					  RW_REL(ABSOLUTE),
59b7e1c893Smrg					  INDEX_GPR(0),
60b7e1c893Smrg					  ELEM_SIZE(0));
61b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
62b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
63b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
64b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
65b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
66b7e1c893Smrg					       BURST_COUNT(1),
67b7e1c893Smrg					       END_OF_PROGRAM(0),
68b7e1c893Smrg					       VALID_PIXEL_MODE(0),
69b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
70b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
71b7e1c893Smrg					       BARRIER(1));
72b7e1c893Smrg    /* 2 - always export a param whether it's used or not */
73b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
74b7e1c893Smrg					  TYPE(SQ_EXPORT_PARAM),
75b7e1c893Smrg					  RW_GPR(0),
76b7e1c893Smrg					  RW_REL(ABSOLUTE),
77b7e1c893Smrg					  INDEX_GPR(0),
78b7e1c893Smrg					  ELEM_SIZE(0));
79b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
80b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
81b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
82b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
83b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
84b7e1c893Smrg					       BURST_COUNT(0),
85b7e1c893Smrg					       END_OF_PROGRAM(1),
86b7e1c893Smrg					       VALID_PIXEL_MODE(0),
87b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
88b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
89b7e1c893Smrg					       BARRIER(0));
90b7e1c893Smrg    /* 3 - padding */
91b7e1c893Smrg    shader[i++] = 0x00000000;
92b7e1c893Smrg    shader[i++] = 0x00000000;
93b7e1c893Smrg    /* 4/5 */
94b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
95b7e1c893Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
96b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
97b7e1c893Smrg			     BUFFER_ID(0),
98b7e1c893Smrg			     SRC_GPR(0),
99b7e1c893Smrg			     SRC_REL(ABSOLUTE),
100b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
101b7e1c893Smrg			     MEGA_FETCH_COUNT(8));
102b7e1c893Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
103b7e1c893Smrg				 DST_REL(0),
104b7e1c893Smrg				 DST_SEL_X(SQ_SEL_X),
105b7e1c893Smrg				 DST_SEL_Y(SQ_SEL_Y),
106b7e1c893Smrg				 DST_SEL_Z(SQ_SEL_0),
107b7e1c893Smrg				 DST_SEL_W(SQ_SEL_1),
108b7e1c893Smrg				 USE_CONST_FIELDS(0),
109ad43ddacSmrg				 DATA_FORMAT(FMT_32_32_FLOAT),
110ad43ddacSmrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
111ad43ddacSmrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
112b7e1c893Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
113b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
114b7e1c893Smrg			     ENDIAN_SWAP(ENDIAN_NONE),
115b7e1c893Smrg			     CONST_BUF_NO_STRIDE(0),
116b7e1c893Smrg			     MEGA_FETCH(1));
117b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
118b7e1c893Smrg
119b7e1c893Smrg    return i;
120b7e1c893Smrg}
121b7e1c893Smrg
122b7e1c893Smrg/* solid ps --------------------------------------- */
123b7e1c893Smrgint R600_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader)
124b7e1c893Smrg{
125b7e1c893Smrg    int i = 0;
126b7e1c893Smrg
127b7e1c893Smrg    /* 0 */
128b7e1c893Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(2),
129b7e1c893Smrg				KCACHE_BANK0(0),
130b7e1c893Smrg				KCACHE_BANK1(0),
131b7e1c893Smrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
132b7e1c893Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
133b7e1c893Smrg				KCACHE_ADDR0(0),
134b7e1c893Smrg				KCACHE_ADDR1(0),
135b7e1c893Smrg				I_COUNT(4),
136b7e1c893Smrg				USES_WATERFALL(0),
137b7e1c893Smrg				CF_INST(SQ_CF_INST_ALU),
138b7e1c893Smrg				WHOLE_QUAD_MODE(0),
139b7e1c893Smrg				BARRIER(1));
140b7e1c893Smrg    /* 1 */
141b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
142b7e1c893Smrg					  TYPE(SQ_EXPORT_PIXEL),
143b7e1c893Smrg					  RW_GPR(0),
144b7e1c893Smrg					  RW_REL(ABSOLUTE),
145b7e1c893Smrg					  INDEX_GPR(0),
146b7e1c893Smrg					  ELEM_SIZE(1));
147b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
148b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
149b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
150b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
151b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
152b7e1c893Smrg					       BURST_COUNT(1),
153b7e1c893Smrg					       END_OF_PROGRAM(1),
154b7e1c893Smrg					       VALID_PIXEL_MODE(0),
155b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
156b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
157b7e1c893Smrg					       BARRIER(1));
158b7e1c893Smrg
159b7e1c893Smrg    /* 2 */
160921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
161b7e1c893Smrg			     SRC0_REL(ABSOLUTE),
162b7e1c893Smrg			     SRC0_ELEM(ELEM_X),
163b7e1c893Smrg			     SRC0_NEG(0),
164921a55d8Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
165b7e1c893Smrg			     SRC1_REL(ABSOLUTE),
166b7e1c893Smrg			     SRC1_ELEM(ELEM_X),
167b7e1c893Smrg			     SRC1_NEG(0),
168b7e1c893Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
169b7e1c893Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
170b7e1c893Smrg			     LAST(0));
171b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
172b7e1c893Smrg				 SRC0_ABS(0),
173b7e1c893Smrg				 SRC1_ABS(0),
174b7e1c893Smrg				 UPDATE_EXECUTE_MASK(0),
175b7e1c893Smrg				 UPDATE_PRED(0),
176b7e1c893Smrg				 WRITE_MASK(1),
177b7e1c893Smrg				 FOG_MERGE(0),
178b7e1c893Smrg				 OMOD(SQ_ALU_OMOD_OFF),
179b7e1c893Smrg				 ALU_INST(SQ_OP2_INST_MOV),
180b7e1c893Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
181b7e1c893Smrg				 DST_GPR(0),
182b7e1c893Smrg				 DST_REL(ABSOLUTE),
183b7e1c893Smrg				 DST_ELEM(ELEM_X),
184b7e1c893Smrg				 CLAMP(1));
185b7e1c893Smrg    /* 3 */
186921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
187b7e1c893Smrg			     SRC0_REL(ABSOLUTE),
188b7e1c893Smrg			     SRC0_ELEM(ELEM_Y),
189b7e1c893Smrg			     SRC0_NEG(0),
190921a55d8Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
191b7e1c893Smrg			     SRC1_REL(ABSOLUTE),
192b7e1c893Smrg			     SRC1_ELEM(ELEM_Y),
193b7e1c893Smrg			     SRC1_NEG(0),
194b7e1c893Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
195b7e1c893Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
196b7e1c893Smrg			     LAST(0));
197b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
198b7e1c893Smrg				 SRC0_ABS(0),
199b7e1c893Smrg				 SRC1_ABS(0),
200b7e1c893Smrg				 UPDATE_EXECUTE_MASK(0),
201b7e1c893Smrg				 UPDATE_PRED(0),
202b7e1c893Smrg				 WRITE_MASK(1),
203b7e1c893Smrg				 FOG_MERGE(0),
204b7e1c893Smrg				 OMOD(SQ_ALU_OMOD_OFF),
205b7e1c893Smrg				 ALU_INST(SQ_OP2_INST_MOV),
206b7e1c893Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
207b7e1c893Smrg				 DST_GPR(0),
208b7e1c893Smrg				 DST_REL(ABSOLUTE),
209b7e1c893Smrg				 DST_ELEM(ELEM_Y),
210b7e1c893Smrg				 CLAMP(1));
211b7e1c893Smrg    /* 4 */
212921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
213b7e1c893Smrg			     SRC0_REL(ABSOLUTE),
214b7e1c893Smrg			     SRC0_ELEM(ELEM_Z),
215b7e1c893Smrg			     SRC0_NEG(0),
216921a55d8Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
217b7e1c893Smrg			     SRC1_REL(ABSOLUTE),
218b7e1c893Smrg			     SRC1_ELEM(ELEM_Z),
219b7e1c893Smrg			     SRC1_NEG(0),
220b7e1c893Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
221b7e1c893Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
222b7e1c893Smrg			     LAST(0));
223b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
224b7e1c893Smrg				 SRC0_ABS(0),
225b7e1c893Smrg				 SRC1_ABS(0),
226b7e1c893Smrg				 UPDATE_EXECUTE_MASK(0),
227b7e1c893Smrg				 UPDATE_PRED(0),
228b7e1c893Smrg				 WRITE_MASK(1),
229b7e1c893Smrg				 FOG_MERGE(0),
230b7e1c893Smrg				 OMOD(SQ_ALU_OMOD_OFF),
231b7e1c893Smrg				 ALU_INST(SQ_OP2_INST_MOV),
232b7e1c893Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
233b7e1c893Smrg				 DST_GPR(0),
234b7e1c893Smrg				 DST_REL(ABSOLUTE),
235b7e1c893Smrg				 DST_ELEM(ELEM_Z),
236b7e1c893Smrg				 CLAMP(1));
237b7e1c893Smrg    /* 5 */
238921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
239b7e1c893Smrg			     SRC0_REL(ABSOLUTE),
240b7e1c893Smrg			     SRC0_ELEM(ELEM_W),
241b7e1c893Smrg			     SRC0_NEG(0),
242921a55d8Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
243b7e1c893Smrg			     SRC1_REL(ABSOLUTE),
244b7e1c893Smrg			     SRC1_ELEM(ELEM_W),
245b7e1c893Smrg			     SRC1_NEG(0),
246b7e1c893Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
247b7e1c893Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
248b7e1c893Smrg			     LAST(1));
249b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
250b7e1c893Smrg				 SRC0_ABS(0),
251b7e1c893Smrg				 SRC1_ABS(0),
252b7e1c893Smrg				 UPDATE_EXECUTE_MASK(0),
253b7e1c893Smrg				 UPDATE_PRED(0),
254b7e1c893Smrg				 WRITE_MASK(1),
255b7e1c893Smrg				 FOG_MERGE(0),
256b7e1c893Smrg				 OMOD(SQ_ALU_OMOD_OFF),
257b7e1c893Smrg				 ALU_INST(SQ_OP2_INST_MOV),
258b7e1c893Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
259b7e1c893Smrg				 DST_GPR(0),
260b7e1c893Smrg				 DST_REL(ABSOLUTE),
261b7e1c893Smrg				 DST_ELEM(ELEM_W),
262b7e1c893Smrg				 CLAMP(1));
263b7e1c893Smrg
264b7e1c893Smrg    return i;
265b7e1c893Smrg}
266b7e1c893Smrg
267b7e1c893Smrg/* copy vs --------------------------------------- */
268b7e1c893Smrgint R600_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader)
269b7e1c893Smrg{
270b7e1c893Smrg    int i = 0;
271b7e1c893Smrg
272b7e1c893Smrg    /* 0 */
273b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(4));
274b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
275b7e1c893Smrg			    CF_CONST(0),
276b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
277b7e1c893Smrg			    I_COUNT(2),
278b7e1c893Smrg			    CALL_COUNT(0),
279b7e1c893Smrg			    END_OF_PROGRAM(0),
280b7e1c893Smrg			    VALID_PIXEL_MODE(0),
281b7e1c893Smrg			    CF_INST(SQ_CF_INST_VTX),
282b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
283b7e1c893Smrg			    BARRIER(1));
284b7e1c893Smrg    /* 1 */
285b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
286b7e1c893Smrg					  TYPE(SQ_EXPORT_POS),
287b7e1c893Smrg					  RW_GPR(1),
288b7e1c893Smrg					  RW_REL(ABSOLUTE),
289b7e1c893Smrg					  INDEX_GPR(0),
290b7e1c893Smrg					  ELEM_SIZE(0));
291b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
292b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
293b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
294b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
295b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
296b7e1c893Smrg					       BURST_COUNT(0),
297b7e1c893Smrg					       END_OF_PROGRAM(0),
298b7e1c893Smrg					       VALID_PIXEL_MODE(0),
299b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
300b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
301b7e1c893Smrg					       BARRIER(1));
302b7e1c893Smrg    /* 2 */
303b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
304b7e1c893Smrg					  TYPE(SQ_EXPORT_PARAM),
305b7e1c893Smrg					  RW_GPR(0),
306b7e1c893Smrg					  RW_REL(ABSOLUTE),
307b7e1c893Smrg					  INDEX_GPR(0),
308b7e1c893Smrg					  ELEM_SIZE(0));
309b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
310b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
311b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
312b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
313b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
314b7e1c893Smrg					       BURST_COUNT(0),
315b7e1c893Smrg					       END_OF_PROGRAM(1),
316b7e1c893Smrg					       VALID_PIXEL_MODE(0),
317b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
318b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
319b7e1c893Smrg					       BARRIER(0));
320b7e1c893Smrg    /* 3 */
321b7e1c893Smrg    shader[i++] = 0x00000000;
322b7e1c893Smrg    shader[i++] = 0x00000000;
323b7e1c893Smrg    /* 4/5 */
324b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
325b7e1c893Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
326b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
327b7e1c893Smrg			     BUFFER_ID(0),
328b7e1c893Smrg			     SRC_GPR(0),
329b7e1c893Smrg			     SRC_REL(ABSOLUTE),
330b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
331b7e1c893Smrg			     MEGA_FETCH_COUNT(16));
332b7e1c893Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
333b7e1c893Smrg				 DST_REL(0),
334b7e1c893Smrg				 DST_SEL_X(SQ_SEL_X),
335b7e1c893Smrg				 DST_SEL_Y(SQ_SEL_Y),
336b7e1c893Smrg				 DST_SEL_Z(SQ_SEL_0),
337b7e1c893Smrg				 DST_SEL_W(SQ_SEL_1),
338b7e1c893Smrg				 USE_CONST_FIELDS(0),
339ad43ddacSmrg				 DATA_FORMAT(FMT_32_32_FLOAT),
340ad43ddacSmrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
341ad43ddacSmrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
342b7e1c893Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
343b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
344b7e1c893Smrg			     ENDIAN_SWAP(ENDIAN_NONE),
345b7e1c893Smrg			     CONST_BUF_NO_STRIDE(0),
346b7e1c893Smrg			     MEGA_FETCH(1));
347b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
348b7e1c893Smrg    /* 6/7 */
349b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
350b7e1c893Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
351b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
352b7e1c893Smrg			     BUFFER_ID(0),
353b7e1c893Smrg			     SRC_GPR(0),
354b7e1c893Smrg			     SRC_REL(ABSOLUTE),
355b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
356b7e1c893Smrg			     MEGA_FETCH_COUNT(8));
357b7e1c893Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
358b7e1c893Smrg				 DST_REL(0),
359b7e1c893Smrg				 DST_SEL_X(SQ_SEL_X),
360b7e1c893Smrg				 DST_SEL_Y(SQ_SEL_Y),
361b7e1c893Smrg				 DST_SEL_Z(SQ_SEL_0),
362b7e1c893Smrg				 DST_SEL_W(SQ_SEL_1),
363b7e1c893Smrg				 USE_CONST_FIELDS(0),
364ad43ddacSmrg				 DATA_FORMAT(FMT_32_32_FLOAT),
365ad43ddacSmrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
366ad43ddacSmrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
367b7e1c893Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
368b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
369b7e1c893Smrg			     ENDIAN_SWAP(ENDIAN_NONE),
370b7e1c893Smrg			     CONST_BUF_NO_STRIDE(0),
371b7e1c893Smrg			     MEGA_FETCH(0));
372b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
373b7e1c893Smrg
374b7e1c893Smrg    return i;
375b7e1c893Smrg}
376b7e1c893Smrg
377b7e1c893Smrg/* copy ps --------------------------------------- */
378b7e1c893Smrgint R600_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader)
379b7e1c893Smrg{
380b7e1c893Smrg    int i=0;
381b7e1c893Smrg
382b7e1c893Smrg    /* CF INST 0 */
383b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(2));
384b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
385b7e1c893Smrg			    CF_CONST(0),
386b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
387b7e1c893Smrg			    I_COUNT(1),
388b7e1c893Smrg			    CALL_COUNT(0),
389b7e1c893Smrg			    END_OF_PROGRAM(0),
390b7e1c893Smrg			    VALID_PIXEL_MODE(0),
391b7e1c893Smrg			    CF_INST(SQ_CF_INST_TEX),
392b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
393b7e1c893Smrg			    BARRIER(1));
394b7e1c893Smrg    /* CF INST 1 */
395b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
396b7e1c893Smrg					  TYPE(SQ_EXPORT_PIXEL),
397b7e1c893Smrg					  RW_GPR(0),
398b7e1c893Smrg					  RW_REL(ABSOLUTE),
399b7e1c893Smrg					  INDEX_GPR(0),
400b7e1c893Smrg					  ELEM_SIZE(1));
401b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
402b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
403b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
404b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
405b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
406b7e1c893Smrg					       BURST_COUNT(1),
407b7e1c893Smrg					       END_OF_PROGRAM(1),
408b7e1c893Smrg					       VALID_PIXEL_MODE(0),
409b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
410b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
411b7e1c893Smrg					       BARRIER(1));
412b7e1c893Smrg    /* TEX INST 0 */
413b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
414b7e1c893Smrg			     BC_FRAC_MODE(0),
415b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
416b7e1c893Smrg			     RESOURCE_ID(0),
417b7e1c893Smrg			     SRC_GPR(0),
418b7e1c893Smrg			     SRC_REL(ABSOLUTE),
419b7e1c893Smrg			     R7xx_ALT_CONST(0));
420b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(0),
421b7e1c893Smrg			     DST_REL(ABSOLUTE),
422b7e1c893Smrg			     DST_SEL_X(SQ_SEL_X), /* R */
423b7e1c893Smrg			     DST_SEL_Y(SQ_SEL_Y), /* G */
424b7e1c893Smrg			     DST_SEL_Z(SQ_SEL_Z), /* B */
425b7e1c893Smrg			     DST_SEL_W(SQ_SEL_W), /* A */
426b7e1c893Smrg			     LOD_BIAS(0),
427b7e1c893Smrg			     COORD_TYPE_X(TEX_UNNORMALIZED),
428b7e1c893Smrg			     COORD_TYPE_Y(TEX_UNNORMALIZED),
429b7e1c893Smrg			     COORD_TYPE_Z(TEX_UNNORMALIZED),
430b7e1c893Smrg			     COORD_TYPE_W(TEX_UNNORMALIZED));
431b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
432b7e1c893Smrg			     OFFSET_Y(0),
433b7e1c893Smrg			     OFFSET_Z(0),
434b7e1c893Smrg			     SAMPLER_ID(0),
435b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
436b7e1c893Smrg			     SRC_SEL_Y(SQ_SEL_Y),
437b7e1c893Smrg			     SRC_SEL_Z(SQ_SEL_0),
438b7e1c893Smrg			     SRC_SEL_W(SQ_SEL_1));
439b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
440b7e1c893Smrg
441b7e1c893Smrg    return i;
442b7e1c893Smrg}
443b7e1c893Smrg
/*
 * ; xv vertex shader
 * 00 VTX: ADDR(6) CNT(2)
 *       0  VFETCH R1.xy01, R0.x, fc0  MEGA(16) FORMAT(32_32_FLOAT)
 *          FORMAT_COMP(SIGNED)
 *       1  VFETCH R0.xy01, R0.x, fc0  MINI(8) OFFSET(8) FORMAT(32_32_FLOAT)
 *          FORMAT_COMP(SIGNED)
 * 01 ALU: ADDR(4) CNT(2)
 *       2  x: MUL R0.x, R0.x, C0.x
 *          y: MUL R0.y, R0.y, C0.y
 * 02 EXP_DONE: POS0, R1
 * 03 EXP_DONE: PARAM0, R0  NO_BARRIER
 * END_OF_PROGRAM
 */
455b7e1c893Smrgint R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
456b7e1c893Smrg{
457b7e1c893Smrg    int i = 0;
458b7e1c893Smrg
459b7e1c893Smrg    /* 0 */
460ad43ddacSmrg    shader[i++] = CF_DWORD0(ADDR(6));
461b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
462b7e1c893Smrg                            CF_CONST(0),
463b7e1c893Smrg                            COND(SQ_CF_COND_ACTIVE),
464b7e1c893Smrg                            I_COUNT(2),
465b7e1c893Smrg                            CALL_COUNT(0),
466b7e1c893Smrg                            END_OF_PROGRAM(0),
467b7e1c893Smrg                            VALID_PIXEL_MODE(0),
468b7e1c893Smrg                            CF_INST(SQ_CF_INST_VTX),
469b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
470b7e1c893Smrg                            BARRIER(1));
471ad43ddacSmrg
472ad43ddacSmrg    /* 1 - ALU */
473ad43ddacSmrg    shader[i++] = CF_ALU_DWORD0(ADDR(4),
474ad43ddacSmrg				KCACHE_BANK0(0),
475ad43ddacSmrg				KCACHE_BANK1(0),
476ad43ddacSmrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
477ad43ddacSmrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
478ad43ddacSmrg				KCACHE_ADDR0(0),
479ad43ddacSmrg				KCACHE_ADDR1(0),
480ad43ddacSmrg				I_COUNT(2),
481ad43ddacSmrg				USES_WATERFALL(0),
482ad43ddacSmrg				CF_INST(SQ_CF_INST_ALU),
483ad43ddacSmrg				WHOLE_QUAD_MODE(0),
484ad43ddacSmrg				BARRIER(1));
485ad43ddacSmrg
486ad43ddacSmrg    /* 2 */
487b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
488b7e1c893Smrg                                          TYPE(SQ_EXPORT_POS),
489b7e1c893Smrg                                          RW_GPR(1),
490b7e1c893Smrg                                          RW_REL(ABSOLUTE),
491b7e1c893Smrg                                          INDEX_GPR(0),
492b7e1c893Smrg                                          ELEM_SIZE(3));
493b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
494b7e1c893Smrg                                               SRC_SEL_Y(SQ_SEL_Y),
495b7e1c893Smrg                                               SRC_SEL_Z(SQ_SEL_Z),
496b7e1c893Smrg                                               SRC_SEL_W(SQ_SEL_W),
497b7e1c893Smrg                                               R6xx_ELEM_LOOP(0),
498b7e1c893Smrg                                               BURST_COUNT(1),
499b7e1c893Smrg                                               END_OF_PROGRAM(0),
500b7e1c893Smrg                                               VALID_PIXEL_MODE(0),
501b7e1c893Smrg                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
502b7e1c893Smrg                                               WHOLE_QUAD_MODE(0),
503b7e1c893Smrg                                               BARRIER(1));
504ad43ddacSmrg    /* 3 */
505b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
506b7e1c893Smrg                                          TYPE(SQ_EXPORT_PARAM),
507b7e1c893Smrg                                          RW_GPR(0),
508b7e1c893Smrg                                          RW_REL(ABSOLUTE),
509b7e1c893Smrg                                          INDEX_GPR(0),
510b7e1c893Smrg                                          ELEM_SIZE(3));
511b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
512b7e1c893Smrg                                               SRC_SEL_Y(SQ_SEL_Y),
513b7e1c893Smrg                                               SRC_SEL_Z(SQ_SEL_Z),
514b7e1c893Smrg                                               SRC_SEL_W(SQ_SEL_W),
515b7e1c893Smrg                                               R6xx_ELEM_LOOP(0),
516b7e1c893Smrg                                               BURST_COUNT(1),
517b7e1c893Smrg                                               END_OF_PROGRAM(1),
518b7e1c893Smrg                                               VALID_PIXEL_MODE(0),
519b7e1c893Smrg                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
520b7e1c893Smrg                                               WHOLE_QUAD_MODE(0),
521b7e1c893Smrg                                               BARRIER(0));
522ad43ddacSmrg
523ad43ddacSmrg
524ad43ddacSmrg    /* 4 texX / w */
525921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
526ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
527ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
528ad43ddacSmrg                             SRC0_NEG(0),
529921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
530ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
531ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
532ad43ddacSmrg                             SRC1_NEG(0),
533ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
534ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
535ad43ddacSmrg                             LAST(0));
536ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
537ad43ddacSmrg                                 SRC0_ABS(0),
538ad43ddacSmrg                                 SRC1_ABS(0),
539ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
540ad43ddacSmrg                                 UPDATE_PRED(0),
541ad43ddacSmrg                                 WRITE_MASK(1),
542ad43ddacSmrg                                 FOG_MERGE(0),
543ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
544ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
545ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
546ad43ddacSmrg                                 DST_GPR(0),
547ad43ddacSmrg                                 DST_REL(ABSOLUTE),
548ad43ddacSmrg                                 DST_ELEM(ELEM_X),
549ad43ddacSmrg                                 CLAMP(0));
550ad43ddacSmrg
551ad43ddacSmrg    /* 5 texY / h */
552921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
553ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
554ad43ddacSmrg                             SRC0_ELEM(ELEM_Y),
555ad43ddacSmrg                             SRC0_NEG(0),
556921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
557ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
558ad43ddacSmrg                             SRC1_ELEM(ELEM_Y),
559ad43ddacSmrg                             SRC1_NEG(0),
560ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
561ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
562ad43ddacSmrg                             LAST(1));
563ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
564ad43ddacSmrg                                 SRC0_ABS(0),
565ad43ddacSmrg                                 SRC1_ABS(0),
566ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
567ad43ddacSmrg                                 UPDATE_PRED(0),
568ad43ddacSmrg                                 WRITE_MASK(1),
569ad43ddacSmrg                                 FOG_MERGE(0),
570ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
571ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
572ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
573ad43ddacSmrg                                 DST_GPR(0),
574ad43ddacSmrg                                 DST_REL(ABSOLUTE),
575ad43ddacSmrg                                 DST_ELEM(ELEM_Y),
576ad43ddacSmrg                                 CLAMP(0));
577ad43ddacSmrg
578ad43ddacSmrg    /* 6/7 */
579b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
580b7e1c893Smrg                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
581b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
582b7e1c893Smrg                             BUFFER_ID(0),
583b7e1c893Smrg                             SRC_GPR(0),
584b7e1c893Smrg                             SRC_REL(ABSOLUTE),
585b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
586b7e1c893Smrg                             MEGA_FETCH_COUNT(16));
587b7e1c893Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
588b7e1c893Smrg                                 DST_REL(ABSOLUTE),
589b7e1c893Smrg                                 DST_SEL_X(SQ_SEL_X),
590b7e1c893Smrg                                 DST_SEL_Y(SQ_SEL_Y),
591b7e1c893Smrg                                 DST_SEL_Z(SQ_SEL_0),
592b7e1c893Smrg                                 DST_SEL_W(SQ_SEL_1),
593b7e1c893Smrg                                 USE_CONST_FIELDS(0),
594b7e1c893Smrg                                 DATA_FORMAT(FMT_32_32_FLOAT),
595ad43ddacSmrg                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
596b7e1c893Smrg                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
597b7e1c893Smrg                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
598b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
599b7e1c893Smrg                             ENDIAN_SWAP(ENDIAN_NONE),
600b7e1c893Smrg                             CONST_BUF_NO_STRIDE(0),
601b7e1c893Smrg                             MEGA_FETCH(1));
602b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
603ad43ddacSmrg    /* 8/9 */
604b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
605b7e1c893Smrg                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
606b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
607b7e1c893Smrg                             BUFFER_ID(0),
608b7e1c893Smrg                             SRC_GPR(0),
609b7e1c893Smrg                             SRC_REL(ABSOLUTE),
610b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
611b7e1c893Smrg                             MEGA_FETCH_COUNT(8));
612b7e1c893Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
613b7e1c893Smrg                                 DST_REL(ABSOLUTE),
614b7e1c893Smrg                                 DST_SEL_X(SQ_SEL_X),
615b7e1c893Smrg                                 DST_SEL_Y(SQ_SEL_Y),
616b7e1c893Smrg                                 DST_SEL_Z(SQ_SEL_0),
617b7e1c893Smrg                                 DST_SEL_W(SQ_SEL_1),
618b7e1c893Smrg                                 USE_CONST_FIELDS(0),
619b7e1c893Smrg                                 DATA_FORMAT(FMT_32_32_FLOAT),
620ad43ddacSmrg                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
621b7e1c893Smrg                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
622b7e1c893Smrg                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
623b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
624b7e1c893Smrg                             ENDIAN_SWAP(ENDIAN_NONE),
625b7e1c893Smrg                             CONST_BUF_NO_STRIDE(0),
626b7e1c893Smrg                             MEGA_FETCH(0));
627b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
628b7e1c893Smrg
629b7e1c893Smrg    return i;
630b7e1c893Smrg}
631b7e1c893Smrg
/*
 * R600_xv_ps - emit the Xv pixel shader as raw instruction dwords.
 *
 * Writes the program sequentially into shader[] and returns the number of
 * dwords written (60 as written below).  No bounds checking is done, so the
 * caller must supply a buffer at least that large.  ChipSet is not
 * referenced anywhere in this function.
 *
 * The numbers in the comments below are instruction slots, where one slot is
 * a pair of dwords (each TEX instruction spans two slots because it is four
 * dwords including the pad).  The ADDR() operands use the same slot
 * numbering, so inserting or removing any dword requires renumbering them.
 *
 * Layout:
 *    0- 3  main control flow: two conditional CALLs, the ALU clause, export
 *    4-15  ALU clause: three groups of four MULADDs accumulating into r2
 *   16-23  subroutine called on COND BOOL: three texture samples, RETURN
 *   24-29  subroutine called on COND NOT_BOOL: two texture samples, RETURN
 *
 * NOTE(review): the two subroutines look like planar vs. packed/semi-planar
 * YUV fetch paths and the ALU clause like a YUV->RGB matrix using constants
 * c0..c2 -- confirm against the code that sets up the constants and textures.
 */
632b7e1c893Smrgint R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
633b7e1c893Smrg{
634b7e1c893Smrg    int i = 0;
635b7e1c893Smrg
636b7e1c893Smrg    /* 0: CALL the subroutine at slot 16 when the condition is BOOL */
637ad43ddacSmrg    shader[i++] = CF_DWORD0(ADDR(16));
638b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
639b7e1c893Smrg                            CF_CONST(0),
640b7e1c893Smrg                            COND(SQ_CF_COND_BOOL),
641b7e1c893Smrg                            I_COUNT(0),
642b7e1c893Smrg                            CALL_COUNT(0),
643b7e1c893Smrg                            END_OF_PROGRAM(0),
644b7e1c893Smrg                            VALID_PIXEL_MODE(0),
645b7e1c893Smrg                            CF_INST(SQ_CF_INST_CALL),
646b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
647b7e1c893Smrg                            BARRIER(0));
648b7e1c893Smrg    /* 1: CALL the subroutine at slot 24 when the condition is NOT_BOOL */
649ad43ddacSmrg    shader[i++] = CF_DWORD0(ADDR(24));
650b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
651b7e1c893Smrg                            CF_CONST(0),
652b7e1c893Smrg                            COND(SQ_CF_COND_NOT_BOOL),
653b7e1c893Smrg                            I_COUNT(0),
654b7e1c893Smrg                            CALL_COUNT(0),
655b7e1c893Smrg                            END_OF_PROGRAM(0),
656b7e1c893Smrg                            VALID_PIXEL_MODE(0),
657b7e1c893Smrg                            CF_INST(SQ_CF_INST_CALL),
658b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
659b7e1c893Smrg                            BARRIER(0));
660b7e1c893Smrg    /* 2: ALU clause starting at slot 4, I_COUNT(12) (slots 4-15) */
661b7e1c893Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(4),
662b7e1c893Smrg                                KCACHE_BANK0(0),
663b7e1c893Smrg                                KCACHE_BANK1(0),
664b7e1c893Smrg                                KCACHE_MODE0(SQ_CF_KCACHE_NOP));
665b7e1c893Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
666b7e1c893Smrg                                KCACHE_ADDR0(0),
667b7e1c893Smrg                                KCACHE_ADDR1(0),
668ad43ddacSmrg                                I_COUNT(12),
669b7e1c893Smrg                                USES_WATERFALL(0),
670b7e1c893Smrg                                CF_INST(SQ_CF_INST_ALU),
671b7e1c893Smrg                                WHOLE_QUAD_MODE(0),
672b7e1c893Smrg                                BARRIER(1));
673b7e1c893Smrg    /* 3: export r2.xyzw to pixel MRT0; END_OF_PROGRAM is set here, so this
     *    is the last control-flow entry of the main program */
674b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
675b7e1c893Smrg                                          TYPE(SQ_EXPORT_PIXEL),
676b7e1c893Smrg                                          RW_GPR(2),
677b7e1c893Smrg                                          RW_REL(ABSOLUTE),
678b7e1c893Smrg                                          INDEX_GPR(0),
679b7e1c893Smrg                                          ELEM_SIZE(3));
680b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
681b7e1c893Smrg                                               SRC_SEL_Y(SQ_SEL_Y),
682b7e1c893Smrg                                               SRC_SEL_Z(SQ_SEL_Z),
683b7e1c893Smrg                                               SRC_SEL_W(SQ_SEL_W),
684b7e1c893Smrg                                               R6xx_ELEM_LOOP(0),
685b7e1c893Smrg                                               BURST_COUNT(1),
686b7e1c893Smrg                                               END_OF_PROGRAM(1),
687b7e1c893Smrg                                               VALID_PIXEL_MODE(0),
688b7e1c893Smrg                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
689b7e1c893Smrg                                               WHOLE_QUAD_MODE(0),
690b7e1c893Smrg                                               BARRIER(1));
691ad43ddacSmrg    /* 4,5,6,7: first MULADD group, r2.xyz = c0.w * r1.x + c0.xyz, r2.w = 1.
     * In every group LAST(1) is set only on the fourth instruction. */
692ad43ddacSmrg    /* r2.x = MAD(c0.w, r1.x, c0.x) */
693921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
694b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
695ad43ddacSmrg                             SRC0_ELEM(ELEM_W),
696b7e1c893Smrg                             SRC0_NEG(0),
697921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
698b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
699b7e1c893Smrg                             SRC1_ELEM(ELEM_X),
700b7e1c893Smrg                             SRC1_NEG(0),
701b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
702b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
703b7e1c893Smrg                             LAST(0));
704921a55d8Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0),
705b7e1c893Smrg                                 SRC2_REL(ABSOLUTE),
706ad43ddacSmrg                                 SRC2_ELEM(ELEM_X),
707b7e1c893Smrg                                 SRC2_NEG(0),
708b7e1c893Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
709b7e1c893Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
710ad43ddacSmrg                                 DST_GPR(2),
711b7e1c893Smrg                                 DST_REL(ABSOLUTE),
712b7e1c893Smrg                                 DST_ELEM(ELEM_X),
713ad43ddacSmrg                                 CLAMP(0));
714ad43ddacSmrg    /* r2.y = MAD(c0.w, r1.x, c0.y) */
715921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
716b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
717ad43ddacSmrg                             SRC0_ELEM(ELEM_W),
718b7e1c893Smrg                             SRC0_NEG(0),
719921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
720b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
721ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
722b7e1c893Smrg                             SRC1_NEG(0),
723b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
724b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
725b7e1c893Smrg                             LAST(0));
726921a55d8Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0),
727b7e1c893Smrg                                 SRC2_REL(ABSOLUTE),
728ad43ddacSmrg                                 SRC2_ELEM(ELEM_Y),
729b7e1c893Smrg                                 SRC2_NEG(0),
730b7e1c893Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
731b7e1c893Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
732ad43ddacSmrg                                 DST_GPR(2),
733b7e1c893Smrg                                 DST_REL(ABSOLUTE),
734b7e1c893Smrg                                 DST_ELEM(ELEM_Y),
735b7e1c893Smrg                                 CLAMP(0));
736ad43ddacSmrg    /* r2.z = MAD(c0.w, r1.x, c0.z) */
737921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
738b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
739ad43ddacSmrg                             SRC0_ELEM(ELEM_W),
740b7e1c893Smrg                             SRC0_NEG(0),
741921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
742b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
743ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
744b7e1c893Smrg                             SRC1_NEG(0),
745b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
746b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
747b7e1c893Smrg                             LAST(0));
748921a55d8Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0),
749b7e1c893Smrg                                 SRC2_REL(ABSOLUTE),
750ad43ddacSmrg                                 SRC2_ELEM(ELEM_Z),
751b7e1c893Smrg                                 SRC2_NEG(0),
752b7e1c893Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
753b7e1c893Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
754ad43ddacSmrg                                 DST_GPR(2),
755b7e1c893Smrg                                 DST_REL(ABSOLUTE),
756b7e1c893Smrg                                 DST_ELEM(ELEM_Z),
757b7e1c893Smrg                                 CLAMP(0));
758ad43ddacSmrg    /* r2.w = MAD(0, 0, 1) */
759b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
760b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
761b7e1c893Smrg                             SRC0_ELEM(ELEM_X),
762b7e1c893Smrg                             SRC0_NEG(0),
763b7e1c893Smrg                             SRC1_SEL(SQ_ALU_SRC_0),
764b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
765b7e1c893Smrg                             SRC1_ELEM(ELEM_X),
766b7e1c893Smrg                             SRC1_NEG(0),
767b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
768b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
769b7e1c893Smrg                             LAST(1));
770ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
771ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
772ad43ddacSmrg                                 SRC2_ELEM(ELEM_X),
773ad43ddacSmrg                                 SRC2_NEG(0),
774ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
775b7e1c893Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
776ad43ddacSmrg                                 DST_GPR(2),
777b7e1c893Smrg                                 DST_REL(ABSOLUTE),
778b7e1c893Smrg                                 DST_ELEM(ELEM_W),
779b7e1c893Smrg                                 CLAMP(0));
780ad43ddacSmrg
781ad43ddacSmrg    /* 8,9,10,11: second MULADD group, r2.xyz = c1.xyz * r1.y + pv.xyz, where
     * pv (SQ_ALU_SRC_PV) is presumably the previous group's result -- confirm */
782ad43ddacSmrg    /* r2.x = MAD(c1.x, r1.y, pv.x) */
783921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
784b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
785b7e1c893Smrg                             SRC0_ELEM(ELEM_X),
786b7e1c893Smrg                             SRC0_NEG(0),
787921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
788b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
789ad43ddacSmrg                             SRC1_ELEM(ELEM_Y),
790b7e1c893Smrg                             SRC1_NEG(0),
791b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
792b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
793b7e1c893Smrg                             LAST(0));
794ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
795ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
796ad43ddacSmrg                                 SRC2_ELEM(ELEM_X),
797ad43ddacSmrg                                 SRC2_NEG(0),
798ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
799ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
800b7e1c893Smrg                                 DST_GPR(2),
801b7e1c893Smrg                                 DST_REL(ABSOLUTE),
802b7e1c893Smrg                                 DST_ELEM(ELEM_X),
803ad43ddacSmrg                                 CLAMP(0));
804ad43ddacSmrg    /* r2.y = MAD(c1.y, r1.y, pv.y) */
805921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
806b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
807b7e1c893Smrg                             SRC0_ELEM(ELEM_Y),
808b7e1c893Smrg                             SRC0_NEG(0),
809921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
810b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
811b7e1c893Smrg                             SRC1_ELEM(ELEM_Y),
812b7e1c893Smrg                             SRC1_NEG(0),
813b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
814b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
815b7e1c893Smrg                             LAST(0));
816ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
817ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
818ad43ddacSmrg                                 SRC2_ELEM(ELEM_Y),
819ad43ddacSmrg                                 SRC2_NEG(0),
820ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
821ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
822ad43ddacSmrg                                 DST_GPR(2),
823b7e1c893Smrg                                 DST_REL(ABSOLUTE),
824b7e1c893Smrg                                 DST_ELEM(ELEM_Y),
825ad43ddacSmrg                                 CLAMP(0));
826ad43ddacSmrg    /* r2.z = MAD(c1.z, r1.y, pv.z) */
827921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
828b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
829b7e1c893Smrg                             SRC0_ELEM(ELEM_Z),
830b7e1c893Smrg                             SRC0_NEG(0),
831921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
832b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
833ad43ddacSmrg                             SRC1_ELEM(ELEM_Y),
834b7e1c893Smrg                             SRC1_NEG(0),
835b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
836b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
837b7e1c893Smrg                             LAST(0));
838ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
839ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
840ad43ddacSmrg                                 SRC2_ELEM(ELEM_Z),
841ad43ddacSmrg                                 SRC2_NEG(0),
842ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
843ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
844ad43ddacSmrg                                 DST_GPR(2),
845b7e1c893Smrg                                 DST_REL(ABSOLUTE),
846b7e1c893Smrg                                 DST_ELEM(ELEM_Z),
847ad43ddacSmrg                                 CLAMP(0));
848ad43ddacSmrg    /* r2.w = MAD(0, 0, 1) */
849ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
850b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
851ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
852b7e1c893Smrg                             SRC0_NEG(0),
853ad43ddacSmrg                             SRC1_SEL(SQ_ALU_SRC_0),
854b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
855ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
856b7e1c893Smrg                             SRC1_NEG(0),
857b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
858b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
859b7e1c893Smrg                             LAST(1));
860ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
861ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
862ad43ddacSmrg                                 SRC2_ELEM(ELEM_W),
863ad43ddacSmrg                                 SRC2_NEG(0),
864ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
865ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
866ad43ddacSmrg                                 DST_GPR(2),
867b7e1c893Smrg                                 DST_REL(ABSOLUTE),
868b7e1c893Smrg                                 DST_ELEM(ELEM_W),
869ad43ddacSmrg                                 CLAMP(0));
870ad43ddacSmrg    /* 12,13,14,15: third MULADD group, r2.xyz = c2.xyz * r1.z + pv.xyz;
     * CLAMP(1) is set only in this final group (all four instructions) */
871ad43ddacSmrg    /* r2.x = MAD(c2.x, r1.z, pv.x) */
872921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2),
873b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
874b7e1c893Smrg                             SRC0_ELEM(ELEM_X),
875b7e1c893Smrg                             SRC0_NEG(0),
876921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
877b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
878ad43ddacSmrg                             SRC1_ELEM(ELEM_Z),
879b7e1c893Smrg                             SRC1_NEG(0),
880b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
881b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
882b7e1c893Smrg                             LAST(0));
883ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
884ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
885ad43ddacSmrg                                 SRC2_ELEM(ELEM_X),
886ad43ddacSmrg                                 SRC2_NEG(0),
887ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
888ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
889ad43ddacSmrg                                 DST_GPR(2),
890b7e1c893Smrg                                 DST_REL(ABSOLUTE),
891b7e1c893Smrg                                 DST_ELEM(ELEM_X),
892b7e1c893Smrg                                 CLAMP(1));
893ad43ddacSmrg    /* r2.y = MAD(c2.y, r1.z, pv.y) */
894921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2),
895b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
896b7e1c893Smrg                             SRC0_ELEM(ELEM_Y),
897b7e1c893Smrg                             SRC0_NEG(0),
898921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
899b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
900ad43ddacSmrg                             SRC1_ELEM(ELEM_Z),
901b7e1c893Smrg                             SRC1_NEG(0),
902b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
903b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
904b7e1c893Smrg                             LAST(0));
905ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
906ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
907ad43ddacSmrg                                 SRC2_ELEM(ELEM_Y),
908ad43ddacSmrg                                 SRC2_NEG(0),
909ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
910ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
911b7e1c893Smrg                                 DST_GPR(2),
912b7e1c893Smrg                                 DST_REL(ABSOLUTE),
913b7e1c893Smrg                                 DST_ELEM(ELEM_Y),
914b7e1c893Smrg                                 CLAMP(1));
915ad43ddacSmrg    /* r2.z = MAD(c2.z, r1.z, pv.z) */
916921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2),
917b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
918b7e1c893Smrg                             SRC0_ELEM(ELEM_Z),
919b7e1c893Smrg                             SRC0_NEG(0),
920921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
921b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
922b7e1c893Smrg                             SRC1_ELEM(ELEM_Z),
923b7e1c893Smrg                             SRC1_NEG(0),
924b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
925b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
926b7e1c893Smrg                             LAST(0));
927ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
928ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
929ad43ddacSmrg                                 SRC2_ELEM(ELEM_Z),
930ad43ddacSmrg                                 SRC2_NEG(0),
931ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
932ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
933ad43ddacSmrg                                 DST_GPR(2),
934b7e1c893Smrg                                 DST_REL(ABSOLUTE),
935b7e1c893Smrg                                 DST_ELEM(ELEM_Z),
936b7e1c893Smrg                                 CLAMP(1));
937ad43ddacSmrg    /* r2.w = MAD(0, 0, 1) */
938ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
939b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
940b7e1c893Smrg                             SRC0_ELEM(ELEM_X),
941b7e1c893Smrg                             SRC0_NEG(0),
942ad43ddacSmrg                             SRC1_SEL(SQ_ALU_SRC_0),
943b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
944b7e1c893Smrg                             SRC1_ELEM(ELEM_X),
945b7e1c893Smrg                             SRC1_NEG(0),
946b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
947b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
948b7e1c893Smrg                             LAST(1));
949ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
950ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
951ad43ddacSmrg                                 SRC2_ELEM(ELEM_X),
952ad43ddacSmrg                                 SRC2_NEG(0),
953ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
954ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
955ad43ddacSmrg                                 DST_GPR(2),
956b7e1c893Smrg                                 DST_REL(ABSOLUTE),
957b7e1c893Smrg                                 DST_ELEM(ELEM_W),
958b7e1c893Smrg                                 CLAMP(1));
959ad43ddacSmrg
960ad43ddacSmrg    /* 16: subroutine called from entry 0; TEX clause at slot 18 with
     *     I_COUNT(3) (slots 18-23) */
961ad43ddacSmrg    shader[i++] = CF_DWORD0(ADDR(18));
962b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
963b7e1c893Smrg                            CF_CONST(0),
964b7e1c893Smrg                            COND(SQ_CF_COND_ACTIVE),
965b7e1c893Smrg                            I_COUNT(3),
966b7e1c893Smrg                            CALL_COUNT(0),
967b7e1c893Smrg                            END_OF_PROGRAM(0),
968b7e1c893Smrg                            VALID_PIXEL_MODE(0),
969b7e1c893Smrg                            CF_INST(SQ_CF_INST_TEX),
970b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
971b7e1c893Smrg                            BARRIER(1));
972ad43ddacSmrg    /* 17: RETURN from the subroutine */
973b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(0));
974b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
975b7e1c893Smrg			    CF_CONST(0),
976b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
977b7e1c893Smrg			    I_COUNT(0),
978b7e1c893Smrg			    CALL_COUNT(0),
979b7e1c893Smrg			    END_OF_PROGRAM(0),
980b7e1c893Smrg			    VALID_PIXEL_MODE(0),
981b7e1c893Smrg			    CF_INST(SQ_CF_INST_RETURN),
982b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
983b7e1c893Smrg			    BARRIER(1));
984ad43ddacSmrg    /* 18/19: sample resource 0 / sampler 0 at r0.xy; r1.x = result.x,
     *        r1.w = 1, r1.y and r1.z are masked */
985b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
986b7e1c893Smrg                             BC_FRAC_MODE(0),
987b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
988b7e1c893Smrg                             RESOURCE_ID(0),
989b7e1c893Smrg                             SRC_GPR(0),
990b7e1c893Smrg                             SRC_REL(ABSOLUTE),
991b7e1c893Smrg                             R7xx_ALT_CONST(0));
992b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
993b7e1c893Smrg                             DST_REL(ABSOLUTE),
994b7e1c893Smrg                             DST_SEL_X(SQ_SEL_X),
995b7e1c893Smrg                             DST_SEL_Y(SQ_SEL_MASK),
996b7e1c893Smrg                             DST_SEL_Z(SQ_SEL_MASK),
997b7e1c893Smrg                             DST_SEL_W(SQ_SEL_1),
998b7e1c893Smrg                             LOD_BIAS(0),
999b7e1c893Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1000b7e1c893Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1001b7e1c893Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1002b7e1c893Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1003b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1004b7e1c893Smrg                             OFFSET_Y(0),
1005b7e1c893Smrg                             OFFSET_Z(0),
1006b7e1c893Smrg                             SAMPLER_ID(0),
1007b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
1008b7e1c893Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1009b7e1c893Smrg                             SRC_SEL_Z(SQ_SEL_0),
1010b7e1c893Smrg                             SRC_SEL_W(SQ_SEL_1));
1011b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
1012ad43ddacSmrg    /* 20/21: sample resource 1 / sampler 1 at r0.xy; r1.z = result.x,
     *        all other r1 components are masked */
1013b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1014b7e1c893Smrg                             BC_FRAC_MODE(0),
1015b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
1016b7e1c893Smrg                             RESOURCE_ID(1),
1017b7e1c893Smrg                             SRC_GPR(0),
1018b7e1c893Smrg                             SRC_REL(ABSOLUTE),
1019b7e1c893Smrg                             R7xx_ALT_CONST(0));
1020b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1021b7e1c893Smrg                             DST_REL(ABSOLUTE),
1022b7e1c893Smrg                             DST_SEL_X(SQ_SEL_MASK),
1023b7e1c893Smrg                             DST_SEL_Y(SQ_SEL_MASK),
1024b7e1c893Smrg                             DST_SEL_Z(SQ_SEL_X),
1025b7e1c893Smrg                             DST_SEL_W(SQ_SEL_MASK),
1026b7e1c893Smrg                             LOD_BIAS(0),
1027b7e1c893Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1028b7e1c893Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1029b7e1c893Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1030b7e1c893Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1031b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1032b7e1c893Smrg                             OFFSET_Y(0),
1033b7e1c893Smrg                             OFFSET_Z(0),
1034b7e1c893Smrg                             SAMPLER_ID(1),
1035b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
1036b7e1c893Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1037b7e1c893Smrg                             SRC_SEL_Z(SQ_SEL_0),
1038b7e1c893Smrg                             SRC_SEL_W(SQ_SEL_1));
1039b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
1040ad43ddacSmrg    /* 22/23: sample resource 2 / sampler 2 at r0.xy; r1.y = result.x,
     *        all other r1 components are masked */
1041b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1042b7e1c893Smrg                             BC_FRAC_MODE(0),
1043b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
1044b7e1c893Smrg                             RESOURCE_ID(2),
1045b7e1c893Smrg                             SRC_GPR(0),
1046b7e1c893Smrg                             SRC_REL(ABSOLUTE),
1047b7e1c893Smrg                             R7xx_ALT_CONST(0));
1048b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1049b7e1c893Smrg                             DST_REL(ABSOLUTE),
1050b7e1c893Smrg                             DST_SEL_X(SQ_SEL_MASK),
1051b7e1c893Smrg                             DST_SEL_Y(SQ_SEL_X),
1052b7e1c893Smrg                             DST_SEL_Z(SQ_SEL_MASK),
1053b7e1c893Smrg                             DST_SEL_W(SQ_SEL_MASK),
1054b7e1c893Smrg                             LOD_BIAS(0),
1055b7e1c893Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1056b7e1c893Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1057b7e1c893Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1058b7e1c893Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1059b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1060b7e1c893Smrg                             OFFSET_Y(0),
1061b7e1c893Smrg                             OFFSET_Z(0),
1062b7e1c893Smrg                             SAMPLER_ID(2),
1063b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
1064b7e1c893Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1065b7e1c893Smrg                             SRC_SEL_Z(SQ_SEL_0),
1066b7e1c893Smrg                             SRC_SEL_W(SQ_SEL_1));
1067b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
1068ad43ddacSmrg    /* 24: subroutine called from entry 1; TEX clause at slot 26 with
     *     I_COUNT(2) (slots 26-29) */
1069ad43ddacSmrg    shader[i++] = CF_DWORD0(ADDR(26));
1070b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1071b7e1c893Smrg                            CF_CONST(0),
1072b7e1c893Smrg                            COND(SQ_CF_COND_ACTIVE),
1073b7e1c893Smrg                            I_COUNT(2),
1074b7e1c893Smrg                            CALL_COUNT(0),
1075b7e1c893Smrg                            END_OF_PROGRAM(0),
1076b7e1c893Smrg                            VALID_PIXEL_MODE(0),
1077b7e1c893Smrg                            CF_INST(SQ_CF_INST_TEX),
1078b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
1079b7e1c893Smrg                            BARRIER(1));
1080ad43ddacSmrg    /* 25: RETURN from the subroutine */
1081b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(0));
1082b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1083b7e1c893Smrg			    CF_CONST(0),
1084b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
1085b7e1c893Smrg			    I_COUNT(0),
1086b7e1c893Smrg			    CALL_COUNT(0),
1087b7e1c893Smrg			    END_OF_PROGRAM(0),
1088b7e1c893Smrg			    VALID_PIXEL_MODE(0),
1089b7e1c893Smrg			    CF_INST(SQ_CF_INST_RETURN),
1090b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
1091b7e1c893Smrg			    BARRIER(1));
1092ad43ddacSmrg    /* 26/27: sample resource 0 / sampler 0 at r0.xy; r1.x = result.x,
     *        r1.w = 1, r1.y and r1.z are masked */
1093b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1094b7e1c893Smrg                             BC_FRAC_MODE(0),
1095b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
1096b7e1c893Smrg                             RESOURCE_ID(0),
1097b7e1c893Smrg                             SRC_GPR(0),
1098b7e1c893Smrg                             SRC_REL(ABSOLUTE),
1099b7e1c893Smrg                             R7xx_ALT_CONST(0));
1100b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1101b7e1c893Smrg                             DST_REL(ABSOLUTE),
1102b7e1c893Smrg                             DST_SEL_X(SQ_SEL_X),
1103b7e1c893Smrg                             DST_SEL_Y(SQ_SEL_MASK),
1104b7e1c893Smrg                             DST_SEL_Z(SQ_SEL_MASK),
1105b7e1c893Smrg                             DST_SEL_W(SQ_SEL_1),
1106b7e1c893Smrg                             LOD_BIAS(0),
1107b7e1c893Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1108b7e1c893Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1109b7e1c893Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1110b7e1c893Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1111b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1112b7e1c893Smrg                             OFFSET_Y(0),
1113b7e1c893Smrg                             OFFSET_Z(0),
1114b7e1c893Smrg                             SAMPLER_ID(0),
1115b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
1116b7e1c893Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1117b7e1c893Smrg                             SRC_SEL_Z(SQ_SEL_0),
1118b7e1c893Smrg                             SRC_SEL_W(SQ_SEL_1));
1119b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
1120ad43ddacSmrg    /* 28/29: sample resource 1 / sampler 1 at r0.xy; r1.y = result.x,
     *        r1.z = result.y, r1.x and r1.w are masked */
1121b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1122b7e1c893Smrg                             BC_FRAC_MODE(0),
1123b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
1124b7e1c893Smrg                             RESOURCE_ID(1),
1125b7e1c893Smrg                             SRC_GPR(0),
1126b7e1c893Smrg                             SRC_REL(ABSOLUTE),
1127b7e1c893Smrg                             R7xx_ALT_CONST(0));
1128b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1129b7e1c893Smrg                             DST_REL(ABSOLUTE),
1130b7e1c893Smrg                             DST_SEL_X(SQ_SEL_MASK),
1131b7e1c893Smrg                             DST_SEL_Y(SQ_SEL_X),
1132b7e1c893Smrg                             DST_SEL_Z(SQ_SEL_Y),
1133b7e1c893Smrg                             DST_SEL_W(SQ_SEL_MASK),
1134b7e1c893Smrg                             LOD_BIAS(0),
1135b7e1c893Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1136b7e1c893Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1137b7e1c893Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1138b7e1c893Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1139b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1140b7e1c893Smrg                             OFFSET_Y(0),
1141b7e1c893Smrg                             OFFSET_Z(0),
1142b7e1c893Smrg                             SAMPLER_ID(1),
1143b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
1144b7e1c893Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1145b7e1c893Smrg                             SRC_SEL_Z(SQ_SEL_0),
1146b7e1c893Smrg                             SRC_SEL_W(SQ_SEL_1));
1147b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
1148b7e1c893Smrg
    /* i now equals the number of dwords emitted above */
1149b7e1c893Smrg    return i;
1150b7e1c893Smrg}
1151b7e1c893Smrg
1152b7e1c893Smrg/* comp vs --------------------------------------- */
1153b7e1c893Smrgint R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
1154b7e1c893Smrg{
1155b7e1c893Smrg    int i = 0;
1156b7e1c893Smrg
1157b7e1c893Smrg    /* 0 */
1158b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(3));
1159b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1160b7e1c893Smrg                            CF_CONST(0),
1161b7e1c893Smrg                            COND(SQ_CF_COND_BOOL),
1162b7e1c893Smrg                            I_COUNT(0),
1163b7e1c893Smrg                            CALL_COUNT(0),
1164b7e1c893Smrg                            END_OF_PROGRAM(0),
1165b7e1c893Smrg                            VALID_PIXEL_MODE(0),
1166b7e1c893Smrg                            CF_INST(SQ_CF_INST_CALL),
1167b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
1168b7e1c893Smrg                            BARRIER(0));
1169b7e1c893Smrg    /* 1 */
11700974d292Smrg    shader[i++] = CF_DWORD0(ADDR(9));
1171b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1172b7e1c893Smrg                            CF_CONST(0),
1173b7e1c893Smrg                            COND(SQ_CF_COND_NOT_BOOL),
1174b7e1c893Smrg                            I_COUNT(0),
1175b7e1c893Smrg                            CALL_COUNT(0),
1176b7e1c893Smrg                            END_OF_PROGRAM(0),
1177b7e1c893Smrg                            VALID_PIXEL_MODE(0),
1178b7e1c893Smrg                            CF_INST(SQ_CF_INST_CALL),
1179b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
1180b7e1c893Smrg                            BARRIER(0));
1181b7e1c893Smrg    /* 2 */
11822f39173dSmrg    shader[i++] = CF_DWORD0(ADDR(0));
1183b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1184b7e1c893Smrg                            CF_CONST(0),
1185b7e1c893Smrg                            COND(SQ_CF_COND_ACTIVE),
1186b7e1c893Smrg                            I_COUNT(0),
1187b7e1c893Smrg                            CALL_COUNT(0),
1188b7e1c893Smrg                            END_OF_PROGRAM(1),
1189b7e1c893Smrg                            VALID_PIXEL_MODE(0),
1190b7e1c893Smrg                            CF_INST(SQ_CF_INST_NOP),
1191b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
1192b7e1c893Smrg                            BARRIER(1));
1193b7e1c893Smrg    /* 3 - mask sub */
1194921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(44));
1195b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1196b7e1c893Smrg			    CF_CONST(0),
1197b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
1198b7e1c893Smrg			    I_COUNT(3),
1199b7e1c893Smrg			    CALL_COUNT(0),
1200b7e1c893Smrg			    END_OF_PROGRAM(0),
1201b7e1c893Smrg			    VALID_PIXEL_MODE(0),
1202b7e1c893Smrg			    CF_INST(SQ_CF_INST_VTX),
1203b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
1204b7e1c893Smrg			    BARRIER(1));
1205ad43ddacSmrg
1206ad43ddacSmrg    /* 4 - ALU */
12070974d292Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(14),
1208ad43ddacSmrg				KCACHE_BANK0(0),
1209ad43ddacSmrg				KCACHE_BANK1(0),
1210ad43ddacSmrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
1211ad43ddacSmrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1212ad43ddacSmrg				KCACHE_ADDR0(0),
1213ad43ddacSmrg				KCACHE_ADDR1(0),
1214921a55d8Smrg				I_COUNT(20),
1215ad43ddacSmrg				USES_WATERFALL(0),
1216ad43ddacSmrg				CF_INST(SQ_CF_INST_ALU),
1217ad43ddacSmrg				WHOLE_QUAD_MODE(0),
1218ad43ddacSmrg				BARRIER(1));
1219ad43ddacSmrg
1220ad43ddacSmrg    /* 5 - dst */
1221b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1222b7e1c893Smrg					  TYPE(SQ_EXPORT_POS),
1223b7e1c893Smrg					  RW_GPR(2),
1224b7e1c893Smrg					  RW_REL(ABSOLUTE),
1225b7e1c893Smrg					  INDEX_GPR(0),
1226b7e1c893Smrg					  ELEM_SIZE(0));
1227b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1228b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1229ad43ddacSmrg					       SRC_SEL_Z(SQ_SEL_0),
1230ad43ddacSmrg					       SRC_SEL_W(SQ_SEL_1),
1231b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
1232b7e1c893Smrg					       BURST_COUNT(1),
1233b7e1c893Smrg					       END_OF_PROGRAM(0),
1234b7e1c893Smrg					       VALID_PIXEL_MODE(0),
1235b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1236b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
1237b7e1c893Smrg					       BARRIER(1));
1238ad43ddacSmrg    /* 6 - src */
1239b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1240b7e1c893Smrg					  TYPE(SQ_EXPORT_PARAM),
1241b7e1c893Smrg					  RW_GPR(1),
1242b7e1c893Smrg					  RW_REL(ABSOLUTE),
1243b7e1c893Smrg					  INDEX_GPR(0),
1244b7e1c893Smrg					  ELEM_SIZE(0));
1245b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1246b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1247ad43ddacSmrg					       SRC_SEL_Z(SQ_SEL_0),
1248ad43ddacSmrg					       SRC_SEL_W(SQ_SEL_1),
1249b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
1250b7e1c893Smrg					       BURST_COUNT(1),
1251b7e1c893Smrg					       END_OF_PROGRAM(0),
1252b7e1c893Smrg					       VALID_PIXEL_MODE(0),
1253b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT),
1254b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
1255b7e1c893Smrg					       BARRIER(0));
1256ad43ddacSmrg    /* 7 - mask */
1257b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
1258b7e1c893Smrg					  TYPE(SQ_EXPORT_PARAM),
1259b7e1c893Smrg					  RW_GPR(0),
1260b7e1c893Smrg					  RW_REL(ABSOLUTE),
1261b7e1c893Smrg					  INDEX_GPR(0),
1262b7e1c893Smrg					  ELEM_SIZE(0));
1263b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1264b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1265ad43ddacSmrg					       SRC_SEL_Z(SQ_SEL_0),
1266ad43ddacSmrg					       SRC_SEL_W(SQ_SEL_1),
1267b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
1268b7e1c893Smrg					       BURST_COUNT(1),
1269b7e1c893Smrg					       END_OF_PROGRAM(0),
1270b7e1c893Smrg					       VALID_PIXEL_MODE(0),
1271b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1272b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
1273b7e1c893Smrg					       BARRIER(0));
1274ad43ddacSmrg    /* 8 */
1275b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(0));
1276b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1277b7e1c893Smrg			    CF_CONST(0),
1278b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
1279b7e1c893Smrg			    I_COUNT(0),
1280b7e1c893Smrg			    CALL_COUNT(0),
1281b7e1c893Smrg			    END_OF_PROGRAM(0),
1282b7e1c893Smrg			    VALID_PIXEL_MODE(0),
1283b7e1c893Smrg			    CF_INST(SQ_CF_INST_RETURN),
1284b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
1285b7e1c893Smrg			    BARRIER(1));
12860974d292Smrg    /* 9 - non-mask sub */
1287921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(50));
12880974d292Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
12890974d292Smrg			    CF_CONST(0),
12900974d292Smrg			    COND(SQ_CF_COND_ACTIVE),
12910974d292Smrg			    I_COUNT(2),
12920974d292Smrg			    CALL_COUNT(0),
12930974d292Smrg			    END_OF_PROGRAM(0),
12940974d292Smrg			    VALID_PIXEL_MODE(0),
12950974d292Smrg			    CF_INST(SQ_CF_INST_VTX),
12960974d292Smrg			    WHOLE_QUAD_MODE(0),
12970974d292Smrg			    BARRIER(1));
1298b7e1c893Smrg
12990974d292Smrg    /* 10 - ALU */
1300921a55d8Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(34),
13010974d292Smrg				KCACHE_BANK0(0),
13020974d292Smrg				KCACHE_BANK1(0),
13030974d292Smrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
13040974d292Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
13050974d292Smrg				KCACHE_ADDR0(0),
13060974d292Smrg				KCACHE_ADDR1(0),
1307921a55d8Smrg				I_COUNT(10),
13080974d292Smrg				USES_WATERFALL(0),
13090974d292Smrg				CF_INST(SQ_CF_INST_ALU),
13100974d292Smrg				WHOLE_QUAD_MODE(0),
13110974d292Smrg				BARRIER(1));
1312ad43ddacSmrg
13130974d292Smrg    /* 11 - dst */
13140974d292Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
13150974d292Smrg					  TYPE(SQ_EXPORT_POS),
13160974d292Smrg					  RW_GPR(1),
13170974d292Smrg					  RW_REL(ABSOLUTE),
13180974d292Smrg					  INDEX_GPR(0),
13190974d292Smrg					  ELEM_SIZE(0));
13200974d292Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
13210974d292Smrg					       SRC_SEL_Y(SQ_SEL_Y),
13220974d292Smrg					       SRC_SEL_Z(SQ_SEL_0),
13230974d292Smrg					       SRC_SEL_W(SQ_SEL_1),
13240974d292Smrg					       R6xx_ELEM_LOOP(0),
13250974d292Smrg					       BURST_COUNT(0),
13260974d292Smrg					       END_OF_PROGRAM(0),
13270974d292Smrg					       VALID_PIXEL_MODE(0),
13280974d292Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
13290974d292Smrg					       WHOLE_QUAD_MODE(0),
13300974d292Smrg					       BARRIER(1));
13310974d292Smrg    /* 12 - src */
13320974d292Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
13330974d292Smrg					  TYPE(SQ_EXPORT_PARAM),
13340974d292Smrg					  RW_GPR(0),
13350974d292Smrg					  RW_REL(ABSOLUTE),
13360974d292Smrg					  INDEX_GPR(0),
13370974d292Smrg					  ELEM_SIZE(0));
13380974d292Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
13390974d292Smrg					       SRC_SEL_Y(SQ_SEL_Y),
13400974d292Smrg					       SRC_SEL_Z(SQ_SEL_0),
13410974d292Smrg					       SRC_SEL_W(SQ_SEL_1),
13420974d292Smrg					       R6xx_ELEM_LOOP(0),
13430974d292Smrg					       BURST_COUNT(0),
13440974d292Smrg					       END_OF_PROGRAM(0),
13450974d292Smrg					       VALID_PIXEL_MODE(0),
13460974d292Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
13470974d292Smrg					       WHOLE_QUAD_MODE(0),
13480974d292Smrg					       BARRIER(0));
13490974d292Smrg    /* 13 */
13500974d292Smrg    shader[i++] = CF_DWORD0(ADDR(0));
13510974d292Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
13520974d292Smrg			    CF_CONST(0),
13530974d292Smrg			    COND(SQ_CF_COND_ACTIVE),
13540974d292Smrg			    I_COUNT(0),
13550974d292Smrg			    CALL_COUNT(0),
13560974d292Smrg			    END_OF_PROGRAM(0),
13570974d292Smrg			    VALID_PIXEL_MODE(0),
13580974d292Smrg			    CF_INST(SQ_CF_INST_RETURN),
13590974d292Smrg			    WHOLE_QUAD_MODE(0),
13600974d292Smrg			    BARRIER(1));
13610974d292Smrg
13620974d292Smrg
1363921a55d8Smrg    /* 14 srcX.x DOT4 - mask */
1364921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1365921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1366921a55d8Smrg                             SRC0_ELEM(ELEM_X),
1367921a55d8Smrg                             SRC0_NEG(0),
1368921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1369921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1370921a55d8Smrg                             SRC1_ELEM(ELEM_X),
1371921a55d8Smrg                             SRC1_NEG(0),
1372921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1373921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1374921a55d8Smrg                             LAST(0));
1375921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1376921a55d8Smrg                                 SRC0_ABS(0),
1377921a55d8Smrg                                 SRC1_ABS(0),
1378921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1379921a55d8Smrg                                 UPDATE_PRED(0),
1380921a55d8Smrg                                 WRITE_MASK(1),
1381921a55d8Smrg                                 FOG_MERGE(0),
1382921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1383921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1384921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1385921a55d8Smrg                                 DST_GPR(3),
1386921a55d8Smrg                                 DST_REL(ABSOLUTE),
1387921a55d8Smrg                                 DST_ELEM(ELEM_X),
1388921a55d8Smrg                                 CLAMP(0));
1389921a55d8Smrg
1390921a55d8Smrg    /* 15 srcX.y DOT4 - mask */
1391921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
13920974d292Smrg                             SRC0_REL(ABSOLUTE),
13930974d292Smrg                             SRC0_ELEM(ELEM_Y),
13940974d292Smrg                             SRC0_NEG(0),
1395921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
13960974d292Smrg                             SRC1_REL(ABSOLUTE),
13970974d292Smrg                             SRC1_ELEM(ELEM_Y),
13980974d292Smrg                             SRC1_NEG(0),
13990974d292Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
14000974d292Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1401921a55d8Smrg                             LAST(0));
1402921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1403921a55d8Smrg                                 SRC0_ABS(0),
1404921a55d8Smrg                                 SRC1_ABS(0),
1405921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1406921a55d8Smrg                                 UPDATE_PRED(0),
1407921a55d8Smrg                                 WRITE_MASK(0),
1408921a55d8Smrg                                 FOG_MERGE(0),
1409921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1410921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
14110974d292Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1412921a55d8Smrg                                 DST_GPR(3),
1413921a55d8Smrg                                 DST_REL(ABSOLUTE),
1414921a55d8Smrg                                 DST_ELEM(ELEM_Y),
1415921a55d8Smrg                                 CLAMP(0));
1416921a55d8Smrg
1417921a55d8Smrg    /* 16 srcX.z DOT4 - mask */
1418921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1419921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1420921a55d8Smrg                             SRC0_ELEM(ELEM_Z),
1421921a55d8Smrg                             SRC0_NEG(0),
1422921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1423921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1424921a55d8Smrg                             SRC1_ELEM(ELEM_Z),
1425921a55d8Smrg                             SRC1_NEG(0),
1426921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1427921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1428921a55d8Smrg                             LAST(0));
1429921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1430921a55d8Smrg                                 SRC0_ABS(0),
1431921a55d8Smrg                                 SRC1_ABS(0),
1432921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1433921a55d8Smrg                                 UPDATE_PRED(0),
1434921a55d8Smrg                                 WRITE_MASK(0),
1435921a55d8Smrg                                 FOG_MERGE(0),
1436921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1437921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1438921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1439921a55d8Smrg                                 DST_GPR(3),
14400974d292Smrg                                 DST_REL(ABSOLUTE),
14410974d292Smrg                                 DST_ELEM(ELEM_Z),
14420974d292Smrg                                 CLAMP(0));
1443921a55d8Smrg
1444921a55d8Smrg    /* 17 srcX.w DOT4 - mask */
1445921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1446ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1447921a55d8Smrg                             SRC0_ELEM(ELEM_W),
1448ad43ddacSmrg                             SRC0_NEG(0),
1449921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1450ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1451921a55d8Smrg                             SRC1_ELEM(ELEM_W),
1452ad43ddacSmrg                             SRC1_NEG(0),
1453ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1454ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1455ad43ddacSmrg                             LAST(1));
1456921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1457921a55d8Smrg                                 SRC0_ABS(0),
1458921a55d8Smrg                                 SRC1_ABS(0),
1459921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1460921a55d8Smrg                                 UPDATE_PRED(0),
1461921a55d8Smrg                                 WRITE_MASK(0),
1462921a55d8Smrg                                 FOG_MERGE(0),
1463921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1464921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1465ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1466921a55d8Smrg                                 DST_GPR(3),
1467ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1468ad43ddacSmrg                                 DST_ELEM(ELEM_W),
1469ad43ddacSmrg                                 CLAMP(0));
1470ad43ddacSmrg
1471921a55d8Smrg    /* 18 srcY.x DOT4 - mask */
1472921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1473ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1474ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
1475ad43ddacSmrg                             SRC0_NEG(0),
1476921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1477ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1478ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
1479ad43ddacSmrg                             SRC1_NEG(0),
1480ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1481ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1482ad43ddacSmrg                             LAST(0));
1483921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1484921a55d8Smrg                                 SRC0_ABS(0),
1485921a55d8Smrg                                 SRC1_ABS(0),
1486921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1487921a55d8Smrg                                 UPDATE_PRED(0),
1488921a55d8Smrg                                 WRITE_MASK(0),
1489921a55d8Smrg                                 FOG_MERGE(0),
1490921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1491921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1492ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1493921a55d8Smrg                                 DST_GPR(3),
1494ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1495ad43ddacSmrg                                 DST_ELEM(ELEM_X),
1496ad43ddacSmrg                                 CLAMP(0));
1497921a55d8Smrg
1498921a55d8Smrg    /* 19 srcY.y DOT4 - mask */
1499921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1500921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1501921a55d8Smrg                             SRC0_ELEM(ELEM_Y),
1502921a55d8Smrg                             SRC0_NEG(0),
1503921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1504921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1505921a55d8Smrg                             SRC1_ELEM(ELEM_Y),
1506921a55d8Smrg                             SRC1_NEG(0),
1507921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1508921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1509921a55d8Smrg                             LAST(0));
1510921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1511921a55d8Smrg                                 SRC0_ABS(0),
1512921a55d8Smrg                                 SRC1_ABS(0),
1513921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1514921a55d8Smrg                                 UPDATE_PRED(0),
1515921a55d8Smrg                                 WRITE_MASK(1),
1516921a55d8Smrg                                 FOG_MERGE(0),
1517921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1518921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1519921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1520921a55d8Smrg                                 DST_GPR(3),
1521921a55d8Smrg                                 DST_REL(ABSOLUTE),
1522921a55d8Smrg                                 DST_ELEM(ELEM_Y),
1523921a55d8Smrg                                 CLAMP(0));
1524921a55d8Smrg
1525921a55d8Smrg    /* 20 srcY.z DOT4 - mask */
1526921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1527921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1528921a55d8Smrg                             SRC0_ELEM(ELEM_Z),
1529921a55d8Smrg                             SRC0_NEG(0),
1530921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1531921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1532921a55d8Smrg                             SRC1_ELEM(ELEM_Z),
1533921a55d8Smrg                             SRC1_NEG(0),
1534921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1535921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1536921a55d8Smrg                             LAST(0));
1537921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1538921a55d8Smrg                                 SRC0_ABS(0),
1539921a55d8Smrg                                 SRC1_ABS(0),
1540921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1541921a55d8Smrg                                 UPDATE_PRED(0),
1542921a55d8Smrg                                 WRITE_MASK(0),
1543921a55d8Smrg                                 FOG_MERGE(0),
1544921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1545921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1546921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1547921a55d8Smrg                                 DST_GPR(3),
1548921a55d8Smrg                                 DST_REL(ABSOLUTE),
1549921a55d8Smrg                                 DST_ELEM(ELEM_Z),
1550921a55d8Smrg                                 CLAMP(0));
1551921a55d8Smrg
1552921a55d8Smrg    /* 21 srcY.w DOT4 - mask */
1553921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1554921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1555921a55d8Smrg                             SRC0_ELEM(ELEM_W),
1556921a55d8Smrg                             SRC0_NEG(0),
1557921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1558921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1559921a55d8Smrg                             SRC1_ELEM(ELEM_W),
1560921a55d8Smrg                             SRC1_NEG(0),
1561921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1562921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1563921a55d8Smrg                             LAST(1));
1564921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1565921a55d8Smrg                                 SRC0_ABS(0),
1566921a55d8Smrg                                 SRC1_ABS(0),
1567921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1568921a55d8Smrg                                 UPDATE_PRED(0),
1569921a55d8Smrg                                 WRITE_MASK(0),
1570921a55d8Smrg                                 FOG_MERGE(0),
1571921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1572921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1573921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1574921a55d8Smrg                                 DST_GPR(3),
1575921a55d8Smrg                                 DST_REL(ABSOLUTE),
1576921a55d8Smrg                                 DST_ELEM(ELEM_W),
1577921a55d8Smrg                                 CLAMP(0));
1578921a55d8Smrg
1579921a55d8Smrg    /* 22 maskX.x DOT4 - mask */
1580921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1581ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1582ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
1583ad43ddacSmrg                             SRC0_NEG(0),
1584921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1585ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1586ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
1587ad43ddacSmrg                             SRC1_NEG(0),
1588ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1589ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1590921a55d8Smrg                             LAST(0));
1591921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1592921a55d8Smrg                                 SRC0_ABS(0),
1593921a55d8Smrg                                 SRC1_ABS(0),
1594921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1595921a55d8Smrg                                 UPDATE_PRED(0),
1596921a55d8Smrg                                 WRITE_MASK(1),
1597921a55d8Smrg                                 FOG_MERGE(0),
1598921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1599921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1600ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1601921a55d8Smrg                                 DST_GPR(4),
1602ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1603921a55d8Smrg                                 DST_ELEM(ELEM_X),
1604ad43ddacSmrg                                 CLAMP(0));
1605ad43ddacSmrg
1606921a55d8Smrg    /* 23 maskX.y DOT4 - mask */
1607921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1608ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1609ad43ddacSmrg                             SRC0_ELEM(ELEM_Y),
1610ad43ddacSmrg                             SRC0_NEG(0),
1611921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1612ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1613ad43ddacSmrg                             SRC1_ELEM(ELEM_Y),
1614ad43ddacSmrg                             SRC1_NEG(0),
1615ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1616ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1617921a55d8Smrg                             LAST(0));
1618921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1619921a55d8Smrg                                 SRC0_ABS(0),
1620921a55d8Smrg                                 SRC1_ABS(0),
1621921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1622921a55d8Smrg                                 UPDATE_PRED(0),
1623921a55d8Smrg                                 WRITE_MASK(0),
1624921a55d8Smrg                                 FOG_MERGE(0),
1625921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1626921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1627ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1628921a55d8Smrg                                 DST_GPR(4),
1629921a55d8Smrg                                 DST_REL(ABSOLUTE),
1630921a55d8Smrg                                 DST_ELEM(ELEM_Y),
1631921a55d8Smrg                                 CLAMP(0));
1632921a55d8Smrg
1633921a55d8Smrg    /* 24 maskX.z DOT4 - mask */
1634921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1635921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1636921a55d8Smrg                             SRC0_ELEM(ELEM_Z),
1637921a55d8Smrg                             SRC0_NEG(0),
1638921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1639921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1640921a55d8Smrg                             SRC1_ELEM(ELEM_Z),
1641921a55d8Smrg                             SRC1_NEG(0),
1642921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1643921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1644921a55d8Smrg                             LAST(0));
1645921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1646921a55d8Smrg                                 SRC0_ABS(0),
1647921a55d8Smrg                                 SRC1_ABS(0),
1648921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1649921a55d8Smrg                                 UPDATE_PRED(0),
1650921a55d8Smrg                                 WRITE_MASK(0),
1651921a55d8Smrg                                 FOG_MERGE(0),
1652921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1653921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1654921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1655921a55d8Smrg                                 DST_GPR(4),
1656ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1657ad43ddacSmrg                                 DST_ELEM(ELEM_Z),
1658ad43ddacSmrg                                 CLAMP(0));
1659ad43ddacSmrg
1660921a55d8Smrg    /* 25 maskX.w DOT4 - mask */
1661921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1662ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1663921a55d8Smrg                             SRC0_ELEM(ELEM_W),
1664ad43ddacSmrg                             SRC0_NEG(0),
1665921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1666ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1667921a55d8Smrg                             SRC1_ELEM(ELEM_W),
1668ad43ddacSmrg                             SRC1_NEG(0),
1669ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1670ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1671ad43ddacSmrg                             LAST(1));
1672921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1673921a55d8Smrg                                 SRC0_ABS(0),
1674921a55d8Smrg                                 SRC1_ABS(0),
1675921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1676921a55d8Smrg                                 UPDATE_PRED(0),
1677921a55d8Smrg                                 WRITE_MASK(0),
1678921a55d8Smrg                                 FOG_MERGE(0),
1679921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1680921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1681ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1682921a55d8Smrg                                 DST_GPR(4),
1683ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1684ad43ddacSmrg                                 DST_ELEM(ELEM_W),
1685ad43ddacSmrg                                 CLAMP(0));
1686ad43ddacSmrg
1687921a55d8Smrg    /* 26 maskY.x DOT4 - mask */
1688921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1689ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1690ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
1691ad43ddacSmrg                             SRC0_NEG(0),
1692921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1693ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1694ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
1695ad43ddacSmrg                             SRC1_NEG(0),
1696ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1697ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1698ad43ddacSmrg                             LAST(0));
1699921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1700921a55d8Smrg                                 SRC0_ABS(0),
1701921a55d8Smrg                                 SRC1_ABS(0),
1702921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1703921a55d8Smrg                                 UPDATE_PRED(0),
1704921a55d8Smrg                                 WRITE_MASK(0),
1705921a55d8Smrg                                 FOG_MERGE(0),
1706921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1707921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1708ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1709921a55d8Smrg                                 DST_GPR(4),
1710ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1711ad43ddacSmrg                                 DST_ELEM(ELEM_X),
1712ad43ddacSmrg                                 CLAMP(0));
1713921a55d8Smrg
1714921a55d8Smrg    /* 27 maskY.y DOT4 - mask */
1715921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1716ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1717921a55d8Smrg                             SRC0_ELEM(ELEM_Y),
1718ad43ddacSmrg                             SRC0_NEG(0),
1719921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1720ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1721921a55d8Smrg                             SRC1_ELEM(ELEM_Y),
1722ad43ddacSmrg                             SRC1_NEG(0),
1723ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1724ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1725921a55d8Smrg                             LAST(0));
1726921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1727921a55d8Smrg                                 SRC0_ABS(0),
1728921a55d8Smrg                                 SRC1_ABS(0),
1729921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1730921a55d8Smrg                                 UPDATE_PRED(0),
1731921a55d8Smrg                                 WRITE_MASK(1),
1732921a55d8Smrg                                 FOG_MERGE(0),
1733921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1734921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1735ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1736921a55d8Smrg                                 DST_GPR(4),
1737ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1738ad43ddacSmrg                                 DST_ELEM(ELEM_Y),
1739ad43ddacSmrg                                 CLAMP(0));
1740ad43ddacSmrg
1741921a55d8Smrg    /* 28 maskY.z DOT4 - mask */
1742921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1743921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1744921a55d8Smrg                             SRC0_ELEM(ELEM_Z),
1745921a55d8Smrg                             SRC0_NEG(0),
1746921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1747921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1748921a55d8Smrg                             SRC1_ELEM(ELEM_Z),
1749921a55d8Smrg                             SRC1_NEG(0),
1750921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1751921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1752921a55d8Smrg                             LAST(0));
1753921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1754921a55d8Smrg                                 SRC0_ABS(0),
1755921a55d8Smrg                                 SRC1_ABS(0),
1756921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1757921a55d8Smrg                                 UPDATE_PRED(0),
1758921a55d8Smrg                                 WRITE_MASK(0),
1759921a55d8Smrg                                 FOG_MERGE(0),
1760921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1761921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1762921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1763921a55d8Smrg                                 DST_GPR(4),
1764921a55d8Smrg                                 DST_REL(ABSOLUTE),
1765921a55d8Smrg                                 DST_ELEM(ELEM_Z),
1766921a55d8Smrg                                 CLAMP(0));
1767921a55d8Smrg
1768921a55d8Smrg    /* 29 maskY.w DOT4 - mask */
1769921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1770921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1771921a55d8Smrg                             SRC0_ELEM(ELEM_W),
1772921a55d8Smrg                             SRC0_NEG(0),
1773921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1774921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1775921a55d8Smrg                             SRC1_ELEM(ELEM_W),
1776921a55d8Smrg                             SRC1_NEG(0),
1777921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1778921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1779921a55d8Smrg                             LAST(1));
1780921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1781921a55d8Smrg                                 SRC0_ABS(0),
1782921a55d8Smrg                                 SRC1_ABS(0),
1783921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1784921a55d8Smrg                                 UPDATE_PRED(0),
1785921a55d8Smrg                                 WRITE_MASK(0),
1786921a55d8Smrg                                 FOG_MERGE(0),
1787921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1788921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1789921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1790921a55d8Smrg                                 DST_GPR(4),
1791921a55d8Smrg                                 DST_REL(ABSOLUTE),
1792921a55d8Smrg                                 DST_ELEM(ELEM_W),
1793921a55d8Smrg                                 CLAMP(0));
1794921a55d8Smrg
1795921a55d8Smrg    /* 30 srcX / w */
1796921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
1797ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1798ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
1799ad43ddacSmrg                             SRC0_NEG(0),
1800921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1801ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1802ad43ddacSmrg                             SRC1_ELEM(ELEM_W),
1803ad43ddacSmrg                             SRC1_NEG(0),
1804ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
1805ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1806ad43ddacSmrg                             LAST(1));
1807ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1808ad43ddacSmrg                                 SRC0_ABS(0),
1809ad43ddacSmrg                                 SRC1_ABS(0),
1810ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
1811ad43ddacSmrg                                 UPDATE_PRED(0),
1812ad43ddacSmrg                                 WRITE_MASK(1),
1813ad43ddacSmrg                                 FOG_MERGE(0),
1814ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
1815ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
1816ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1817ad43ddacSmrg                                 DST_GPR(1),
1818ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1819ad43ddacSmrg                                 DST_ELEM(ELEM_X),
1820ad43ddacSmrg                                 CLAMP(0));
1821ad43ddacSmrg
1822921a55d8Smrg    /* 31 srcY / h */
1823921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
1824ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1825ad43ddacSmrg                             SRC0_ELEM(ELEM_Y),
1826ad43ddacSmrg                             SRC0_NEG(0),
1827921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1828ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1829ad43ddacSmrg                             SRC1_ELEM(ELEM_W),
1830ad43ddacSmrg                             SRC1_NEG(0),
1831ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
1832ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1833ad43ddacSmrg                             LAST(1));
1834ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1835ad43ddacSmrg                                 SRC0_ABS(0),
1836ad43ddacSmrg                                 SRC1_ABS(0),
1837ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
1838ad43ddacSmrg                                 UPDATE_PRED(0),
1839ad43ddacSmrg                                 WRITE_MASK(1),
1840ad43ddacSmrg                                 FOG_MERGE(0),
1841ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
1842ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
1843ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1844ad43ddacSmrg                                 DST_GPR(1),
1845ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1846ad43ddacSmrg                                 DST_ELEM(ELEM_Y),
1847ad43ddacSmrg                                 CLAMP(0));
1848ad43ddacSmrg
1849921a55d8Smrg    /* 32 maskX / w */
1850921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
1851ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1852ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
1853ad43ddacSmrg                             SRC0_NEG(0),
1854921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1855ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1856ad43ddacSmrg                             SRC1_ELEM(ELEM_W),
1857ad43ddacSmrg                             SRC1_NEG(0),
1858ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
1859ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1860ad43ddacSmrg                             LAST(1));
1861ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1862ad43ddacSmrg                                 SRC0_ABS(0),
1863ad43ddacSmrg                                 SRC1_ABS(0),
1864ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
1865ad43ddacSmrg                                 UPDATE_PRED(0),
1866ad43ddacSmrg                                 WRITE_MASK(1),
1867ad43ddacSmrg                                 FOG_MERGE(0),
1868ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
1869ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
1870ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1871ad43ddacSmrg                                 DST_GPR(0),
1872ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1873ad43ddacSmrg                                 DST_ELEM(ELEM_X),
1874ad43ddacSmrg                                 CLAMP(0));
1875ad43ddacSmrg
1876921a55d8Smrg    /* 33 maskY / h */
1877921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
1878ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1879ad43ddacSmrg                             SRC0_ELEM(ELEM_Y),
1880ad43ddacSmrg                             SRC0_NEG(0),
1881921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1882ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1883ad43ddacSmrg                             SRC1_ELEM(ELEM_W),
1884ad43ddacSmrg                             SRC1_NEG(0),
1885ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
1886ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1887ad43ddacSmrg                             LAST(1));
1888ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1889ad43ddacSmrg                                 SRC0_ABS(0),
1890ad43ddacSmrg                                 SRC1_ABS(0),
1891ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
1892ad43ddacSmrg                                 UPDATE_PRED(0),
1893ad43ddacSmrg                                 WRITE_MASK(1),
1894ad43ddacSmrg                                 FOG_MERGE(0),
1895ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
1896ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
1897ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1898ad43ddacSmrg                                 DST_GPR(0),
1899ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1900ad43ddacSmrg                                 DST_ELEM(ELEM_Y),
1901ad43ddacSmrg                                 CLAMP(0));
1902ad43ddacSmrg
1903921a55d8Smrg    /* 34 srcX.x DOT4 - non-mask */
1904921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1905921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1906921a55d8Smrg                             SRC0_ELEM(ELEM_X),
1907921a55d8Smrg                             SRC0_NEG(0),
1908921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1909921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1910921a55d8Smrg                             SRC1_ELEM(ELEM_X),
1911921a55d8Smrg                             SRC1_NEG(0),
1912921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1913921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1914921a55d8Smrg                             LAST(0));
1915921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1916921a55d8Smrg                                 SRC0_ABS(0),
1917921a55d8Smrg                                 SRC1_ABS(0),
1918921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1919921a55d8Smrg                                 UPDATE_PRED(0),
1920921a55d8Smrg                                 WRITE_MASK(1),
1921921a55d8Smrg                                 FOG_MERGE(0),
1922921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1923921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1924921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1925921a55d8Smrg                                 DST_GPR(2),
1926921a55d8Smrg                                 DST_REL(ABSOLUTE),
1927921a55d8Smrg                                 DST_ELEM(ELEM_X),
1928921a55d8Smrg                                 CLAMP(0));
1929921a55d8Smrg
1930921a55d8Smrg    /* 35 srcX.y DOT4 - non-mask */
1931921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1932ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1933ad43ddacSmrg                             SRC0_ELEM(ELEM_Y),
1934ad43ddacSmrg                             SRC0_NEG(0),
1935921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1936ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1937ad43ddacSmrg                             SRC1_ELEM(ELEM_Y),
1938ad43ddacSmrg                             SRC1_NEG(0),
1939ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1940ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1941921a55d8Smrg                             LAST(0));
1942921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1943921a55d8Smrg                                 SRC0_ABS(0),
1944921a55d8Smrg                                 SRC1_ABS(0),
1945921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1946921a55d8Smrg                                 UPDATE_PRED(0),
1947921a55d8Smrg                                 WRITE_MASK(0),
1948921a55d8Smrg                                 FOG_MERGE(0),
1949921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1950921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1951ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1952921a55d8Smrg                                 DST_GPR(2),
1953921a55d8Smrg                                 DST_REL(ABSOLUTE),
1954921a55d8Smrg                                 DST_ELEM(ELEM_Y),
1955921a55d8Smrg                                 CLAMP(0));
1956921a55d8Smrg
1957921a55d8Smrg    /* 36 srcX.z DOT4 - non-mask */
1958921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1959921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1960921a55d8Smrg                             SRC0_ELEM(ELEM_Z),
1961921a55d8Smrg                             SRC0_NEG(0),
1962921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1963921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1964921a55d8Smrg                             SRC1_ELEM(ELEM_Z),
1965921a55d8Smrg                             SRC1_NEG(0),
1966921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1967921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1968921a55d8Smrg                             LAST(0));
1969921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1970921a55d8Smrg                                 SRC0_ABS(0),
1971921a55d8Smrg                                 SRC1_ABS(0),
1972921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1973921a55d8Smrg                                 UPDATE_PRED(0),
1974921a55d8Smrg                                 WRITE_MASK(0),
1975921a55d8Smrg                                 FOG_MERGE(0),
1976921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1977921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1978921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1979921a55d8Smrg                                 DST_GPR(2),
1980ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1981ad43ddacSmrg                                 DST_ELEM(ELEM_Z),
1982ad43ddacSmrg                                 CLAMP(0));
1983921a55d8Smrg
1984921a55d8Smrg    /* 37 srcX.w DOT4 - non-mask */
1985921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1986ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1987921a55d8Smrg                             SRC0_ELEM(ELEM_W),
1988ad43ddacSmrg                             SRC0_NEG(0),
1989921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1990ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1991921a55d8Smrg                             SRC1_ELEM(ELEM_W),
1992ad43ddacSmrg                             SRC1_NEG(0),
1993ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1994ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1995ad43ddacSmrg                             LAST(1));
1996921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1997921a55d8Smrg                                 SRC0_ABS(0),
1998921a55d8Smrg                                 SRC1_ABS(0),
1999921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2000921a55d8Smrg                                 UPDATE_PRED(0),
2001921a55d8Smrg                                 WRITE_MASK(0),
2002921a55d8Smrg                                 FOG_MERGE(0),
2003921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2004921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2005ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2006921a55d8Smrg                                 DST_GPR(2),
2007ad43ddacSmrg                                 DST_REL(ABSOLUTE),
2008ad43ddacSmrg                                 DST_ELEM(ELEM_W),
2009ad43ddacSmrg                                 CLAMP(0));
2010ad43ddacSmrg
2011921a55d8Smrg    /* 38 srcY.x DOT4 - non-mask */
2012921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2013ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
2014ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
2015ad43ddacSmrg                             SRC0_NEG(0),
2016921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2017ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
2018ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
2019ad43ddacSmrg                             SRC1_NEG(0),
2020ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
2021ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2022ad43ddacSmrg                             LAST(0));
2023921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2024921a55d8Smrg                                 SRC0_ABS(0),
2025921a55d8Smrg                                 SRC1_ABS(0),
2026921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2027921a55d8Smrg                                 UPDATE_PRED(0),
2028921a55d8Smrg                                 WRITE_MASK(0),
2029921a55d8Smrg                                 FOG_MERGE(0),
2030921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2031921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2032ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2033921a55d8Smrg                                 DST_GPR(2),
2034ad43ddacSmrg                                 DST_REL(ABSOLUTE),
2035ad43ddacSmrg                                 DST_ELEM(ELEM_X),
2036ad43ddacSmrg                                 CLAMP(0));
2037921a55d8Smrg
2038921a55d8Smrg    /* 39 srcY.y DOT4 - non-mask */
2039921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2040ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
2041921a55d8Smrg                             SRC0_ELEM(ELEM_Y),
2042ad43ddacSmrg                             SRC0_NEG(0),
2043921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2044ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
2045921a55d8Smrg                             SRC1_ELEM(ELEM_Y),
2046ad43ddacSmrg                             SRC1_NEG(0),
2047ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
2048ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2049921a55d8Smrg                             LAST(0));
2050921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2051921a55d8Smrg                                 SRC0_ABS(0),
2052921a55d8Smrg                                 SRC1_ABS(0),
2053921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2054921a55d8Smrg                                 UPDATE_PRED(0),
2055921a55d8Smrg                                 WRITE_MASK(1),
2056921a55d8Smrg                                 FOG_MERGE(0),
2057921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2058921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2059ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2060921a55d8Smrg                                 DST_GPR(2),
2061ad43ddacSmrg                                 DST_REL(ABSOLUTE),
2062ad43ddacSmrg                                 DST_ELEM(ELEM_Y),
2063ad43ddacSmrg                                 CLAMP(0));
2064921a55d8Smrg
2065921a55d8Smrg    /* 40 srcY.z DOT4 - non-mask */
2066921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2067921a55d8Smrg                             SRC0_REL(ABSOLUTE),
2068921a55d8Smrg                             SRC0_ELEM(ELEM_Z),
2069921a55d8Smrg                             SRC0_NEG(0),
2070921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2071921a55d8Smrg                             SRC1_REL(ABSOLUTE),
2072921a55d8Smrg                             SRC1_ELEM(ELEM_Z),
2073921a55d8Smrg                             SRC1_NEG(0),
2074921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
2075921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2076921a55d8Smrg                             LAST(0));
2077921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2078921a55d8Smrg                                 SRC0_ABS(0),
2079921a55d8Smrg                                 SRC1_ABS(0),
2080921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2081921a55d8Smrg                                 UPDATE_PRED(0),
2082921a55d8Smrg                                 WRITE_MASK(0),
2083921a55d8Smrg                                 FOG_MERGE(0),
2084921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2085921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2086921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2087921a55d8Smrg                                 DST_GPR(2),
2088921a55d8Smrg                                 DST_REL(ABSOLUTE),
2089921a55d8Smrg                                 DST_ELEM(ELEM_Z),
2090921a55d8Smrg                                 CLAMP(0));
2091921a55d8Smrg
2092921a55d8Smrg    /* 41 srcY.w DOT4 - non-mask */
2093921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2094921a55d8Smrg                             SRC0_REL(ABSOLUTE),
2095921a55d8Smrg                             SRC0_ELEM(ELEM_W),
2096921a55d8Smrg                             SRC0_NEG(0),
2097921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2098921a55d8Smrg                             SRC1_REL(ABSOLUTE),
2099921a55d8Smrg                             SRC1_ELEM(ELEM_W),
2100921a55d8Smrg                             SRC1_NEG(0),
2101921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
2102921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2103921a55d8Smrg                             LAST(1));
2104921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2105921a55d8Smrg                                 SRC0_ABS(0),
2106921a55d8Smrg                                 SRC1_ABS(0),
2107921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2108921a55d8Smrg                                 UPDATE_PRED(0),
2109921a55d8Smrg                                 WRITE_MASK(0),
2110921a55d8Smrg                                 FOG_MERGE(0),
2111921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2112921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2113921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2114921a55d8Smrg                                 DST_GPR(2),
2115921a55d8Smrg                                 DST_REL(ABSOLUTE),
2116921a55d8Smrg                                 DST_ELEM(ELEM_W),
2117921a55d8Smrg                                 CLAMP(0));
2118921a55d8Smrg
2119921a55d8Smrg    /* 42 srcX / w */
2120921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
2121ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
2122ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
2123ad43ddacSmrg                             SRC0_NEG(0),
2124921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
2125ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
2126ad43ddacSmrg                             SRC1_ELEM(ELEM_W),
2127ad43ddacSmrg                             SRC1_NEG(0),
2128ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
2129ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2130ad43ddacSmrg                             LAST(1));
2131ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2132ad43ddacSmrg                                 SRC0_ABS(0),
2133ad43ddacSmrg                                 SRC1_ABS(0),
2134ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
2135ad43ddacSmrg                                 UPDATE_PRED(0),
2136ad43ddacSmrg                                 WRITE_MASK(1),
2137ad43ddacSmrg                                 FOG_MERGE(0),
2138ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
2139ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
2140ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2141ad43ddacSmrg                                 DST_GPR(0),
2142ad43ddacSmrg                                 DST_REL(ABSOLUTE),
2143ad43ddacSmrg                                 DST_ELEM(ELEM_X),
2144ad43ddacSmrg                                 CLAMP(0));
2145ad43ddacSmrg
2146921a55d8Smrg    /* 43 srcY / h */
2147921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
2148ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
2149ad43ddacSmrg                             SRC0_ELEM(ELEM_Y),
2150ad43ddacSmrg                             SRC0_NEG(0),
2151921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2152ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
2153ad43ddacSmrg                             SRC1_ELEM(ELEM_W),
2154ad43ddacSmrg                             SRC1_NEG(0),
2155ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
2156ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2157ad43ddacSmrg                             LAST(1));
2158ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2159ad43ddacSmrg                                 SRC0_ABS(0),
2160ad43ddacSmrg                                 SRC1_ABS(0),
2161ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
2162ad43ddacSmrg                                 UPDATE_PRED(0),
2163ad43ddacSmrg                                 WRITE_MASK(1),
2164ad43ddacSmrg                                 FOG_MERGE(0),
2165ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
2166ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
2167ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2168ad43ddacSmrg                                 DST_GPR(0),
2169ad43ddacSmrg                                 DST_REL(ABSOLUTE),
2170ad43ddacSmrg                                 DST_ELEM(ELEM_Y),
2171ad43ddacSmrg                                 CLAMP(0));
2172ad43ddacSmrg
2173921a55d8Smrg    /* 44/45 - dst - mask */
2174b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2175b7e1c893Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2176b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
2177b7e1c893Smrg			     BUFFER_ID(0),
2178b7e1c893Smrg			     SRC_GPR(0),
2179b7e1c893Smrg			     SRC_REL(ABSOLUTE),
2180b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
21810974d292Smrg			     MEGA_FETCH_COUNT(24));
21820974d292Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(2),
2183b7e1c893Smrg				 DST_REL(0),
2184b7e1c893Smrg				 DST_SEL_X(SQ_SEL_X),
2185b7e1c893Smrg				 DST_SEL_Y(SQ_SEL_Y),
2186b7e1c893Smrg				 DST_SEL_Z(SQ_SEL_0),
2187b7e1c893Smrg				 DST_SEL_W(SQ_SEL_1),
2188b7e1c893Smrg				 USE_CONST_FIELDS(0),
2189ad43ddacSmrg				 DATA_FORMAT(FMT_32_32_FLOAT),
2190ad43ddacSmrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2191ad43ddacSmrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2192b7e1c893Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2193b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
2194b7e1c893Smrg			     ENDIAN_SWAP(ENDIAN_NONE),
2195b7e1c893Smrg			     CONST_BUF_NO_STRIDE(0),
2196b7e1c893Smrg			     MEGA_FETCH(1));
2197b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
2198921a55d8Smrg    /* 46/47 - src */
2199b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2200b7e1c893Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2201b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
2202b7e1c893Smrg			     BUFFER_ID(0),
2203b7e1c893Smrg			     SRC_GPR(0),
2204b7e1c893Smrg			     SRC_REL(ABSOLUTE),
2205b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
2206b7e1c893Smrg			     MEGA_FETCH_COUNT(8));
22070974d292Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
2208b7e1c893Smrg				 DST_REL(0),
2209b7e1c893Smrg				 DST_SEL_X(SQ_SEL_X),
2210b7e1c893Smrg				 DST_SEL_Y(SQ_SEL_Y),
2211ad43ddacSmrg				 DST_SEL_Z(SQ_SEL_1),
2212ad43ddacSmrg				 DST_SEL_W(SQ_SEL_0),
2213b7e1c893Smrg				 USE_CONST_FIELDS(0),
2214ad43ddacSmrg				 DATA_FORMAT(FMT_32_32_FLOAT),
2215ad43ddacSmrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2216ad43ddacSmrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2217b7e1c893Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2218b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
2219b7e1c893Smrg			     ENDIAN_SWAP(ENDIAN_NONE),
2220b7e1c893Smrg			     CONST_BUF_NO_STRIDE(0),
2221b7e1c893Smrg			     MEGA_FETCH(0));
2222b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
2223921a55d8Smrg    /* 48/49 - mask */
22240974d292Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
22250974d292Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
22260974d292Smrg			     FETCH_WHOLE_QUAD(0),
22270974d292Smrg			     BUFFER_ID(0),
22280974d292Smrg			     SRC_GPR(0),
22290974d292Smrg			     SRC_REL(ABSOLUTE),
22300974d292Smrg			     SRC_SEL_X(SQ_SEL_X),
22310974d292Smrg			     MEGA_FETCH_COUNT(8));
22320974d292Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
22330974d292Smrg				 DST_REL(0),
22340974d292Smrg				 DST_SEL_X(SQ_SEL_X),
22350974d292Smrg				 DST_SEL_Y(SQ_SEL_Y),
22360974d292Smrg				 DST_SEL_Z(SQ_SEL_1),
22370974d292Smrg				 DST_SEL_W(SQ_SEL_0),
22380974d292Smrg				 USE_CONST_FIELDS(0),
22390974d292Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
22400974d292Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
22410974d292Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
22420974d292Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
22430974d292Smrg    shader[i++] = VTX_DWORD2(OFFSET(16),
22440974d292Smrg			     ENDIAN_SWAP(ENDIAN_NONE),
22450974d292Smrg			     CONST_BUF_NO_STRIDE(0),
22460974d292Smrg			     MEGA_FETCH(0));
22470974d292Smrg    shader[i++] = VTX_DWORD_PAD;
2248b7e1c893Smrg
2249921a55d8Smrg    /* 50/51 - dst - non-mask */
22500974d292Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
22510974d292Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
22520974d292Smrg			     FETCH_WHOLE_QUAD(0),
22530974d292Smrg			     BUFFER_ID(0),
22540974d292Smrg			     SRC_GPR(0),
22550974d292Smrg			     SRC_REL(ABSOLUTE),
22560974d292Smrg			     SRC_SEL_X(SQ_SEL_X),
22570974d292Smrg			     MEGA_FETCH_COUNT(16));
22580974d292Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
22590974d292Smrg				 DST_REL(0),
22600974d292Smrg				 DST_SEL_X(SQ_SEL_X),
22610974d292Smrg				 DST_SEL_Y(SQ_SEL_Y),
22620974d292Smrg				 DST_SEL_Z(SQ_SEL_0),
22630974d292Smrg				 DST_SEL_W(SQ_SEL_1),
22640974d292Smrg				 USE_CONST_FIELDS(0),
22650974d292Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
22660974d292Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
22670974d292Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
22680974d292Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
22690974d292Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
22700974d292Smrg			     ENDIAN_SWAP(ENDIAN_NONE),
22710974d292Smrg			     CONST_BUF_NO_STRIDE(0),
22720974d292Smrg			     MEGA_FETCH(1));
22730974d292Smrg    shader[i++] = VTX_DWORD_PAD;
2274921a55d8Smrg    /* 52/53 - src */
22750974d292Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
22760974d292Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
22770974d292Smrg			     FETCH_WHOLE_QUAD(0),
22780974d292Smrg			     BUFFER_ID(0),
22790974d292Smrg			     SRC_GPR(0),
22800974d292Smrg			     SRC_REL(ABSOLUTE),
22810974d292Smrg			     SRC_SEL_X(SQ_SEL_X),
22820974d292Smrg			     MEGA_FETCH_COUNT(8));
22830974d292Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
22840974d292Smrg				 DST_REL(0),
22850974d292Smrg				 DST_SEL_X(SQ_SEL_X),
22860974d292Smrg				 DST_SEL_Y(SQ_SEL_Y),
22870974d292Smrg				 DST_SEL_Z(SQ_SEL_1),
22880974d292Smrg				 DST_SEL_W(SQ_SEL_0),
22890974d292Smrg				 USE_CONST_FIELDS(0),
22900974d292Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
22910974d292Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
22920974d292Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
22930974d292Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
22940974d292Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
22950974d292Smrg			     ENDIAN_SWAP(ENDIAN_NONE),
22960974d292Smrg			     CONST_BUF_NO_STRIDE(0),
22970974d292Smrg			     MEGA_FETCH(0));
22980974d292Smrg    shader[i++] = VTX_DWORD_PAD;
22990974d292Smrg
23000974d292Smrg    return i;
23010974d292Smrg}
23020974d292Smrg
23030974d292Smrg/* comp ps --------------------------------------- */
23040974d292Smrgint R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
23050974d292Smrg{
23060974d292Smrg    int i = 0;
23070974d292Smrg
23080974d292Smrg    /* 0 */
23090974d292Smrg    shader[i++] = CF_DWORD0(ADDR(3));
23100974d292Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
23110974d292Smrg                            CF_CONST(0),
23120974d292Smrg                            COND(SQ_CF_COND_BOOL),
23130974d292Smrg                            I_COUNT(0),
23140974d292Smrg                            CALL_COUNT(0),
23150974d292Smrg                            END_OF_PROGRAM(0),
23160974d292Smrg                            VALID_PIXEL_MODE(0),
23170974d292Smrg                            CF_INST(SQ_CF_INST_CALL),
23180974d292Smrg                            WHOLE_QUAD_MODE(0),
23190974d292Smrg                            BARRIER(0));
23200974d292Smrg    /* 1 */
23210974d292Smrg    shader[i++] = CF_DWORD0(ADDR(7));
23220974d292Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
23230974d292Smrg                            CF_CONST(0),
23240974d292Smrg                            COND(SQ_CF_COND_NOT_BOOL),
23250974d292Smrg                            I_COUNT(0),
23260974d292Smrg                            CALL_COUNT(0),
23270974d292Smrg                            END_OF_PROGRAM(0),
23280974d292Smrg                            VALID_PIXEL_MODE(0),
23290974d292Smrg                            CF_INST(SQ_CF_INST_CALL),
23300974d292Smrg                            WHOLE_QUAD_MODE(0),
23310974d292Smrg                            BARRIER(0));
23320974d292Smrg    /* 2 */
23330974d292Smrg    shader[i++] = CF_DWORD0(ADDR(0));
23340974d292Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
23350974d292Smrg                            CF_CONST(0),
23360974d292Smrg                            COND(SQ_CF_COND_ACTIVE),
23370974d292Smrg                            I_COUNT(0),
23380974d292Smrg                            CALL_COUNT(0),
23390974d292Smrg                            END_OF_PROGRAM(1),
23400974d292Smrg                            VALID_PIXEL_MODE(0),
23410974d292Smrg                            CF_INST(SQ_CF_INST_NOP),
23420974d292Smrg                            WHOLE_QUAD_MODE(0),
23430974d292Smrg                            BARRIER(1));
23440974d292Smrg
23450974d292Smrg    /* 3 - mask sub */
23460974d292Smrg    shader[i++] = CF_DWORD0(ADDR(14));
23470974d292Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
23480974d292Smrg			    CF_CONST(0),
23490974d292Smrg			    COND(SQ_CF_COND_ACTIVE),
23500974d292Smrg			    I_COUNT(2),
23510974d292Smrg			    CALL_COUNT(0),
23520974d292Smrg			    END_OF_PROGRAM(0),
23530974d292Smrg			    VALID_PIXEL_MODE(0),
23540974d292Smrg			    CF_INST(SQ_CF_INST_TEX),
23550974d292Smrg			    WHOLE_QUAD_MODE(0),
23560974d292Smrg			    BARRIER(1));
23570974d292Smrg
23580974d292Smrg    /* 4 */
23590974d292Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(10),
23600974d292Smrg				KCACHE_BANK0(0),
23610974d292Smrg				KCACHE_BANK1(0),
23620974d292Smrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
23630974d292Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
23640974d292Smrg				KCACHE_ADDR0(0),
23650974d292Smrg				KCACHE_ADDR1(0),
23660974d292Smrg				I_COUNT(4),
23670974d292Smrg				USES_WATERFALL(0),
23680974d292Smrg				CF_INST(SQ_CF_INST_ALU),
23690974d292Smrg				WHOLE_QUAD_MODE(0),
23700974d292Smrg				BARRIER(1));
23710974d292Smrg
23720974d292Smrg    /* 5 */
23730974d292Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
23740974d292Smrg					  TYPE(SQ_EXPORT_PIXEL),
23750974d292Smrg					  RW_GPR(2),
23760974d292Smrg					  RW_REL(ABSOLUTE),
23770974d292Smrg					  INDEX_GPR(0),
23780974d292Smrg					  ELEM_SIZE(1));
23790974d292Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
23800974d292Smrg					       SRC_SEL_Y(SQ_SEL_Y),
23810974d292Smrg					       SRC_SEL_Z(SQ_SEL_Z),
23820974d292Smrg					       SRC_SEL_W(SQ_SEL_W),
23830974d292Smrg					       R6xx_ELEM_LOOP(0),
23840974d292Smrg					       BURST_COUNT(1),
23850974d292Smrg					       END_OF_PROGRAM(0),
23860974d292Smrg					       VALID_PIXEL_MODE(0),
23870974d292Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
23880974d292Smrg					       WHOLE_QUAD_MODE(0),
23890974d292Smrg					       BARRIER(1));
23900974d292Smrg    /* 6 */
23910974d292Smrg    shader[i++] = CF_DWORD0(ADDR(0));
23920974d292Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
23930974d292Smrg			    CF_CONST(0),
23940974d292Smrg			    COND(SQ_CF_COND_ACTIVE),
23950974d292Smrg			    I_COUNT(0),
23960974d292Smrg			    CALL_COUNT(0),
23970974d292Smrg			    END_OF_PROGRAM(0),
23980974d292Smrg			    VALID_PIXEL_MODE(0),
23990974d292Smrg			    CF_INST(SQ_CF_INST_RETURN),
24000974d292Smrg			    WHOLE_QUAD_MODE(0),
24010974d292Smrg			    BARRIER(1));
24020974d292Smrg
24030974d292Smrg    /* 7 non-mask sub */
24040974d292Smrg    shader[i++] = CF_DWORD0(ADDR(18));
2405b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
2406b7e1c893Smrg			    CF_CONST(0),
2407b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
2408b7e1c893Smrg			    I_COUNT(1),
2409b7e1c893Smrg			    CALL_COUNT(0),
2410b7e1c893Smrg			    END_OF_PROGRAM(0),
2411b7e1c893Smrg			    VALID_PIXEL_MODE(0),
2412b7e1c893Smrg			    CF_INST(SQ_CF_INST_TEX),
2413b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
2414b7e1c893Smrg			    BARRIER(1));
24150974d292Smrg    /* 8 */
2416b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
2417b7e1c893Smrg					  TYPE(SQ_EXPORT_PIXEL),
2418b7e1c893Smrg					  RW_GPR(0),
2419b7e1c893Smrg					  RW_REL(ABSOLUTE),
2420b7e1c893Smrg					  INDEX_GPR(0),
2421b7e1c893Smrg					  ELEM_SIZE(1));
2422b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
2423b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
2424b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
2425b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
2426b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
2427b7e1c893Smrg					       BURST_COUNT(1),
24280974d292Smrg					       END_OF_PROGRAM(0),
2429b7e1c893Smrg					       VALID_PIXEL_MODE(0),
2430b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
2431b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
2432b7e1c893Smrg					       BARRIER(1));
24330974d292Smrg    /* 9 */
24340974d292Smrg    shader[i++] = CF_DWORD0(ADDR(0));
24350974d292Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
24360974d292Smrg			    CF_CONST(0),
24370974d292Smrg			    COND(SQ_CF_COND_ACTIVE),
24380974d292Smrg			    I_COUNT(0),
24390974d292Smrg			    CALL_COUNT(0),
24400974d292Smrg			    END_OF_PROGRAM(0),
24410974d292Smrg			    VALID_PIXEL_MODE(0),
24420974d292Smrg			    CF_INST(SQ_CF_INST_RETURN),
24430974d292Smrg			    WHOLE_QUAD_MODE(0),
24440974d292Smrg			    BARRIER(1));
24450974d292Smrg
24460974d292Smrg    /* 10 - alu 0 */
24470974d292Smrg    /* MUL gpr[2].x gpr[1].x gpr[0].x */
2448921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
24490974d292Smrg			     SRC0_REL(ABSOLUTE),
24500974d292Smrg			     SRC0_ELEM(ELEM_X),
24510974d292Smrg			     SRC0_NEG(0),
2452921a55d8Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
24530974d292Smrg			     SRC1_REL(ABSOLUTE),
24540974d292Smrg			     SRC1_ELEM(ELEM_X),
24550974d292Smrg			     SRC1_NEG(0),
24560974d292Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
24570974d292Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
24580974d292Smrg			     LAST(0));
24590974d292Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
24600974d292Smrg				 SRC0_ABS(0),
24610974d292Smrg				 SRC1_ABS(0),
24620974d292Smrg				 UPDATE_EXECUTE_MASK(0),
24630974d292Smrg				 UPDATE_PRED(0),
24640974d292Smrg				 WRITE_MASK(1),
24650974d292Smrg				 FOG_MERGE(0),
24660974d292Smrg				 OMOD(SQ_ALU_OMOD_OFF),
24670974d292Smrg				 ALU_INST(SQ_OP2_INST_MUL),
24680974d292Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
24690974d292Smrg				 DST_GPR(2),
24700974d292Smrg				 DST_REL(ABSOLUTE),
24710974d292Smrg				 DST_ELEM(ELEM_X),
24720974d292Smrg				 CLAMP(1));
24730974d292Smrg    /* 11 - alu 1 */
24740974d292Smrg    /* MUL gpr[2].y gpr[1].y gpr[0].y */
2475921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
24760974d292Smrg			     SRC0_REL(ABSOLUTE),
24770974d292Smrg			     SRC0_ELEM(ELEM_Y),
24780974d292Smrg			     SRC0_NEG(0),
2479921a55d8Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
24800974d292Smrg			     SRC1_REL(ABSOLUTE),
24810974d292Smrg			     SRC1_ELEM(ELEM_Y),
24820974d292Smrg			     SRC1_NEG(0),
24830974d292Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
24840974d292Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
24850974d292Smrg			     LAST(0));
24860974d292Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
24870974d292Smrg				 SRC0_ABS(0),
24880974d292Smrg				 SRC1_ABS(0),
24890974d292Smrg				 UPDATE_EXECUTE_MASK(0),
24900974d292Smrg				 UPDATE_PRED(0),
24910974d292Smrg				 WRITE_MASK(1),
24920974d292Smrg				 FOG_MERGE(0),
24930974d292Smrg				 OMOD(SQ_ALU_OMOD_OFF),
24940974d292Smrg				 ALU_INST(SQ_OP2_INST_MUL),
24950974d292Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
24960974d292Smrg				 DST_GPR(2),
24970974d292Smrg				 DST_REL(ABSOLUTE),
24980974d292Smrg				 DST_ELEM(ELEM_Y),
24990974d292Smrg				 CLAMP(1));
25000974d292Smrg    /* 12 - alu 2 */
25010974d292Smrg    /* MUL gpr[2].z gpr[1].z gpr[0].z */
2502921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
25030974d292Smrg			     SRC0_REL(ABSOLUTE),
25040974d292Smrg			     SRC0_ELEM(ELEM_Z),
25050974d292Smrg			     SRC0_NEG(0),
2506921a55d8Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
25070974d292Smrg			     SRC1_REL(ABSOLUTE),
25080974d292Smrg			     SRC1_ELEM(ELEM_Z),
25090974d292Smrg			     SRC1_NEG(0),
25100974d292Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
25110974d292Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
25120974d292Smrg			     LAST(0));
25130974d292Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
25140974d292Smrg				 SRC0_ABS(0),
25150974d292Smrg				 SRC1_ABS(0),
25160974d292Smrg				 UPDATE_EXECUTE_MASK(0),
25170974d292Smrg				 UPDATE_PRED(0),
25180974d292Smrg				 WRITE_MASK(1),
25190974d292Smrg				 FOG_MERGE(0),
25200974d292Smrg				 OMOD(SQ_ALU_OMOD_OFF),
25210974d292Smrg				 ALU_INST(SQ_OP2_INST_MUL),
25220974d292Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
25230974d292Smrg				 DST_GPR(2),
25240974d292Smrg				 DST_REL(ABSOLUTE),
25250974d292Smrg				 DST_ELEM(ELEM_Z),
25260974d292Smrg				 CLAMP(1));
25270974d292Smrg    /* 13 - alu 3 */
25280974d292Smrg    /* MUL gpr[2].w gpr[1].w gpr[0].w */
2529921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
25300974d292Smrg			     SRC0_REL(ABSOLUTE),
25310974d292Smrg			     SRC0_ELEM(ELEM_W),
25320974d292Smrg			     SRC0_NEG(0),
2533921a55d8Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
25340974d292Smrg			     SRC1_REL(ABSOLUTE),
25350974d292Smrg			     SRC1_ELEM(ELEM_W),
25360974d292Smrg			     SRC1_NEG(0),
25370974d292Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
25380974d292Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
25390974d292Smrg			     LAST(1));
25400974d292Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
25410974d292Smrg				 SRC0_ABS(0),
25420974d292Smrg				 SRC1_ABS(0),
25430974d292Smrg				 UPDATE_EXECUTE_MASK(0),
25440974d292Smrg				 UPDATE_PRED(0),
25450974d292Smrg				 WRITE_MASK(1),
25460974d292Smrg				 FOG_MERGE(0),
25470974d292Smrg				 OMOD(SQ_ALU_OMOD_OFF),
25480974d292Smrg				 ALU_INST(SQ_OP2_INST_MUL),
25490974d292Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
25500974d292Smrg				 DST_GPR(2),
25510974d292Smrg				 DST_REL(ABSOLUTE),
25520974d292Smrg				 DST_ELEM(ELEM_W),
25530974d292Smrg				 CLAMP(1));
2554b7e1c893Smrg
25550974d292Smrg    /* 14/15 - src - mask */
25560974d292Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
25570974d292Smrg			     BC_FRAC_MODE(0),
25580974d292Smrg			     FETCH_WHOLE_QUAD(0),
25590974d292Smrg			     RESOURCE_ID(0),
25600974d292Smrg			     SRC_GPR(0),
25610974d292Smrg			     SRC_REL(ABSOLUTE),
25620974d292Smrg			     R7xx_ALT_CONST(0));
25630974d292Smrg    shader[i++] = TEX_DWORD1(DST_GPR(0),
25640974d292Smrg			     DST_REL(ABSOLUTE),
25650974d292Smrg			     DST_SEL_X(SQ_SEL_X),
25660974d292Smrg			     DST_SEL_Y(SQ_SEL_Y),
25670974d292Smrg			     DST_SEL_Z(SQ_SEL_Z),
25680974d292Smrg			     DST_SEL_W(SQ_SEL_W),
25690974d292Smrg			     LOD_BIAS(0),
25700974d292Smrg			     COORD_TYPE_X(TEX_NORMALIZED),
25710974d292Smrg			     COORD_TYPE_Y(TEX_NORMALIZED),
25720974d292Smrg			     COORD_TYPE_Z(TEX_NORMALIZED),
25730974d292Smrg			     COORD_TYPE_W(TEX_NORMALIZED));
25740974d292Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
25750974d292Smrg			     OFFSET_Y(0),
25760974d292Smrg			     OFFSET_Z(0),
25770974d292Smrg			     SAMPLER_ID(0),
25780974d292Smrg			     SRC_SEL_X(SQ_SEL_X),
25790974d292Smrg			     SRC_SEL_Y(SQ_SEL_Y),
25800974d292Smrg			     SRC_SEL_Z(SQ_SEL_0),
25810974d292Smrg			     SRC_SEL_W(SQ_SEL_1));
25820974d292Smrg    shader[i++] = TEX_DWORD_PAD;
25830974d292Smrg    /* 16/17 - mask */
25840974d292Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
25850974d292Smrg			     BC_FRAC_MODE(0),
25860974d292Smrg			     FETCH_WHOLE_QUAD(0),
25870974d292Smrg			     RESOURCE_ID(1),
25880974d292Smrg			     SRC_GPR(1),
25890974d292Smrg			     SRC_REL(ABSOLUTE),
25900974d292Smrg			     R7xx_ALT_CONST(0));
25910974d292Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
25920974d292Smrg			     DST_REL(ABSOLUTE),
25930974d292Smrg			     DST_SEL_X(SQ_SEL_X),
25940974d292Smrg			     DST_SEL_Y(SQ_SEL_Y),
25950974d292Smrg			     DST_SEL_Z(SQ_SEL_Z),
25960974d292Smrg			     DST_SEL_W(SQ_SEL_W),
25970974d292Smrg			     LOD_BIAS(0),
25980974d292Smrg			     COORD_TYPE_X(TEX_NORMALIZED),
25990974d292Smrg			     COORD_TYPE_Y(TEX_NORMALIZED),
26000974d292Smrg			     COORD_TYPE_Z(TEX_NORMALIZED),
26010974d292Smrg			     COORD_TYPE_W(TEX_NORMALIZED));
26020974d292Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
26030974d292Smrg			     OFFSET_Y(0),
26040974d292Smrg			     OFFSET_Z(0),
26050974d292Smrg			     SAMPLER_ID(1),
26060974d292Smrg			     SRC_SEL_X(SQ_SEL_X),
26070974d292Smrg			     SRC_SEL_Y(SQ_SEL_Y),
26080974d292Smrg			     SRC_SEL_Z(SQ_SEL_0),
26090974d292Smrg			     SRC_SEL_W(SQ_SEL_1));
26100974d292Smrg    shader[i++] = TEX_DWORD_PAD;
2611b7e1c893Smrg
26120974d292Smrg    /* 18/19 - src - non-mask */
2613b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
2614b7e1c893Smrg			     BC_FRAC_MODE(0),
2615b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
2616b7e1c893Smrg			     RESOURCE_ID(0),
2617b7e1c893Smrg			     SRC_GPR(0),
2618b7e1c893Smrg			     SRC_REL(ABSOLUTE),
2619b7e1c893Smrg			     R7xx_ALT_CONST(0));
2620b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(0),
2621b7e1c893Smrg			     DST_REL(ABSOLUTE),
2622b7e1c893Smrg			     DST_SEL_X(SQ_SEL_X),
2623b7e1c893Smrg			     DST_SEL_Y(SQ_SEL_Y),
2624b7e1c893Smrg			     DST_SEL_Z(SQ_SEL_Z),
2625b7e1c893Smrg			     DST_SEL_W(SQ_SEL_W),
2626b7e1c893Smrg			     LOD_BIAS(0),
2627b7e1c893Smrg			     COORD_TYPE_X(TEX_NORMALIZED),
2628b7e1c893Smrg			     COORD_TYPE_Y(TEX_NORMALIZED),
2629b7e1c893Smrg			     COORD_TYPE_Z(TEX_NORMALIZED),
2630b7e1c893Smrg			     COORD_TYPE_W(TEX_NORMALIZED));
2631b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
2632b7e1c893Smrg			     OFFSET_Y(0),
2633b7e1c893Smrg			     OFFSET_Z(0),
2634b7e1c893Smrg			     SAMPLER_ID(0),
2635b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
2636b7e1c893Smrg			     SRC_SEL_Y(SQ_SEL_Y),
2637b7e1c893Smrg			     SRC_SEL_Z(SQ_SEL_0),
2638b7e1c893Smrg			     SRC_SEL_W(SQ_SEL_1));
2639b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
2640b7e1c893Smrg
2641b7e1c893Smrg    return i;
2642b7e1c893Smrg}
2643