r600_shader.c revision b13dfe66
1b7e1c893Smrg/*
2b7e1c893Smrg * Copyright 2008 Advanced Micro Devices, Inc.
3b7e1c893Smrg *
4b7e1c893Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b7e1c893Smrg * copy of this software and associated documentation files (the "Software"),
6b7e1c893Smrg * to deal in the Software without restriction, including without limitation
7b7e1c893Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b7e1c893Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b7e1c893Smrg * Software is furnished to do so, subject to the following conditions:
10b7e1c893Smrg *
11b7e1c893Smrg * The above copyright notice and this permission notice (including the next
12b7e1c893Smrg * paragraph) shall be included in all copies or substantial portions of the
13b7e1c893Smrg * Software.
14b7e1c893Smrg *
15b7e1c893Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b7e1c893Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b7e1c893Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b7e1c893Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b7e1c893Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20b7e1c893Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21b7e1c893Smrg * SOFTWARE.
22b7e1c893Smrg *
23b7e1c893Smrg * Author: Alex Deucher <alexander.deucher@amd.com>
24b7e1c893Smrg *
25b7e1c893Smrg */
26b7e1c893Smrg
27b7e1c893Smrg#ifdef HAVE_CONFIG_H
28b7e1c893Smrg#include "config.h"
29b7e1c893Smrg#endif
30b7e1c893Smrg
31b7e1c893Smrg#include "xf86.h"
32b7e1c893Smrg
33b7e1c893Smrg#include "radeon.h"
34b7e1c893Smrg#include "r600_shader.h"
35b7e1c893Smrg#include "r600_reg.h"
36b7e1c893Smrg
37b7e1c893Smrg/* solid vs --------------------------------------- */
38b7e1c893Smrgint R600_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader)
39b7e1c893Smrg{
40b7e1c893Smrg    int i = 0;
41b7e1c893Smrg
42b7e1c893Smrg    /* 0 */
43b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(4));
44b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
45b7e1c893Smrg			    CF_CONST(0),
46b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
47b7e1c893Smrg			    I_COUNT(1),
48b7e1c893Smrg			    CALL_COUNT(0),
49b7e1c893Smrg			    END_OF_PROGRAM(0),
50b7e1c893Smrg			    VALID_PIXEL_MODE(0),
51b7e1c893Smrg			    CF_INST(SQ_CF_INST_VTX),
52b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
53b7e1c893Smrg			    BARRIER(1));
54b7e1c893Smrg    /* 1 */
55b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
56b7e1c893Smrg					  TYPE(SQ_EXPORT_POS),
57b7e1c893Smrg					  RW_GPR(1),
58b7e1c893Smrg					  RW_REL(ABSOLUTE),
59b7e1c893Smrg					  INDEX_GPR(0),
60b7e1c893Smrg					  ELEM_SIZE(0));
61b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
62b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
63b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
64b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
65b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
66b7e1c893Smrg					       BURST_COUNT(1),
67b7e1c893Smrg					       END_OF_PROGRAM(0),
68b7e1c893Smrg					       VALID_PIXEL_MODE(0),
69b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
70b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
71b7e1c893Smrg					       BARRIER(1));
72b7e1c893Smrg    /* 2 - always export a param whether it's used or not */
73b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
74b7e1c893Smrg					  TYPE(SQ_EXPORT_PARAM),
75b7e1c893Smrg					  RW_GPR(0),
76b7e1c893Smrg					  RW_REL(ABSOLUTE),
77b7e1c893Smrg					  INDEX_GPR(0),
78b7e1c893Smrg					  ELEM_SIZE(0));
79b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
80b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
81b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
82b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
83b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
84b7e1c893Smrg					       BURST_COUNT(0),
85b7e1c893Smrg					       END_OF_PROGRAM(1),
86b7e1c893Smrg					       VALID_PIXEL_MODE(0),
87b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
88b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
89b7e1c893Smrg					       BARRIER(0));
90b7e1c893Smrg    /* 3 - padding */
91b7e1c893Smrg    shader[i++] = 0x00000000;
92b7e1c893Smrg    shader[i++] = 0x00000000;
93b7e1c893Smrg    /* 4/5 */
94b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
95b7e1c893Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
96b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
97b7e1c893Smrg			     BUFFER_ID(0),
98b7e1c893Smrg			     SRC_GPR(0),
99b7e1c893Smrg			     SRC_REL(ABSOLUTE),
100b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
101b7e1c893Smrg			     MEGA_FETCH_COUNT(8));
102b7e1c893Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
103b7e1c893Smrg				 DST_REL(0),
104b7e1c893Smrg				 DST_SEL_X(SQ_SEL_X),
105b7e1c893Smrg				 DST_SEL_Y(SQ_SEL_Y),
106b7e1c893Smrg				 DST_SEL_Z(SQ_SEL_0),
107b7e1c893Smrg				 DST_SEL_W(SQ_SEL_1),
108b7e1c893Smrg				 USE_CONST_FIELDS(0),
109ad43ddacSmrg				 DATA_FORMAT(FMT_32_32_FLOAT),
110ad43ddacSmrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
111ad43ddacSmrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
112b7e1c893Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
113b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
114b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
115b13dfe66Smrg			     ENDIAN_SWAP(SQ_ENDIAN_8IN32),
116b13dfe66Smrg#else
117b13dfe66Smrg			     ENDIAN_SWAP(SQ_ENDIAN_NONE),
118b13dfe66Smrg#endif
119b7e1c893Smrg			     CONST_BUF_NO_STRIDE(0),
120b7e1c893Smrg			     MEGA_FETCH(1));
121b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
122b7e1c893Smrg
123b7e1c893Smrg    return i;
124b7e1c893Smrg}
125b7e1c893Smrg
126b7e1c893Smrg/* solid ps --------------------------------------- */
127b7e1c893Smrgint R600_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader)
128b7e1c893Smrg{
129b7e1c893Smrg    int i = 0;
130b7e1c893Smrg
131b7e1c893Smrg    /* 0 */
132b7e1c893Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(2),
133b7e1c893Smrg				KCACHE_BANK0(0),
134b7e1c893Smrg				KCACHE_BANK1(0),
135b7e1c893Smrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
136b7e1c893Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
137b7e1c893Smrg				KCACHE_ADDR0(0),
138b7e1c893Smrg				KCACHE_ADDR1(0),
139b7e1c893Smrg				I_COUNT(4),
140b7e1c893Smrg				USES_WATERFALL(0),
141b7e1c893Smrg				CF_INST(SQ_CF_INST_ALU),
142b7e1c893Smrg				WHOLE_QUAD_MODE(0),
143b7e1c893Smrg				BARRIER(1));
144b7e1c893Smrg    /* 1 */
145b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
146b7e1c893Smrg					  TYPE(SQ_EXPORT_PIXEL),
147b7e1c893Smrg					  RW_GPR(0),
148b7e1c893Smrg					  RW_REL(ABSOLUTE),
149b7e1c893Smrg					  INDEX_GPR(0),
150b7e1c893Smrg					  ELEM_SIZE(1));
151b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
152b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
153b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
154b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
155b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
156b7e1c893Smrg					       BURST_COUNT(1),
157b7e1c893Smrg					       END_OF_PROGRAM(1),
158b7e1c893Smrg					       VALID_PIXEL_MODE(0),
159b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
160b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
161b7e1c893Smrg					       BARRIER(1));
162b7e1c893Smrg
163b7e1c893Smrg    /* 2 */
164921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
165b7e1c893Smrg			     SRC0_REL(ABSOLUTE),
166b7e1c893Smrg			     SRC0_ELEM(ELEM_X),
167b7e1c893Smrg			     SRC0_NEG(0),
168921a55d8Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
169b7e1c893Smrg			     SRC1_REL(ABSOLUTE),
170b7e1c893Smrg			     SRC1_ELEM(ELEM_X),
171b7e1c893Smrg			     SRC1_NEG(0),
172b7e1c893Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
173b7e1c893Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
174b7e1c893Smrg			     LAST(0));
175b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
176b7e1c893Smrg				 SRC0_ABS(0),
177b7e1c893Smrg				 SRC1_ABS(0),
178b7e1c893Smrg				 UPDATE_EXECUTE_MASK(0),
179b7e1c893Smrg				 UPDATE_PRED(0),
180b7e1c893Smrg				 WRITE_MASK(1),
181b7e1c893Smrg				 FOG_MERGE(0),
182b7e1c893Smrg				 OMOD(SQ_ALU_OMOD_OFF),
183b7e1c893Smrg				 ALU_INST(SQ_OP2_INST_MOV),
184b7e1c893Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
185b7e1c893Smrg				 DST_GPR(0),
186b7e1c893Smrg				 DST_REL(ABSOLUTE),
187b7e1c893Smrg				 DST_ELEM(ELEM_X),
188b7e1c893Smrg				 CLAMP(1));
189b7e1c893Smrg    /* 3 */
190921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
191b7e1c893Smrg			     SRC0_REL(ABSOLUTE),
192b7e1c893Smrg			     SRC0_ELEM(ELEM_Y),
193b7e1c893Smrg			     SRC0_NEG(0),
194921a55d8Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
195b7e1c893Smrg			     SRC1_REL(ABSOLUTE),
196b7e1c893Smrg			     SRC1_ELEM(ELEM_Y),
197b7e1c893Smrg			     SRC1_NEG(0),
198b7e1c893Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
199b7e1c893Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
200b7e1c893Smrg			     LAST(0));
201b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
202b7e1c893Smrg				 SRC0_ABS(0),
203b7e1c893Smrg				 SRC1_ABS(0),
204b7e1c893Smrg				 UPDATE_EXECUTE_MASK(0),
205b7e1c893Smrg				 UPDATE_PRED(0),
206b7e1c893Smrg				 WRITE_MASK(1),
207b7e1c893Smrg				 FOG_MERGE(0),
208b7e1c893Smrg				 OMOD(SQ_ALU_OMOD_OFF),
209b7e1c893Smrg				 ALU_INST(SQ_OP2_INST_MOV),
210b7e1c893Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
211b7e1c893Smrg				 DST_GPR(0),
212b7e1c893Smrg				 DST_REL(ABSOLUTE),
213b7e1c893Smrg				 DST_ELEM(ELEM_Y),
214b7e1c893Smrg				 CLAMP(1));
215b7e1c893Smrg    /* 4 */
216921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
217b7e1c893Smrg			     SRC0_REL(ABSOLUTE),
218b7e1c893Smrg			     SRC0_ELEM(ELEM_Z),
219b7e1c893Smrg			     SRC0_NEG(0),
220921a55d8Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
221b7e1c893Smrg			     SRC1_REL(ABSOLUTE),
222b7e1c893Smrg			     SRC1_ELEM(ELEM_Z),
223b7e1c893Smrg			     SRC1_NEG(0),
224b7e1c893Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
225b7e1c893Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
226b7e1c893Smrg			     LAST(0));
227b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
228b7e1c893Smrg				 SRC0_ABS(0),
229b7e1c893Smrg				 SRC1_ABS(0),
230b7e1c893Smrg				 UPDATE_EXECUTE_MASK(0),
231b7e1c893Smrg				 UPDATE_PRED(0),
232b7e1c893Smrg				 WRITE_MASK(1),
233b7e1c893Smrg				 FOG_MERGE(0),
234b7e1c893Smrg				 OMOD(SQ_ALU_OMOD_OFF),
235b7e1c893Smrg				 ALU_INST(SQ_OP2_INST_MOV),
236b7e1c893Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
237b7e1c893Smrg				 DST_GPR(0),
238b7e1c893Smrg				 DST_REL(ABSOLUTE),
239b7e1c893Smrg				 DST_ELEM(ELEM_Z),
240b7e1c893Smrg				 CLAMP(1));
241b7e1c893Smrg    /* 5 */
242921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
243b7e1c893Smrg			     SRC0_REL(ABSOLUTE),
244b7e1c893Smrg			     SRC0_ELEM(ELEM_W),
245b7e1c893Smrg			     SRC0_NEG(0),
246921a55d8Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
247b7e1c893Smrg			     SRC1_REL(ABSOLUTE),
248b7e1c893Smrg			     SRC1_ELEM(ELEM_W),
249b7e1c893Smrg			     SRC1_NEG(0),
250b7e1c893Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
251b7e1c893Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
252b7e1c893Smrg			     LAST(1));
253b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
254b7e1c893Smrg				 SRC0_ABS(0),
255b7e1c893Smrg				 SRC1_ABS(0),
256b7e1c893Smrg				 UPDATE_EXECUTE_MASK(0),
257b7e1c893Smrg				 UPDATE_PRED(0),
258b7e1c893Smrg				 WRITE_MASK(1),
259b7e1c893Smrg				 FOG_MERGE(0),
260b7e1c893Smrg				 OMOD(SQ_ALU_OMOD_OFF),
261b7e1c893Smrg				 ALU_INST(SQ_OP2_INST_MOV),
262b7e1c893Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
263b7e1c893Smrg				 DST_GPR(0),
264b7e1c893Smrg				 DST_REL(ABSOLUTE),
265b7e1c893Smrg				 DST_ELEM(ELEM_W),
266b7e1c893Smrg				 CLAMP(1));
267b7e1c893Smrg
268b7e1c893Smrg    return i;
269b7e1c893Smrg}
270b7e1c893Smrg
271b7e1c893Smrg/* copy vs --------------------------------------- */
272b7e1c893Smrgint R600_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader)
273b7e1c893Smrg{
274b7e1c893Smrg    int i = 0;
275b7e1c893Smrg
276b7e1c893Smrg    /* 0 */
277b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(4));
278b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
279b7e1c893Smrg			    CF_CONST(0),
280b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
281b7e1c893Smrg			    I_COUNT(2),
282b7e1c893Smrg			    CALL_COUNT(0),
283b7e1c893Smrg			    END_OF_PROGRAM(0),
284b7e1c893Smrg			    VALID_PIXEL_MODE(0),
285b7e1c893Smrg			    CF_INST(SQ_CF_INST_VTX),
286b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
287b7e1c893Smrg			    BARRIER(1));
288b7e1c893Smrg    /* 1 */
289b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
290b7e1c893Smrg					  TYPE(SQ_EXPORT_POS),
291b7e1c893Smrg					  RW_GPR(1),
292b7e1c893Smrg					  RW_REL(ABSOLUTE),
293b7e1c893Smrg					  INDEX_GPR(0),
294b7e1c893Smrg					  ELEM_SIZE(0));
295b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
296b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
297b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
298b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
299b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
300b7e1c893Smrg					       BURST_COUNT(0),
301b7e1c893Smrg					       END_OF_PROGRAM(0),
302b7e1c893Smrg					       VALID_PIXEL_MODE(0),
303b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
304b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
305b7e1c893Smrg					       BARRIER(1));
306b7e1c893Smrg    /* 2 */
307b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
308b7e1c893Smrg					  TYPE(SQ_EXPORT_PARAM),
309b7e1c893Smrg					  RW_GPR(0),
310b7e1c893Smrg					  RW_REL(ABSOLUTE),
311b7e1c893Smrg					  INDEX_GPR(0),
312b7e1c893Smrg					  ELEM_SIZE(0));
313b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
314b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
315b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
316b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
317b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
318b7e1c893Smrg					       BURST_COUNT(0),
319b7e1c893Smrg					       END_OF_PROGRAM(1),
320b7e1c893Smrg					       VALID_PIXEL_MODE(0),
321b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
322b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
323b7e1c893Smrg					       BARRIER(0));
324b7e1c893Smrg    /* 3 */
325b7e1c893Smrg    shader[i++] = 0x00000000;
326b7e1c893Smrg    shader[i++] = 0x00000000;
327b7e1c893Smrg    /* 4/5 */
328b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
329b7e1c893Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
330b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
331b7e1c893Smrg			     BUFFER_ID(0),
332b7e1c893Smrg			     SRC_GPR(0),
333b7e1c893Smrg			     SRC_REL(ABSOLUTE),
334b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
335b7e1c893Smrg			     MEGA_FETCH_COUNT(16));
336b7e1c893Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
337b7e1c893Smrg				 DST_REL(0),
338b7e1c893Smrg				 DST_SEL_X(SQ_SEL_X),
339b7e1c893Smrg				 DST_SEL_Y(SQ_SEL_Y),
340b7e1c893Smrg				 DST_SEL_Z(SQ_SEL_0),
341b7e1c893Smrg				 DST_SEL_W(SQ_SEL_1),
342b7e1c893Smrg				 USE_CONST_FIELDS(0),
343ad43ddacSmrg				 DATA_FORMAT(FMT_32_32_FLOAT),
344ad43ddacSmrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
345ad43ddacSmrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
346b7e1c893Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
347b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
348b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
349b13dfe66Smrg			     ENDIAN_SWAP(SQ_ENDIAN_8IN32),
350b13dfe66Smrg#else
351b13dfe66Smrg			     ENDIAN_SWAP(SQ_ENDIAN_NONE),
352b13dfe66Smrg#endif
353b7e1c893Smrg			     CONST_BUF_NO_STRIDE(0),
354b7e1c893Smrg			     MEGA_FETCH(1));
355b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
356b7e1c893Smrg    /* 6/7 */
357b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
358b7e1c893Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
359b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
360b7e1c893Smrg			     BUFFER_ID(0),
361b7e1c893Smrg			     SRC_GPR(0),
362b7e1c893Smrg			     SRC_REL(ABSOLUTE),
363b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
364b7e1c893Smrg			     MEGA_FETCH_COUNT(8));
365b7e1c893Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
366b7e1c893Smrg				 DST_REL(0),
367b7e1c893Smrg				 DST_SEL_X(SQ_SEL_X),
368b7e1c893Smrg				 DST_SEL_Y(SQ_SEL_Y),
369b7e1c893Smrg				 DST_SEL_Z(SQ_SEL_0),
370b7e1c893Smrg				 DST_SEL_W(SQ_SEL_1),
371b7e1c893Smrg				 USE_CONST_FIELDS(0),
372ad43ddacSmrg				 DATA_FORMAT(FMT_32_32_FLOAT),
373ad43ddacSmrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
374ad43ddacSmrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
375b7e1c893Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
376b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
377b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
378b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
379b13dfe66Smrg#else
380b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
381b13dfe66Smrg#endif
382b7e1c893Smrg			     CONST_BUF_NO_STRIDE(0),
383b7e1c893Smrg			     MEGA_FETCH(0));
384b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
385b7e1c893Smrg
386b7e1c893Smrg    return i;
387b7e1c893Smrg}
388b7e1c893Smrg
389b7e1c893Smrg/* copy ps --------------------------------------- */
390b7e1c893Smrgint R600_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader)
391b7e1c893Smrg{
392b7e1c893Smrg    int i=0;
393b7e1c893Smrg
394b7e1c893Smrg    /* CF INST 0 */
395b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(2));
396b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
397b7e1c893Smrg			    CF_CONST(0),
398b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
399b7e1c893Smrg			    I_COUNT(1),
400b7e1c893Smrg			    CALL_COUNT(0),
401b7e1c893Smrg			    END_OF_PROGRAM(0),
402b7e1c893Smrg			    VALID_PIXEL_MODE(0),
403b7e1c893Smrg			    CF_INST(SQ_CF_INST_TEX),
404b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
405b7e1c893Smrg			    BARRIER(1));
406b7e1c893Smrg    /* CF INST 1 */
407b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
408b7e1c893Smrg					  TYPE(SQ_EXPORT_PIXEL),
409b7e1c893Smrg					  RW_GPR(0),
410b7e1c893Smrg					  RW_REL(ABSOLUTE),
411b7e1c893Smrg					  INDEX_GPR(0),
412b7e1c893Smrg					  ELEM_SIZE(1));
413b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
414b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
415b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
416b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
417b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
418b7e1c893Smrg					       BURST_COUNT(1),
419b7e1c893Smrg					       END_OF_PROGRAM(1),
420b7e1c893Smrg					       VALID_PIXEL_MODE(0),
421b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
422b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
423b7e1c893Smrg					       BARRIER(1));
424b7e1c893Smrg    /* TEX INST 0 */
425b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
426b7e1c893Smrg			     BC_FRAC_MODE(0),
427b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
428b7e1c893Smrg			     RESOURCE_ID(0),
429b7e1c893Smrg			     SRC_GPR(0),
430b7e1c893Smrg			     SRC_REL(ABSOLUTE),
431b7e1c893Smrg			     R7xx_ALT_CONST(0));
432b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(0),
433b7e1c893Smrg			     DST_REL(ABSOLUTE),
434b7e1c893Smrg			     DST_SEL_X(SQ_SEL_X), /* R */
435b7e1c893Smrg			     DST_SEL_Y(SQ_SEL_Y), /* G */
436b7e1c893Smrg			     DST_SEL_Z(SQ_SEL_Z), /* B */
437b7e1c893Smrg			     DST_SEL_W(SQ_SEL_W), /* A */
438b7e1c893Smrg			     LOD_BIAS(0),
439b7e1c893Smrg			     COORD_TYPE_X(TEX_UNNORMALIZED),
440b7e1c893Smrg			     COORD_TYPE_Y(TEX_UNNORMALIZED),
441b7e1c893Smrg			     COORD_TYPE_Z(TEX_UNNORMALIZED),
442b7e1c893Smrg			     COORD_TYPE_W(TEX_UNNORMALIZED));
443b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
444b7e1c893Smrg			     OFFSET_Y(0),
445b7e1c893Smrg			     OFFSET_Z(0),
446b7e1c893Smrg			     SAMPLER_ID(0),
447b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
448b7e1c893Smrg			     SRC_SEL_Y(SQ_SEL_Y),
449b7e1c893Smrg			     SRC_SEL_Z(SQ_SEL_0),
450b7e1c893Smrg			     SRC_SEL_W(SQ_SEL_1));
451b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
452b7e1c893Smrg
453b7e1c893Smrg    return i;
454b7e1c893Smrg}
455b7e1c893Smrg
456b7e1c893Smrg/*
457b7e1c893Smrg * ; xv vertex shader
458b7e1c893Smrg * 00 VTX: ADDR(4) CNT(2)
459b7e1c893Smrg *       0  VFETCH R1.xy01, R0.x, fc0  MEGA(16) FORMAT(32_32_FLOAT)
460b7e1c893Smrg *          FORMAT_COMP(SIGNED)
461b7e1c893Smrg *       1  VFETCH R0.xy01, R0.x, fc0  MINI(8) OFFSET(8) FORMAT(32_32_FLOAT)
462b7e1c893Smrg *          FORMAT_COMP(SIGNED)
463b7e1c893Smrg * 01 EXP_DONE: POS0, R1
464b7e1c893Smrg * 02 EXP_DONE: PARAM0, R0  NO_BARRIER
465b7e1c893Smrg * END_OF_PROGRAM
466b7e1c893Smrg */
467b7e1c893Smrgint R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
468b7e1c893Smrg{
469b7e1c893Smrg    int i = 0;
470b7e1c893Smrg
471b7e1c893Smrg    /* 0 */
472ad43ddacSmrg    shader[i++] = CF_DWORD0(ADDR(6));
473b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
474b7e1c893Smrg                            CF_CONST(0),
475b7e1c893Smrg                            COND(SQ_CF_COND_ACTIVE),
476b7e1c893Smrg                            I_COUNT(2),
477b7e1c893Smrg                            CALL_COUNT(0),
478b7e1c893Smrg                            END_OF_PROGRAM(0),
479b7e1c893Smrg                            VALID_PIXEL_MODE(0),
480b7e1c893Smrg                            CF_INST(SQ_CF_INST_VTX),
481b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
482b7e1c893Smrg                            BARRIER(1));
483ad43ddacSmrg
484ad43ddacSmrg    /* 1 - ALU */
485ad43ddacSmrg    shader[i++] = CF_ALU_DWORD0(ADDR(4),
486ad43ddacSmrg				KCACHE_BANK0(0),
487ad43ddacSmrg				KCACHE_BANK1(0),
488ad43ddacSmrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
489ad43ddacSmrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
490ad43ddacSmrg				KCACHE_ADDR0(0),
491ad43ddacSmrg				KCACHE_ADDR1(0),
492ad43ddacSmrg				I_COUNT(2),
493ad43ddacSmrg				USES_WATERFALL(0),
494ad43ddacSmrg				CF_INST(SQ_CF_INST_ALU),
495ad43ddacSmrg				WHOLE_QUAD_MODE(0),
496ad43ddacSmrg				BARRIER(1));
497ad43ddacSmrg
498ad43ddacSmrg    /* 2 */
499b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
500b7e1c893Smrg                                          TYPE(SQ_EXPORT_POS),
501b7e1c893Smrg                                          RW_GPR(1),
502b7e1c893Smrg                                          RW_REL(ABSOLUTE),
503b7e1c893Smrg                                          INDEX_GPR(0),
504b7e1c893Smrg                                          ELEM_SIZE(3));
505b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
506b7e1c893Smrg                                               SRC_SEL_Y(SQ_SEL_Y),
507b7e1c893Smrg                                               SRC_SEL_Z(SQ_SEL_Z),
508b7e1c893Smrg                                               SRC_SEL_W(SQ_SEL_W),
509b7e1c893Smrg                                               R6xx_ELEM_LOOP(0),
510b7e1c893Smrg                                               BURST_COUNT(1),
511b7e1c893Smrg                                               END_OF_PROGRAM(0),
512b7e1c893Smrg                                               VALID_PIXEL_MODE(0),
513b7e1c893Smrg                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
514b7e1c893Smrg                                               WHOLE_QUAD_MODE(0),
515b7e1c893Smrg                                               BARRIER(1));
516ad43ddacSmrg    /* 3 */
517b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
518b7e1c893Smrg                                          TYPE(SQ_EXPORT_PARAM),
519b7e1c893Smrg                                          RW_GPR(0),
520b7e1c893Smrg                                          RW_REL(ABSOLUTE),
521b7e1c893Smrg                                          INDEX_GPR(0),
522b7e1c893Smrg                                          ELEM_SIZE(3));
523b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
524b7e1c893Smrg                                               SRC_SEL_Y(SQ_SEL_Y),
525b7e1c893Smrg                                               SRC_SEL_Z(SQ_SEL_Z),
526b7e1c893Smrg                                               SRC_SEL_W(SQ_SEL_W),
527b7e1c893Smrg                                               R6xx_ELEM_LOOP(0),
528b7e1c893Smrg                                               BURST_COUNT(1),
529b7e1c893Smrg                                               END_OF_PROGRAM(1),
530b7e1c893Smrg                                               VALID_PIXEL_MODE(0),
531b7e1c893Smrg                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
532b7e1c893Smrg                                               WHOLE_QUAD_MODE(0),
533b7e1c893Smrg                                               BARRIER(0));
534ad43ddacSmrg
535ad43ddacSmrg
536ad43ddacSmrg    /* 4 texX / w */
537921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
538ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
539ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
540ad43ddacSmrg                             SRC0_NEG(0),
541921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
542ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
543ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
544ad43ddacSmrg                             SRC1_NEG(0),
545ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
546ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
547ad43ddacSmrg                             LAST(0));
548ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
549ad43ddacSmrg                                 SRC0_ABS(0),
550ad43ddacSmrg                                 SRC1_ABS(0),
551ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
552ad43ddacSmrg                                 UPDATE_PRED(0),
553ad43ddacSmrg                                 WRITE_MASK(1),
554ad43ddacSmrg                                 FOG_MERGE(0),
555ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
556ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
557ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
558ad43ddacSmrg                                 DST_GPR(0),
559ad43ddacSmrg                                 DST_REL(ABSOLUTE),
560ad43ddacSmrg                                 DST_ELEM(ELEM_X),
561ad43ddacSmrg                                 CLAMP(0));
562ad43ddacSmrg
563ad43ddacSmrg    /* 5 texY / h */
564921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
565ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
566ad43ddacSmrg                             SRC0_ELEM(ELEM_Y),
567ad43ddacSmrg                             SRC0_NEG(0),
568921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
569ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
570ad43ddacSmrg                             SRC1_ELEM(ELEM_Y),
571ad43ddacSmrg                             SRC1_NEG(0),
572ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
573ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
574ad43ddacSmrg                             LAST(1));
575ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
576ad43ddacSmrg                                 SRC0_ABS(0),
577ad43ddacSmrg                                 SRC1_ABS(0),
578ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
579ad43ddacSmrg                                 UPDATE_PRED(0),
580ad43ddacSmrg                                 WRITE_MASK(1),
581ad43ddacSmrg                                 FOG_MERGE(0),
582ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
583ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
584ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
585ad43ddacSmrg                                 DST_GPR(0),
586ad43ddacSmrg                                 DST_REL(ABSOLUTE),
587ad43ddacSmrg                                 DST_ELEM(ELEM_Y),
588ad43ddacSmrg                                 CLAMP(0));
589ad43ddacSmrg
590ad43ddacSmrg    /* 6/7 */
591b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
592b7e1c893Smrg                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
593b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
594b7e1c893Smrg                             BUFFER_ID(0),
595b7e1c893Smrg                             SRC_GPR(0),
596b7e1c893Smrg                             SRC_REL(ABSOLUTE),
597b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
598b7e1c893Smrg                             MEGA_FETCH_COUNT(16));
599b7e1c893Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
600b7e1c893Smrg                                 DST_REL(ABSOLUTE),
601b7e1c893Smrg                                 DST_SEL_X(SQ_SEL_X),
602b7e1c893Smrg                                 DST_SEL_Y(SQ_SEL_Y),
603b7e1c893Smrg                                 DST_SEL_Z(SQ_SEL_0),
604b7e1c893Smrg                                 DST_SEL_W(SQ_SEL_1),
605b7e1c893Smrg                                 USE_CONST_FIELDS(0),
606b7e1c893Smrg                                 DATA_FORMAT(FMT_32_32_FLOAT),
607ad43ddacSmrg                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
608b7e1c893Smrg                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
609b7e1c893Smrg                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
610b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
611b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
612b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
613b13dfe66Smrg#else
614b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
615b13dfe66Smrg#endif
616b7e1c893Smrg                             CONST_BUF_NO_STRIDE(0),
617b7e1c893Smrg                             MEGA_FETCH(1));
618b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
619ad43ddacSmrg    /* 8/9 */
620b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
621b7e1c893Smrg                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
622b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
623b7e1c893Smrg                             BUFFER_ID(0),
624b7e1c893Smrg                             SRC_GPR(0),
625b7e1c893Smrg                             SRC_REL(ABSOLUTE),
626b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
627b7e1c893Smrg                             MEGA_FETCH_COUNT(8));
628b7e1c893Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
629b7e1c893Smrg                                 DST_REL(ABSOLUTE),
630b7e1c893Smrg                                 DST_SEL_X(SQ_SEL_X),
631b7e1c893Smrg                                 DST_SEL_Y(SQ_SEL_Y),
632b7e1c893Smrg                                 DST_SEL_Z(SQ_SEL_0),
633b7e1c893Smrg                                 DST_SEL_W(SQ_SEL_1),
634b7e1c893Smrg                                 USE_CONST_FIELDS(0),
635b7e1c893Smrg                                 DATA_FORMAT(FMT_32_32_FLOAT),
636ad43ddacSmrg                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
637b7e1c893Smrg                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
638b7e1c893Smrg                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
639b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
640b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
641b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
642b13dfe66Smrg#else
643b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
644b13dfe66Smrg#endif
645b7e1c893Smrg                             CONST_BUF_NO_STRIDE(0),
646b7e1c893Smrg                             MEGA_FETCH(0));
647b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
648b7e1c893Smrg
649b7e1c893Smrg    return i;
650b7e1c893Smrg}
651b7e1c893Smrg
652b7e1c893Smrgint R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
653b7e1c893Smrg{
654b7e1c893Smrg    int i = 0;
655b7e1c893Smrg
656b7e1c893Smrg    /* 0 */
657ad43ddacSmrg    shader[i++] = CF_DWORD0(ADDR(16));
658b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
659b7e1c893Smrg                            CF_CONST(0),
660b7e1c893Smrg                            COND(SQ_CF_COND_BOOL),
661b7e1c893Smrg                            I_COUNT(0),
662b7e1c893Smrg                            CALL_COUNT(0),
663b7e1c893Smrg                            END_OF_PROGRAM(0),
664b7e1c893Smrg                            VALID_PIXEL_MODE(0),
665b7e1c893Smrg                            CF_INST(SQ_CF_INST_CALL),
666b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
667b7e1c893Smrg                            BARRIER(0));
668b7e1c893Smrg    /* 1 */
669ad43ddacSmrg    shader[i++] = CF_DWORD0(ADDR(24));
670b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
671b7e1c893Smrg                            CF_CONST(0),
672b7e1c893Smrg                            COND(SQ_CF_COND_NOT_BOOL),
673b7e1c893Smrg                            I_COUNT(0),
674b7e1c893Smrg                            CALL_COUNT(0),
675b7e1c893Smrg                            END_OF_PROGRAM(0),
676b7e1c893Smrg                            VALID_PIXEL_MODE(0),
677b7e1c893Smrg                            CF_INST(SQ_CF_INST_CALL),
678b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
679b7e1c893Smrg                            BARRIER(0));
680b7e1c893Smrg    /* 2 */
681b7e1c893Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(4),
682b7e1c893Smrg                                KCACHE_BANK0(0),
683b7e1c893Smrg                                KCACHE_BANK1(0),
684b7e1c893Smrg                                KCACHE_MODE0(SQ_CF_KCACHE_NOP));
685b7e1c893Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
686b7e1c893Smrg                                KCACHE_ADDR0(0),
687b7e1c893Smrg                                KCACHE_ADDR1(0),
688ad43ddacSmrg                                I_COUNT(12),
689b7e1c893Smrg                                USES_WATERFALL(0),
690b7e1c893Smrg                                CF_INST(SQ_CF_INST_ALU),
691b7e1c893Smrg                                WHOLE_QUAD_MODE(0),
692b7e1c893Smrg                                BARRIER(1));
693b7e1c893Smrg    /* 3 */
694b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
695b7e1c893Smrg                                          TYPE(SQ_EXPORT_PIXEL),
696b7e1c893Smrg                                          RW_GPR(2),
697b7e1c893Smrg                                          RW_REL(ABSOLUTE),
698b7e1c893Smrg                                          INDEX_GPR(0),
699b7e1c893Smrg                                          ELEM_SIZE(3));
700b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
701b7e1c893Smrg                                               SRC_SEL_Y(SQ_SEL_Y),
702b7e1c893Smrg                                               SRC_SEL_Z(SQ_SEL_Z),
703b7e1c893Smrg                                               SRC_SEL_W(SQ_SEL_W),
704b7e1c893Smrg                                               R6xx_ELEM_LOOP(0),
705b7e1c893Smrg                                               BURST_COUNT(1),
706b7e1c893Smrg                                               END_OF_PROGRAM(1),
707b7e1c893Smrg                                               VALID_PIXEL_MODE(0),
708b7e1c893Smrg                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
709b7e1c893Smrg                                               WHOLE_QUAD_MODE(0),
710b7e1c893Smrg                                               BARRIER(1));
711ad43ddacSmrg    /* 4,5,6,7 */
712ad43ddacSmrg    /* r2.x = MAD(c0.w, r1.x, c0.x) */
713921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
714b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
715ad43ddacSmrg                             SRC0_ELEM(ELEM_W),
716b7e1c893Smrg                             SRC0_NEG(0),
717921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
718b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
719b7e1c893Smrg                             SRC1_ELEM(ELEM_X),
720b7e1c893Smrg                             SRC1_NEG(0),
721b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
722b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
723b7e1c893Smrg                             LAST(0));
724921a55d8Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0),
725b7e1c893Smrg                                 SRC2_REL(ABSOLUTE),
726ad43ddacSmrg                                 SRC2_ELEM(ELEM_X),
727b7e1c893Smrg                                 SRC2_NEG(0),
728b7e1c893Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
729b7e1c893Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
730ad43ddacSmrg                                 DST_GPR(2),
731b7e1c893Smrg                                 DST_REL(ABSOLUTE),
732b7e1c893Smrg                                 DST_ELEM(ELEM_X),
733ad43ddacSmrg                                 CLAMP(0));
734ad43ddacSmrg    /* r2.y = MAD(c0.w, r1.x, c0.y) */
735921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
736b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
737ad43ddacSmrg                             SRC0_ELEM(ELEM_W),
738b7e1c893Smrg                             SRC0_NEG(0),
739921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
740b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
741ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
742b7e1c893Smrg                             SRC1_NEG(0),
743b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
744b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
745b7e1c893Smrg                             LAST(0));
746921a55d8Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0),
747b7e1c893Smrg                                 SRC2_REL(ABSOLUTE),
748ad43ddacSmrg                                 SRC2_ELEM(ELEM_Y),
749b7e1c893Smrg                                 SRC2_NEG(0),
750b7e1c893Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
751b7e1c893Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
752ad43ddacSmrg                                 DST_GPR(2),
753b7e1c893Smrg                                 DST_REL(ABSOLUTE),
754b7e1c893Smrg                                 DST_ELEM(ELEM_Y),
755b7e1c893Smrg                                 CLAMP(0));
756ad43ddacSmrg    /* r2.z = MAD(c0.w, r1.x, c0.z) */
757921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
758b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
759ad43ddacSmrg                             SRC0_ELEM(ELEM_W),
760b7e1c893Smrg                             SRC0_NEG(0),
761921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
762b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
763ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
764b7e1c893Smrg                             SRC1_NEG(0),
765b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
766b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
767b7e1c893Smrg                             LAST(0));
768921a55d8Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0),
769b7e1c893Smrg                                 SRC2_REL(ABSOLUTE),
770ad43ddacSmrg                                 SRC2_ELEM(ELEM_Z),
771b7e1c893Smrg                                 SRC2_NEG(0),
772b7e1c893Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
773b7e1c893Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
774ad43ddacSmrg                                 DST_GPR(2),
775b7e1c893Smrg                                 DST_REL(ABSOLUTE),
776b7e1c893Smrg                                 DST_ELEM(ELEM_Z),
777b7e1c893Smrg                                 CLAMP(0));
778ad43ddacSmrg    /* r2.w = MAD(0, 0, 1) */
779b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
780b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
781b7e1c893Smrg                             SRC0_ELEM(ELEM_X),
782b7e1c893Smrg                             SRC0_NEG(0),
783b7e1c893Smrg                             SRC1_SEL(SQ_ALU_SRC_0),
784b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
785b7e1c893Smrg                             SRC1_ELEM(ELEM_X),
786b7e1c893Smrg                             SRC1_NEG(0),
787b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
788b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
789b7e1c893Smrg                             LAST(1));
790ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
791ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
792ad43ddacSmrg                                 SRC2_ELEM(ELEM_X),
793ad43ddacSmrg                                 SRC2_NEG(0),
794ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
795b7e1c893Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
796ad43ddacSmrg                                 DST_GPR(2),
797b7e1c893Smrg                                 DST_REL(ABSOLUTE),
798b7e1c893Smrg                                 DST_ELEM(ELEM_W),
799b7e1c893Smrg                                 CLAMP(0));
800ad43ddacSmrg
801ad43ddacSmrg    /* 8,9,10,11 */
802ad43ddacSmrg    /* r2.x = MAD(c1.x, r1.y, pv.x) */
803921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
804b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
805b7e1c893Smrg                             SRC0_ELEM(ELEM_X),
806b7e1c893Smrg                             SRC0_NEG(0),
807921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
808b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
809ad43ddacSmrg                             SRC1_ELEM(ELEM_Y),
810b7e1c893Smrg                             SRC1_NEG(0),
811b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
812b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
813b7e1c893Smrg                             LAST(0));
814ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
815ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
816ad43ddacSmrg                                 SRC2_ELEM(ELEM_X),
817ad43ddacSmrg                                 SRC2_NEG(0),
818ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
819ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
820b7e1c893Smrg                                 DST_GPR(2),
821b7e1c893Smrg                                 DST_REL(ABSOLUTE),
822b7e1c893Smrg                                 DST_ELEM(ELEM_X),
823ad43ddacSmrg                                 CLAMP(0));
824ad43ddacSmrg    /* r2.y = MAD(c1.y, r1.y, pv.y) */
825921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
826b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
827b7e1c893Smrg                             SRC0_ELEM(ELEM_Y),
828b7e1c893Smrg                             SRC0_NEG(0),
829921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
830b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
831b7e1c893Smrg                             SRC1_ELEM(ELEM_Y),
832b7e1c893Smrg                             SRC1_NEG(0),
833b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
834b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
835b7e1c893Smrg                             LAST(0));
836ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
837ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
838ad43ddacSmrg                                 SRC2_ELEM(ELEM_Y),
839ad43ddacSmrg                                 SRC2_NEG(0),
840ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
841ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
842ad43ddacSmrg                                 DST_GPR(2),
843b7e1c893Smrg                                 DST_REL(ABSOLUTE),
844b7e1c893Smrg                                 DST_ELEM(ELEM_Y),
845ad43ddacSmrg                                 CLAMP(0));
846ad43ddacSmrg    /* r2.z = MAD(c1.z, r1.y, pv.z) */
847921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
848b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
849b7e1c893Smrg                             SRC0_ELEM(ELEM_Z),
850b7e1c893Smrg                             SRC0_NEG(0),
851921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
852b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
853ad43ddacSmrg                             SRC1_ELEM(ELEM_Y),
854b7e1c893Smrg                             SRC1_NEG(0),
855b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
856b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
857b7e1c893Smrg                             LAST(0));
858ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
859ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
860ad43ddacSmrg                                 SRC2_ELEM(ELEM_Z),
861ad43ddacSmrg                                 SRC2_NEG(0),
862ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
863ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
864ad43ddacSmrg                                 DST_GPR(2),
865b7e1c893Smrg                                 DST_REL(ABSOLUTE),
866b7e1c893Smrg                                 DST_ELEM(ELEM_Z),
867ad43ddacSmrg                                 CLAMP(0));
868ad43ddacSmrg    /* r2.w = MAD(0, 0, 1) */
869ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
870b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
871ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
872b7e1c893Smrg                             SRC0_NEG(0),
873ad43ddacSmrg                             SRC1_SEL(SQ_ALU_SRC_0),
874b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
875ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
876b7e1c893Smrg                             SRC1_NEG(0),
877b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
878b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
879b7e1c893Smrg                             LAST(1));
880ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
881ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
882ad43ddacSmrg                                 SRC2_ELEM(ELEM_W),
883ad43ddacSmrg                                 SRC2_NEG(0),
884ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
885ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
886ad43ddacSmrg                                 DST_GPR(2),
887b7e1c893Smrg                                 DST_REL(ABSOLUTE),
888b7e1c893Smrg                                 DST_ELEM(ELEM_W),
889ad43ddacSmrg                                 CLAMP(0));
890ad43ddacSmrg    /* 12,13,14,15 */
891ad43ddacSmrg    /* r2.x = MAD(c2.x, r1.z, pv.x) */
892921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2),
893b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
894b7e1c893Smrg                             SRC0_ELEM(ELEM_X),
895b7e1c893Smrg                             SRC0_NEG(0),
896921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
897b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
898ad43ddacSmrg                             SRC1_ELEM(ELEM_Z),
899b7e1c893Smrg                             SRC1_NEG(0),
900b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
901b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
902b7e1c893Smrg                             LAST(0));
903ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
904ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
905ad43ddacSmrg                                 SRC2_ELEM(ELEM_X),
906ad43ddacSmrg                                 SRC2_NEG(0),
907ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
908ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
909ad43ddacSmrg                                 DST_GPR(2),
910b7e1c893Smrg                                 DST_REL(ABSOLUTE),
911b7e1c893Smrg                                 DST_ELEM(ELEM_X),
912b7e1c893Smrg                                 CLAMP(1));
913ad43ddacSmrg    /* r2.y = MAD(c2.y, r1.z, pv.y) */
914921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2),
915b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
916b7e1c893Smrg                             SRC0_ELEM(ELEM_Y),
917b7e1c893Smrg                             SRC0_NEG(0),
918921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
919b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
920ad43ddacSmrg                             SRC1_ELEM(ELEM_Z),
921b7e1c893Smrg                             SRC1_NEG(0),
922b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
923b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
924b7e1c893Smrg                             LAST(0));
925ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
926ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
927ad43ddacSmrg                                 SRC2_ELEM(ELEM_Y),
928ad43ddacSmrg                                 SRC2_NEG(0),
929ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
930ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
931b7e1c893Smrg                                 DST_GPR(2),
932b7e1c893Smrg                                 DST_REL(ABSOLUTE),
933b7e1c893Smrg                                 DST_ELEM(ELEM_Y),
934b7e1c893Smrg                                 CLAMP(1));
935ad43ddacSmrg    /* r2.z = MAD(c2.z, r1.z, pv.z) */
936921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2),
937b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
938b7e1c893Smrg                             SRC0_ELEM(ELEM_Z),
939b7e1c893Smrg                             SRC0_NEG(0),
940921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
941b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
942b7e1c893Smrg                             SRC1_ELEM(ELEM_Z),
943b7e1c893Smrg                             SRC1_NEG(0),
944b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
945b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
946b7e1c893Smrg                             LAST(0));
947ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
948ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
949ad43ddacSmrg                                 SRC2_ELEM(ELEM_Z),
950ad43ddacSmrg                                 SRC2_NEG(0),
951ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
952ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
953ad43ddacSmrg                                 DST_GPR(2),
954b7e1c893Smrg                                 DST_REL(ABSOLUTE),
955b7e1c893Smrg                                 DST_ELEM(ELEM_Z),
956b7e1c893Smrg                                 CLAMP(1));
957ad43ddacSmrg    /* r2.w = MAD(0, 0, 1) */
958ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
959b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
960b7e1c893Smrg                             SRC0_ELEM(ELEM_X),
961b7e1c893Smrg                             SRC0_NEG(0),
962ad43ddacSmrg                             SRC1_SEL(SQ_ALU_SRC_0),
963b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
964b7e1c893Smrg                             SRC1_ELEM(ELEM_X),
965b7e1c893Smrg                             SRC1_NEG(0),
966b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
967b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
968b7e1c893Smrg                             LAST(1));
969ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
970ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
971ad43ddacSmrg                                 SRC2_ELEM(ELEM_X),
972ad43ddacSmrg                                 SRC2_NEG(0),
973ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
974ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
975ad43ddacSmrg                                 DST_GPR(2),
976b7e1c893Smrg                                 DST_REL(ABSOLUTE),
977b7e1c893Smrg                                 DST_ELEM(ELEM_W),
978b7e1c893Smrg                                 CLAMP(1));
979ad43ddacSmrg
980ad43ddacSmrg    /* 16 */
981ad43ddacSmrg    shader[i++] = CF_DWORD0(ADDR(18));
982b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
983b7e1c893Smrg                            CF_CONST(0),
984b7e1c893Smrg                            COND(SQ_CF_COND_ACTIVE),
985b7e1c893Smrg                            I_COUNT(3),
986b7e1c893Smrg                            CALL_COUNT(0),
987b7e1c893Smrg                            END_OF_PROGRAM(0),
988b7e1c893Smrg                            VALID_PIXEL_MODE(0),
989b7e1c893Smrg                            CF_INST(SQ_CF_INST_TEX),
990b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
991b7e1c893Smrg                            BARRIER(1));
992ad43ddacSmrg    /* 17 */
993b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(0));
994b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
995b7e1c893Smrg			    CF_CONST(0),
996b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
997b7e1c893Smrg			    I_COUNT(0),
998b7e1c893Smrg			    CALL_COUNT(0),
999b7e1c893Smrg			    END_OF_PROGRAM(0),
1000b7e1c893Smrg			    VALID_PIXEL_MODE(0),
1001b7e1c893Smrg			    CF_INST(SQ_CF_INST_RETURN),
1002b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
1003b7e1c893Smrg			    BARRIER(1));
1004ad43ddacSmrg    /* 18/19 */
1005b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1006b7e1c893Smrg                             BC_FRAC_MODE(0),
1007b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
1008b7e1c893Smrg                             RESOURCE_ID(0),
1009b7e1c893Smrg                             SRC_GPR(0),
1010b7e1c893Smrg                             SRC_REL(ABSOLUTE),
1011b7e1c893Smrg                             R7xx_ALT_CONST(0));
1012b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1013b7e1c893Smrg                             DST_REL(ABSOLUTE),
1014b7e1c893Smrg                             DST_SEL_X(SQ_SEL_X),
1015b7e1c893Smrg                             DST_SEL_Y(SQ_SEL_MASK),
1016b7e1c893Smrg                             DST_SEL_Z(SQ_SEL_MASK),
1017b7e1c893Smrg                             DST_SEL_W(SQ_SEL_1),
1018b7e1c893Smrg                             LOD_BIAS(0),
1019b7e1c893Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1020b7e1c893Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1021b7e1c893Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1022b7e1c893Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1023b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1024b7e1c893Smrg                             OFFSET_Y(0),
1025b7e1c893Smrg                             OFFSET_Z(0),
1026b7e1c893Smrg                             SAMPLER_ID(0),
1027b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
1028b7e1c893Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1029b7e1c893Smrg                             SRC_SEL_Z(SQ_SEL_0),
1030b7e1c893Smrg                             SRC_SEL_W(SQ_SEL_1));
1031b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
1032ad43ddacSmrg    /* 20/21 */
1033b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1034b7e1c893Smrg                             BC_FRAC_MODE(0),
1035b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
1036b7e1c893Smrg                             RESOURCE_ID(1),
1037b7e1c893Smrg                             SRC_GPR(0),
1038b7e1c893Smrg                             SRC_REL(ABSOLUTE),
1039b7e1c893Smrg                             R7xx_ALT_CONST(0));
1040b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1041b7e1c893Smrg                             DST_REL(ABSOLUTE),
1042b7e1c893Smrg                             DST_SEL_X(SQ_SEL_MASK),
1043b7e1c893Smrg                             DST_SEL_Y(SQ_SEL_MASK),
1044b7e1c893Smrg                             DST_SEL_Z(SQ_SEL_X),
1045b7e1c893Smrg                             DST_SEL_W(SQ_SEL_MASK),
1046b7e1c893Smrg                             LOD_BIAS(0),
1047b7e1c893Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1048b7e1c893Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1049b7e1c893Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1050b7e1c893Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1051b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1052b7e1c893Smrg                             OFFSET_Y(0),
1053b7e1c893Smrg                             OFFSET_Z(0),
1054b7e1c893Smrg                             SAMPLER_ID(1),
1055b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
1056b7e1c893Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1057b7e1c893Smrg                             SRC_SEL_Z(SQ_SEL_0),
1058b7e1c893Smrg                             SRC_SEL_W(SQ_SEL_1));
1059b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
1060ad43ddacSmrg    /* 22/23 */
1061b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1062b7e1c893Smrg                             BC_FRAC_MODE(0),
1063b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
1064b7e1c893Smrg                             RESOURCE_ID(2),
1065b7e1c893Smrg                             SRC_GPR(0),
1066b7e1c893Smrg                             SRC_REL(ABSOLUTE),
1067b7e1c893Smrg                             R7xx_ALT_CONST(0));
1068b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1069b7e1c893Smrg                             DST_REL(ABSOLUTE),
1070b7e1c893Smrg                             DST_SEL_X(SQ_SEL_MASK),
1071b7e1c893Smrg                             DST_SEL_Y(SQ_SEL_X),
1072b7e1c893Smrg                             DST_SEL_Z(SQ_SEL_MASK),
1073b7e1c893Smrg                             DST_SEL_W(SQ_SEL_MASK),
1074b7e1c893Smrg                             LOD_BIAS(0),
1075b7e1c893Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1076b7e1c893Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1077b7e1c893Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1078b7e1c893Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1079b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1080b7e1c893Smrg                             OFFSET_Y(0),
1081b7e1c893Smrg                             OFFSET_Z(0),
1082b7e1c893Smrg                             SAMPLER_ID(2),
1083b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
1084b7e1c893Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1085b7e1c893Smrg                             SRC_SEL_Z(SQ_SEL_0),
1086b7e1c893Smrg                             SRC_SEL_W(SQ_SEL_1));
1087b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
1088ad43ddacSmrg    /* 24 */
1089ad43ddacSmrg    shader[i++] = CF_DWORD0(ADDR(26));
1090b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1091b7e1c893Smrg                            CF_CONST(0),
1092b7e1c893Smrg                            COND(SQ_CF_COND_ACTIVE),
1093b7e1c893Smrg                            I_COUNT(2),
1094b7e1c893Smrg                            CALL_COUNT(0),
1095b7e1c893Smrg                            END_OF_PROGRAM(0),
1096b7e1c893Smrg                            VALID_PIXEL_MODE(0),
1097b7e1c893Smrg                            CF_INST(SQ_CF_INST_TEX),
1098b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
1099b7e1c893Smrg                            BARRIER(1));
1100ad43ddacSmrg    /* 25 */
1101b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(0));
1102b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1103b7e1c893Smrg			    CF_CONST(0),
1104b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
1105b7e1c893Smrg			    I_COUNT(0),
1106b7e1c893Smrg			    CALL_COUNT(0),
1107b7e1c893Smrg			    END_OF_PROGRAM(0),
1108b7e1c893Smrg			    VALID_PIXEL_MODE(0),
1109b7e1c893Smrg			    CF_INST(SQ_CF_INST_RETURN),
1110b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
1111b7e1c893Smrg			    BARRIER(1));
1112ad43ddacSmrg    /* 26/27 */
1113b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1114b7e1c893Smrg                             BC_FRAC_MODE(0),
1115b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
1116b7e1c893Smrg                             RESOURCE_ID(0),
1117b7e1c893Smrg                             SRC_GPR(0),
1118b7e1c893Smrg                             SRC_REL(ABSOLUTE),
1119b7e1c893Smrg                             R7xx_ALT_CONST(0));
1120b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1121b7e1c893Smrg                             DST_REL(ABSOLUTE),
1122b7e1c893Smrg                             DST_SEL_X(SQ_SEL_X),
1123b7e1c893Smrg                             DST_SEL_Y(SQ_SEL_MASK),
1124b7e1c893Smrg                             DST_SEL_Z(SQ_SEL_MASK),
1125b7e1c893Smrg                             DST_SEL_W(SQ_SEL_1),
1126b7e1c893Smrg                             LOD_BIAS(0),
1127b7e1c893Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1128b7e1c893Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1129b7e1c893Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1130b7e1c893Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1131b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1132b7e1c893Smrg                             OFFSET_Y(0),
1133b7e1c893Smrg                             OFFSET_Z(0),
1134b7e1c893Smrg                             SAMPLER_ID(0),
1135b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
1136b7e1c893Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1137b7e1c893Smrg                             SRC_SEL_Z(SQ_SEL_0),
1138b7e1c893Smrg                             SRC_SEL_W(SQ_SEL_1));
1139b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
1140ad43ddacSmrg    /* 28/29 */
1141b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1142b7e1c893Smrg                             BC_FRAC_MODE(0),
1143b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
1144b7e1c893Smrg                             RESOURCE_ID(1),
1145b7e1c893Smrg                             SRC_GPR(0),
1146b7e1c893Smrg                             SRC_REL(ABSOLUTE),
1147b7e1c893Smrg                             R7xx_ALT_CONST(0));
1148b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1149b7e1c893Smrg                             DST_REL(ABSOLUTE),
1150b7e1c893Smrg                             DST_SEL_X(SQ_SEL_MASK),
1151b7e1c893Smrg                             DST_SEL_Y(SQ_SEL_X),
1152b7e1c893Smrg                             DST_SEL_Z(SQ_SEL_Y),
1153b7e1c893Smrg                             DST_SEL_W(SQ_SEL_MASK),
1154b7e1c893Smrg                             LOD_BIAS(0),
1155b7e1c893Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1156b7e1c893Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1157b7e1c893Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1158b7e1c893Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1159b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1160b7e1c893Smrg                             OFFSET_Y(0),
1161b7e1c893Smrg                             OFFSET_Z(0),
1162b7e1c893Smrg                             SAMPLER_ID(1),
1163b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
1164b7e1c893Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1165b7e1c893Smrg                             SRC_SEL_Z(SQ_SEL_0),
1166b7e1c893Smrg                             SRC_SEL_W(SQ_SEL_1));
1167b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
1168b7e1c893Smrg
1169b7e1c893Smrg    return i;
1170b7e1c893Smrg}
1171b7e1c893Smrg
1172b7e1c893Smrg/* comp vs --------------------------------------- */
1173b7e1c893Smrgint R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
1174b7e1c893Smrg{
1175b7e1c893Smrg    int i = 0;
1176b7e1c893Smrg
1177b7e1c893Smrg    /* 0 */
1178b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(3));
1179b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1180b7e1c893Smrg                            CF_CONST(0),
1181b7e1c893Smrg                            COND(SQ_CF_COND_BOOL),
1182b7e1c893Smrg                            I_COUNT(0),
1183b7e1c893Smrg                            CALL_COUNT(0),
1184b7e1c893Smrg                            END_OF_PROGRAM(0),
1185b7e1c893Smrg                            VALID_PIXEL_MODE(0),
1186b7e1c893Smrg                            CF_INST(SQ_CF_INST_CALL),
1187b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
1188b7e1c893Smrg                            BARRIER(0));
1189b7e1c893Smrg    /* 1 */
11900974d292Smrg    shader[i++] = CF_DWORD0(ADDR(9));
1191b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1192b7e1c893Smrg                            CF_CONST(0),
1193b7e1c893Smrg                            COND(SQ_CF_COND_NOT_BOOL),
1194b7e1c893Smrg                            I_COUNT(0),
1195b7e1c893Smrg                            CALL_COUNT(0),
1196b7e1c893Smrg                            END_OF_PROGRAM(0),
1197b7e1c893Smrg                            VALID_PIXEL_MODE(0),
1198b7e1c893Smrg                            CF_INST(SQ_CF_INST_CALL),
1199b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
1200b7e1c893Smrg                            BARRIER(0));
1201b7e1c893Smrg    /* 2 */
12022f39173dSmrg    shader[i++] = CF_DWORD0(ADDR(0));
1203b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1204b7e1c893Smrg                            CF_CONST(0),
1205b7e1c893Smrg                            COND(SQ_CF_COND_ACTIVE),
1206b7e1c893Smrg                            I_COUNT(0),
1207b7e1c893Smrg                            CALL_COUNT(0),
1208b7e1c893Smrg                            END_OF_PROGRAM(1),
1209b7e1c893Smrg                            VALID_PIXEL_MODE(0),
1210b7e1c893Smrg                            CF_INST(SQ_CF_INST_NOP),
1211b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
1212b7e1c893Smrg                            BARRIER(1));
1213b7e1c893Smrg    /* 3 - mask sub */
1214921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(44));
1215b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1216b7e1c893Smrg			    CF_CONST(0),
1217b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
1218b7e1c893Smrg			    I_COUNT(3),
1219b7e1c893Smrg			    CALL_COUNT(0),
1220b7e1c893Smrg			    END_OF_PROGRAM(0),
1221b7e1c893Smrg			    VALID_PIXEL_MODE(0),
1222b7e1c893Smrg			    CF_INST(SQ_CF_INST_VTX),
1223b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
1224b7e1c893Smrg			    BARRIER(1));
1225ad43ddacSmrg
1226ad43ddacSmrg    /* 4 - ALU */
12270974d292Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(14),
1228ad43ddacSmrg				KCACHE_BANK0(0),
1229ad43ddacSmrg				KCACHE_BANK1(0),
1230ad43ddacSmrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
1231ad43ddacSmrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1232ad43ddacSmrg				KCACHE_ADDR0(0),
1233ad43ddacSmrg				KCACHE_ADDR1(0),
1234921a55d8Smrg				I_COUNT(20),
1235ad43ddacSmrg				USES_WATERFALL(0),
1236ad43ddacSmrg				CF_INST(SQ_CF_INST_ALU),
1237ad43ddacSmrg				WHOLE_QUAD_MODE(0),
1238ad43ddacSmrg				BARRIER(1));
1239ad43ddacSmrg
1240ad43ddacSmrg    /* 5 - dst */
1241b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1242b7e1c893Smrg					  TYPE(SQ_EXPORT_POS),
1243b7e1c893Smrg					  RW_GPR(2),
1244b7e1c893Smrg					  RW_REL(ABSOLUTE),
1245b7e1c893Smrg					  INDEX_GPR(0),
1246b7e1c893Smrg					  ELEM_SIZE(0));
1247b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1248b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1249ad43ddacSmrg					       SRC_SEL_Z(SQ_SEL_0),
1250ad43ddacSmrg					       SRC_SEL_W(SQ_SEL_1),
1251b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
1252b7e1c893Smrg					       BURST_COUNT(1),
1253b7e1c893Smrg					       END_OF_PROGRAM(0),
1254b7e1c893Smrg					       VALID_PIXEL_MODE(0),
1255b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1256b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
1257b7e1c893Smrg					       BARRIER(1));
1258ad43ddacSmrg    /* 6 - src */
1259b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1260b7e1c893Smrg					  TYPE(SQ_EXPORT_PARAM),
1261b7e1c893Smrg					  RW_GPR(1),
1262b7e1c893Smrg					  RW_REL(ABSOLUTE),
1263b7e1c893Smrg					  INDEX_GPR(0),
1264b7e1c893Smrg					  ELEM_SIZE(0));
1265b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1266b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1267ad43ddacSmrg					       SRC_SEL_Z(SQ_SEL_0),
1268ad43ddacSmrg					       SRC_SEL_W(SQ_SEL_1),
1269b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
1270b7e1c893Smrg					       BURST_COUNT(1),
1271b7e1c893Smrg					       END_OF_PROGRAM(0),
1272b7e1c893Smrg					       VALID_PIXEL_MODE(0),
1273b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT),
1274b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
1275b7e1c893Smrg					       BARRIER(0));
1276ad43ddacSmrg    /* 7 - mask */
1277b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
1278b7e1c893Smrg					  TYPE(SQ_EXPORT_PARAM),
1279b7e1c893Smrg					  RW_GPR(0),
1280b7e1c893Smrg					  RW_REL(ABSOLUTE),
1281b7e1c893Smrg					  INDEX_GPR(0),
1282b7e1c893Smrg					  ELEM_SIZE(0));
1283b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1284b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1285ad43ddacSmrg					       SRC_SEL_Z(SQ_SEL_0),
1286ad43ddacSmrg					       SRC_SEL_W(SQ_SEL_1),
1287b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
1288b7e1c893Smrg					       BURST_COUNT(1),
1289b7e1c893Smrg					       END_OF_PROGRAM(0),
1290b7e1c893Smrg					       VALID_PIXEL_MODE(0),
1291b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1292b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
1293b7e1c893Smrg					       BARRIER(0));
1294ad43ddacSmrg    /* 8 */
1295b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(0));
1296b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1297b7e1c893Smrg			    CF_CONST(0),
1298b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
1299b7e1c893Smrg			    I_COUNT(0),
1300b7e1c893Smrg			    CALL_COUNT(0),
1301b7e1c893Smrg			    END_OF_PROGRAM(0),
1302b7e1c893Smrg			    VALID_PIXEL_MODE(0),
1303b7e1c893Smrg			    CF_INST(SQ_CF_INST_RETURN),
1304b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
1305b7e1c893Smrg			    BARRIER(1));
13060974d292Smrg    /* 9 - non-mask sub */
1307921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(50));
13080974d292Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
13090974d292Smrg			    CF_CONST(0),
13100974d292Smrg			    COND(SQ_CF_COND_ACTIVE),
13110974d292Smrg			    I_COUNT(2),
13120974d292Smrg			    CALL_COUNT(0),
13130974d292Smrg			    END_OF_PROGRAM(0),
13140974d292Smrg			    VALID_PIXEL_MODE(0),
13150974d292Smrg			    CF_INST(SQ_CF_INST_VTX),
13160974d292Smrg			    WHOLE_QUAD_MODE(0),
13170974d292Smrg			    BARRIER(1));
1318b7e1c893Smrg
13190974d292Smrg    /* 10 - ALU */
1320921a55d8Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(34),
13210974d292Smrg				KCACHE_BANK0(0),
13220974d292Smrg				KCACHE_BANK1(0),
13230974d292Smrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
13240974d292Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
13250974d292Smrg				KCACHE_ADDR0(0),
13260974d292Smrg				KCACHE_ADDR1(0),
1327921a55d8Smrg				I_COUNT(10),
13280974d292Smrg				USES_WATERFALL(0),
13290974d292Smrg				CF_INST(SQ_CF_INST_ALU),
13300974d292Smrg				WHOLE_QUAD_MODE(0),
13310974d292Smrg				BARRIER(1));
1332ad43ddacSmrg
13330974d292Smrg    /* 11 - dst */
13340974d292Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
13350974d292Smrg					  TYPE(SQ_EXPORT_POS),
13360974d292Smrg					  RW_GPR(1),
13370974d292Smrg					  RW_REL(ABSOLUTE),
13380974d292Smrg					  INDEX_GPR(0),
13390974d292Smrg					  ELEM_SIZE(0));
13400974d292Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
13410974d292Smrg					       SRC_SEL_Y(SQ_SEL_Y),
13420974d292Smrg					       SRC_SEL_Z(SQ_SEL_0),
13430974d292Smrg					       SRC_SEL_W(SQ_SEL_1),
13440974d292Smrg					       R6xx_ELEM_LOOP(0),
13450974d292Smrg					       BURST_COUNT(0),
13460974d292Smrg					       END_OF_PROGRAM(0),
13470974d292Smrg					       VALID_PIXEL_MODE(0),
13480974d292Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
13490974d292Smrg					       WHOLE_QUAD_MODE(0),
13500974d292Smrg					       BARRIER(1));
13510974d292Smrg    /* 12 - src */
13520974d292Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
13530974d292Smrg					  TYPE(SQ_EXPORT_PARAM),
13540974d292Smrg					  RW_GPR(0),
13550974d292Smrg					  RW_REL(ABSOLUTE),
13560974d292Smrg					  INDEX_GPR(0),
13570974d292Smrg					  ELEM_SIZE(0));
13580974d292Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
13590974d292Smrg					       SRC_SEL_Y(SQ_SEL_Y),
13600974d292Smrg					       SRC_SEL_Z(SQ_SEL_0),
13610974d292Smrg					       SRC_SEL_W(SQ_SEL_1),
13620974d292Smrg					       R6xx_ELEM_LOOP(0),
13630974d292Smrg					       BURST_COUNT(0),
13640974d292Smrg					       END_OF_PROGRAM(0),
13650974d292Smrg					       VALID_PIXEL_MODE(0),
13660974d292Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
13670974d292Smrg					       WHOLE_QUAD_MODE(0),
13680974d292Smrg					       BARRIER(0));
13690974d292Smrg    /* 13 */
13700974d292Smrg    shader[i++] = CF_DWORD0(ADDR(0));
13710974d292Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
13720974d292Smrg			    CF_CONST(0),
13730974d292Smrg			    COND(SQ_CF_COND_ACTIVE),
13740974d292Smrg			    I_COUNT(0),
13750974d292Smrg			    CALL_COUNT(0),
13760974d292Smrg			    END_OF_PROGRAM(0),
13770974d292Smrg			    VALID_PIXEL_MODE(0),
13780974d292Smrg			    CF_INST(SQ_CF_INST_RETURN),
13790974d292Smrg			    WHOLE_QUAD_MODE(0),
13800974d292Smrg			    BARRIER(1));
13810974d292Smrg
13820974d292Smrg
1383921a55d8Smrg    /* 14 srcX.x DOT4 - mask */
1384921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1385921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1386921a55d8Smrg                             SRC0_ELEM(ELEM_X),
1387921a55d8Smrg                             SRC0_NEG(0),
1388921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1389921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1390921a55d8Smrg                             SRC1_ELEM(ELEM_X),
1391921a55d8Smrg                             SRC1_NEG(0),
1392921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1393921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1394921a55d8Smrg                             LAST(0));
1395921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1396921a55d8Smrg                                 SRC0_ABS(0),
1397921a55d8Smrg                                 SRC1_ABS(0),
1398921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1399921a55d8Smrg                                 UPDATE_PRED(0),
1400921a55d8Smrg                                 WRITE_MASK(1),
1401921a55d8Smrg                                 FOG_MERGE(0),
1402921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1403921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1404921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1405921a55d8Smrg                                 DST_GPR(3),
1406921a55d8Smrg                                 DST_REL(ABSOLUTE),
1407921a55d8Smrg                                 DST_ELEM(ELEM_X),
1408921a55d8Smrg                                 CLAMP(0));
1409921a55d8Smrg
1410921a55d8Smrg    /* 15 srcX.y DOT4 - mask */
1411921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
14120974d292Smrg                             SRC0_REL(ABSOLUTE),
14130974d292Smrg                             SRC0_ELEM(ELEM_Y),
14140974d292Smrg                             SRC0_NEG(0),
1415921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
14160974d292Smrg                             SRC1_REL(ABSOLUTE),
14170974d292Smrg                             SRC1_ELEM(ELEM_Y),
14180974d292Smrg                             SRC1_NEG(0),
14190974d292Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
14200974d292Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1421921a55d8Smrg                             LAST(0));
1422921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1423921a55d8Smrg                                 SRC0_ABS(0),
1424921a55d8Smrg                                 SRC1_ABS(0),
1425921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1426921a55d8Smrg                                 UPDATE_PRED(0),
1427921a55d8Smrg                                 WRITE_MASK(0),
1428921a55d8Smrg                                 FOG_MERGE(0),
1429921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1430921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
14310974d292Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1432921a55d8Smrg                                 DST_GPR(3),
1433921a55d8Smrg                                 DST_REL(ABSOLUTE),
1434921a55d8Smrg                                 DST_ELEM(ELEM_Y),
1435921a55d8Smrg                                 CLAMP(0));
1436921a55d8Smrg
1437921a55d8Smrg    /* 16 srcX.z DOT4 - mask */
1438921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1439921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1440921a55d8Smrg                             SRC0_ELEM(ELEM_Z),
1441921a55d8Smrg                             SRC0_NEG(0),
1442921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1443921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1444921a55d8Smrg                             SRC1_ELEM(ELEM_Z),
1445921a55d8Smrg                             SRC1_NEG(0),
1446921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1447921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1448921a55d8Smrg                             LAST(0));
1449921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1450921a55d8Smrg                                 SRC0_ABS(0),
1451921a55d8Smrg                                 SRC1_ABS(0),
1452921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1453921a55d8Smrg                                 UPDATE_PRED(0),
1454921a55d8Smrg                                 WRITE_MASK(0),
1455921a55d8Smrg                                 FOG_MERGE(0),
1456921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1457921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1458921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1459921a55d8Smrg                                 DST_GPR(3),
14600974d292Smrg                                 DST_REL(ABSOLUTE),
14610974d292Smrg                                 DST_ELEM(ELEM_Z),
14620974d292Smrg                                 CLAMP(0));
1463921a55d8Smrg
1464921a55d8Smrg    /* 17 srcX.w DOT4 - mask */
1465921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1466ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1467921a55d8Smrg                             SRC0_ELEM(ELEM_W),
1468ad43ddacSmrg                             SRC0_NEG(0),
1469921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1470ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1471921a55d8Smrg                             SRC1_ELEM(ELEM_W),
1472ad43ddacSmrg                             SRC1_NEG(0),
1473ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1474ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1475ad43ddacSmrg                             LAST(1));
1476921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1477921a55d8Smrg                                 SRC0_ABS(0),
1478921a55d8Smrg                                 SRC1_ABS(0),
1479921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1480921a55d8Smrg                                 UPDATE_PRED(0),
1481921a55d8Smrg                                 WRITE_MASK(0),
1482921a55d8Smrg                                 FOG_MERGE(0),
1483921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1484921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1485ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1486921a55d8Smrg                                 DST_GPR(3),
1487ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1488ad43ddacSmrg                                 DST_ELEM(ELEM_W),
1489ad43ddacSmrg                                 CLAMP(0));
1490ad43ddacSmrg
1491921a55d8Smrg    /* 18 srcY.x DOT4 - mask */
1492921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1493ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1494ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
1495ad43ddacSmrg                             SRC0_NEG(0),
1496921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1497ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1498ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
1499ad43ddacSmrg                             SRC1_NEG(0),
1500ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1501ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1502ad43ddacSmrg                             LAST(0));
1503921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1504921a55d8Smrg                                 SRC0_ABS(0),
1505921a55d8Smrg                                 SRC1_ABS(0),
1506921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1507921a55d8Smrg                                 UPDATE_PRED(0),
1508921a55d8Smrg                                 WRITE_MASK(0),
1509921a55d8Smrg                                 FOG_MERGE(0),
1510921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1511921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1512ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1513921a55d8Smrg                                 DST_GPR(3),
1514ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1515ad43ddacSmrg                                 DST_ELEM(ELEM_X),
1516ad43ddacSmrg                                 CLAMP(0));
1517921a55d8Smrg
1518921a55d8Smrg    /* 19 srcY.y DOT4 - mask */
1519921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1520921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1521921a55d8Smrg                             SRC0_ELEM(ELEM_Y),
1522921a55d8Smrg                             SRC0_NEG(0),
1523921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1524921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1525921a55d8Smrg                             SRC1_ELEM(ELEM_Y),
1526921a55d8Smrg                             SRC1_NEG(0),
1527921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1528921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1529921a55d8Smrg                             LAST(0));
1530921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1531921a55d8Smrg                                 SRC0_ABS(0),
1532921a55d8Smrg                                 SRC1_ABS(0),
1533921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1534921a55d8Smrg                                 UPDATE_PRED(0),
1535921a55d8Smrg                                 WRITE_MASK(1),
1536921a55d8Smrg                                 FOG_MERGE(0),
1537921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1538921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1539921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1540921a55d8Smrg                                 DST_GPR(3),
1541921a55d8Smrg                                 DST_REL(ABSOLUTE),
1542921a55d8Smrg                                 DST_ELEM(ELEM_Y),
1543921a55d8Smrg                                 CLAMP(0));
1544921a55d8Smrg
1545921a55d8Smrg    /* 20 srcY.z DOT4 - mask */
1546921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1547921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1548921a55d8Smrg                             SRC0_ELEM(ELEM_Z),
1549921a55d8Smrg                             SRC0_NEG(0),
1550921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1551921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1552921a55d8Smrg                             SRC1_ELEM(ELEM_Z),
1553921a55d8Smrg                             SRC1_NEG(0),
1554921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1555921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1556921a55d8Smrg                             LAST(0));
1557921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1558921a55d8Smrg                                 SRC0_ABS(0),
1559921a55d8Smrg                                 SRC1_ABS(0),
1560921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1561921a55d8Smrg                                 UPDATE_PRED(0),
1562921a55d8Smrg                                 WRITE_MASK(0),
1563921a55d8Smrg                                 FOG_MERGE(0),
1564921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1565921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1566921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1567921a55d8Smrg                                 DST_GPR(3),
1568921a55d8Smrg                                 DST_REL(ABSOLUTE),
1569921a55d8Smrg                                 DST_ELEM(ELEM_Z),
1570921a55d8Smrg                                 CLAMP(0));
1571921a55d8Smrg
1572921a55d8Smrg    /* 21 srcY.w DOT4 - mask */
1573921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1574921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1575921a55d8Smrg                             SRC0_ELEM(ELEM_W),
1576921a55d8Smrg                             SRC0_NEG(0),
1577921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1578921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1579921a55d8Smrg                             SRC1_ELEM(ELEM_W),
1580921a55d8Smrg                             SRC1_NEG(0),
1581921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1582921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1583921a55d8Smrg                             LAST(1));
1584921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1585921a55d8Smrg                                 SRC0_ABS(0),
1586921a55d8Smrg                                 SRC1_ABS(0),
1587921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1588921a55d8Smrg                                 UPDATE_PRED(0),
1589921a55d8Smrg                                 WRITE_MASK(0),
1590921a55d8Smrg                                 FOG_MERGE(0),
1591921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1592921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1593921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1594921a55d8Smrg                                 DST_GPR(3),
1595921a55d8Smrg                                 DST_REL(ABSOLUTE),
1596921a55d8Smrg                                 DST_ELEM(ELEM_W),
1597921a55d8Smrg                                 CLAMP(0));
1598921a55d8Smrg
1599921a55d8Smrg    /* 22 maskX.x DOT4 - mask */
1600921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1601ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1602ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
1603ad43ddacSmrg                             SRC0_NEG(0),
1604921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1605ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1606ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
1607ad43ddacSmrg                             SRC1_NEG(0),
1608ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1609ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1610921a55d8Smrg                             LAST(0));
1611921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1612921a55d8Smrg                                 SRC0_ABS(0),
1613921a55d8Smrg                                 SRC1_ABS(0),
1614921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1615921a55d8Smrg                                 UPDATE_PRED(0),
1616921a55d8Smrg                                 WRITE_MASK(1),
1617921a55d8Smrg                                 FOG_MERGE(0),
1618921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1619921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1620ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1621921a55d8Smrg                                 DST_GPR(4),
1622ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1623921a55d8Smrg                                 DST_ELEM(ELEM_X),
1624ad43ddacSmrg                                 CLAMP(0));
1625ad43ddacSmrg
1626921a55d8Smrg    /* 23 maskX.y DOT4 - mask */
1627921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1628ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1629ad43ddacSmrg                             SRC0_ELEM(ELEM_Y),
1630ad43ddacSmrg                             SRC0_NEG(0),
1631921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1632ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1633ad43ddacSmrg                             SRC1_ELEM(ELEM_Y),
1634ad43ddacSmrg                             SRC1_NEG(0),
1635ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1636ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1637921a55d8Smrg                             LAST(0));
1638921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1639921a55d8Smrg                                 SRC0_ABS(0),
1640921a55d8Smrg                                 SRC1_ABS(0),
1641921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1642921a55d8Smrg                                 UPDATE_PRED(0),
1643921a55d8Smrg                                 WRITE_MASK(0),
1644921a55d8Smrg                                 FOG_MERGE(0),
1645921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1646921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1647ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1648921a55d8Smrg                                 DST_GPR(4),
1649921a55d8Smrg                                 DST_REL(ABSOLUTE),
1650921a55d8Smrg                                 DST_ELEM(ELEM_Y),
1651921a55d8Smrg                                 CLAMP(0));
1652921a55d8Smrg
1653921a55d8Smrg    /* 24 maskX.z DOT4 - mask */
1654921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1655921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1656921a55d8Smrg                             SRC0_ELEM(ELEM_Z),
1657921a55d8Smrg                             SRC0_NEG(0),
1658921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1659921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1660921a55d8Smrg                             SRC1_ELEM(ELEM_Z),
1661921a55d8Smrg                             SRC1_NEG(0),
1662921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1663921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1664921a55d8Smrg                             LAST(0));
1665921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1666921a55d8Smrg                                 SRC0_ABS(0),
1667921a55d8Smrg                                 SRC1_ABS(0),
1668921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1669921a55d8Smrg                                 UPDATE_PRED(0),
1670921a55d8Smrg                                 WRITE_MASK(0),
1671921a55d8Smrg                                 FOG_MERGE(0),
1672921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1673921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1674921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1675921a55d8Smrg                                 DST_GPR(4),
1676ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1677ad43ddacSmrg                                 DST_ELEM(ELEM_Z),
1678ad43ddacSmrg                                 CLAMP(0));
1679ad43ddacSmrg
1680921a55d8Smrg    /* 25 maskX.w DOT4 - mask */
1681921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1682ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1683921a55d8Smrg                             SRC0_ELEM(ELEM_W),
1684ad43ddacSmrg                             SRC0_NEG(0),
1685921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1686ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1687921a55d8Smrg                             SRC1_ELEM(ELEM_W),
1688ad43ddacSmrg                             SRC1_NEG(0),
1689ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1690ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1691ad43ddacSmrg                             LAST(1));
1692921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1693921a55d8Smrg                                 SRC0_ABS(0),
1694921a55d8Smrg                                 SRC1_ABS(0),
1695921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1696921a55d8Smrg                                 UPDATE_PRED(0),
1697921a55d8Smrg                                 WRITE_MASK(0),
1698921a55d8Smrg                                 FOG_MERGE(0),
1699921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1700921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1701ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1702921a55d8Smrg                                 DST_GPR(4),
1703ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1704ad43ddacSmrg                                 DST_ELEM(ELEM_W),
1705ad43ddacSmrg                                 CLAMP(0));
1706ad43ddacSmrg
1707921a55d8Smrg    /* 26 maskY.x DOT4 - mask */
1708921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1709ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1710ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
1711ad43ddacSmrg                             SRC0_NEG(0),
1712921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1713ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1714ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
1715ad43ddacSmrg                             SRC1_NEG(0),
1716ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1717ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1718ad43ddacSmrg                             LAST(0));
1719921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1720921a55d8Smrg                                 SRC0_ABS(0),
1721921a55d8Smrg                                 SRC1_ABS(0),
1722921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1723921a55d8Smrg                                 UPDATE_PRED(0),
1724921a55d8Smrg                                 WRITE_MASK(0),
1725921a55d8Smrg                                 FOG_MERGE(0),
1726921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1727921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1728ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1729921a55d8Smrg                                 DST_GPR(4),
1730ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1731ad43ddacSmrg                                 DST_ELEM(ELEM_X),
1732ad43ddacSmrg                                 CLAMP(0));
1733921a55d8Smrg
1734921a55d8Smrg    /* 27 maskY.y DOT4 - mask */
1735921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1736ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1737921a55d8Smrg                             SRC0_ELEM(ELEM_Y),
1738ad43ddacSmrg                             SRC0_NEG(0),
1739921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1740ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1741921a55d8Smrg                             SRC1_ELEM(ELEM_Y),
1742ad43ddacSmrg                             SRC1_NEG(0),
1743ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1744ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1745921a55d8Smrg                             LAST(0));
1746921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1747921a55d8Smrg                                 SRC0_ABS(0),
1748921a55d8Smrg                                 SRC1_ABS(0),
1749921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1750921a55d8Smrg                                 UPDATE_PRED(0),
1751921a55d8Smrg                                 WRITE_MASK(1),
1752921a55d8Smrg                                 FOG_MERGE(0),
1753921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1754921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1755ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1756921a55d8Smrg                                 DST_GPR(4),
1757ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1758ad43ddacSmrg                                 DST_ELEM(ELEM_Y),
1759ad43ddacSmrg                                 CLAMP(0));
1760ad43ddacSmrg
1761921a55d8Smrg    /* 28 maskY.z DOT4 - mask */
1762921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1763921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1764921a55d8Smrg                             SRC0_ELEM(ELEM_Z),
1765921a55d8Smrg                             SRC0_NEG(0),
1766921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1767921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1768921a55d8Smrg                             SRC1_ELEM(ELEM_Z),
1769921a55d8Smrg                             SRC1_NEG(0),
1770921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1771921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1772921a55d8Smrg                             LAST(0));
1773921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1774921a55d8Smrg                                 SRC0_ABS(0),
1775921a55d8Smrg                                 SRC1_ABS(0),
1776921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1777921a55d8Smrg                                 UPDATE_PRED(0),
1778921a55d8Smrg                                 WRITE_MASK(0),
1779921a55d8Smrg                                 FOG_MERGE(0),
1780921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1781921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1782921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1783921a55d8Smrg                                 DST_GPR(4),
1784921a55d8Smrg                                 DST_REL(ABSOLUTE),
1785921a55d8Smrg                                 DST_ELEM(ELEM_Z),
1786921a55d8Smrg                                 CLAMP(0));
1787921a55d8Smrg
1788921a55d8Smrg    /* 29 maskY.w DOT4 - mask */
1789921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1790921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1791921a55d8Smrg                             SRC0_ELEM(ELEM_W),
1792921a55d8Smrg                             SRC0_NEG(0),
1793921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1794921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1795921a55d8Smrg                             SRC1_ELEM(ELEM_W),
1796921a55d8Smrg                             SRC1_NEG(0),
1797921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1798921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1799921a55d8Smrg                             LAST(1));
1800921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1801921a55d8Smrg                                 SRC0_ABS(0),
1802921a55d8Smrg                                 SRC1_ABS(0),
1803921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1804921a55d8Smrg                                 UPDATE_PRED(0),
1805921a55d8Smrg                                 WRITE_MASK(0),
1806921a55d8Smrg                                 FOG_MERGE(0),
1807921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1808921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1809921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1810921a55d8Smrg                                 DST_GPR(4),
1811921a55d8Smrg                                 DST_REL(ABSOLUTE),
1812921a55d8Smrg                                 DST_ELEM(ELEM_W),
1813921a55d8Smrg                                 CLAMP(0));
1814921a55d8Smrg
1815921a55d8Smrg    /* 30 srcX / w */
1816921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
1817ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1818ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
1819ad43ddacSmrg                             SRC0_NEG(0),
1820921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1821ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1822ad43ddacSmrg                             SRC1_ELEM(ELEM_W),
1823ad43ddacSmrg                             SRC1_NEG(0),
1824ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
1825ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1826ad43ddacSmrg                             LAST(1));
1827ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1828ad43ddacSmrg                                 SRC0_ABS(0),
1829ad43ddacSmrg                                 SRC1_ABS(0),
1830ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
1831ad43ddacSmrg                                 UPDATE_PRED(0),
1832ad43ddacSmrg                                 WRITE_MASK(1),
1833ad43ddacSmrg                                 FOG_MERGE(0),
1834ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
1835ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
1836ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1837ad43ddacSmrg                                 DST_GPR(1),
1838ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1839ad43ddacSmrg                                 DST_ELEM(ELEM_X),
1840ad43ddacSmrg                                 CLAMP(0));
1841ad43ddacSmrg
1842921a55d8Smrg    /* 31 srcY / h */
1843921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
1844ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1845ad43ddacSmrg                             SRC0_ELEM(ELEM_Y),
1846ad43ddacSmrg                             SRC0_NEG(0),
1847921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1848ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1849ad43ddacSmrg                             SRC1_ELEM(ELEM_W),
1850ad43ddacSmrg                             SRC1_NEG(0),
1851ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
1852ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1853ad43ddacSmrg                             LAST(1));
1854ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1855ad43ddacSmrg                                 SRC0_ABS(0),
1856ad43ddacSmrg                                 SRC1_ABS(0),
1857ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
1858ad43ddacSmrg                                 UPDATE_PRED(0),
1859ad43ddacSmrg                                 WRITE_MASK(1),
1860ad43ddacSmrg                                 FOG_MERGE(0),
1861ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
1862ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
1863ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1864ad43ddacSmrg                                 DST_GPR(1),
1865ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1866ad43ddacSmrg                                 DST_ELEM(ELEM_Y),
1867ad43ddacSmrg                                 CLAMP(0));
1868ad43ddacSmrg
1869921a55d8Smrg    /* 32 maskX / w */
1870921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
1871ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1872ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
1873ad43ddacSmrg                             SRC0_NEG(0),
1874921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1875ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1876ad43ddacSmrg                             SRC1_ELEM(ELEM_W),
1877ad43ddacSmrg                             SRC1_NEG(0),
1878ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
1879ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1880ad43ddacSmrg                             LAST(1));
1881ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1882ad43ddacSmrg                                 SRC0_ABS(0),
1883ad43ddacSmrg                                 SRC1_ABS(0),
1884ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
1885ad43ddacSmrg                                 UPDATE_PRED(0),
1886ad43ddacSmrg                                 WRITE_MASK(1),
1887ad43ddacSmrg                                 FOG_MERGE(0),
1888ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
1889ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
1890ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1891ad43ddacSmrg                                 DST_GPR(0),
1892ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1893ad43ddacSmrg                                 DST_ELEM(ELEM_X),
1894ad43ddacSmrg                                 CLAMP(0));
1895ad43ddacSmrg
1896921a55d8Smrg    /* 33 maskY / h */
1897921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
1898ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1899ad43ddacSmrg                             SRC0_ELEM(ELEM_Y),
1900ad43ddacSmrg                             SRC0_NEG(0),
1901921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1902ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1903ad43ddacSmrg                             SRC1_ELEM(ELEM_W),
1904ad43ddacSmrg                             SRC1_NEG(0),
1905ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
1906ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1907ad43ddacSmrg                             LAST(1));
1908ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1909ad43ddacSmrg                                 SRC0_ABS(0),
1910ad43ddacSmrg                                 SRC1_ABS(0),
1911ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
1912ad43ddacSmrg                                 UPDATE_PRED(0),
1913ad43ddacSmrg                                 WRITE_MASK(1),
1914ad43ddacSmrg                                 FOG_MERGE(0),
1915ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
1916ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
1917ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1918ad43ddacSmrg                                 DST_GPR(0),
1919ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1920ad43ddacSmrg                                 DST_ELEM(ELEM_Y),
1921ad43ddacSmrg                                 CLAMP(0));
1922ad43ddacSmrg
1923921a55d8Smrg    /* 34 srcX.x DOT4 - non-mask */
1924921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1925921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1926921a55d8Smrg                             SRC0_ELEM(ELEM_X),
1927921a55d8Smrg                             SRC0_NEG(0),
1928921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1929921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1930921a55d8Smrg                             SRC1_ELEM(ELEM_X),
1931921a55d8Smrg                             SRC1_NEG(0),
1932921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1933921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1934921a55d8Smrg                             LAST(0));
1935921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1936921a55d8Smrg                                 SRC0_ABS(0),
1937921a55d8Smrg                                 SRC1_ABS(0),
1938921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1939921a55d8Smrg                                 UPDATE_PRED(0),
1940921a55d8Smrg                                 WRITE_MASK(1),
1941921a55d8Smrg                                 FOG_MERGE(0),
1942921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1943921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1944921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1945921a55d8Smrg                                 DST_GPR(2),
1946921a55d8Smrg                                 DST_REL(ABSOLUTE),
1947921a55d8Smrg                                 DST_ELEM(ELEM_X),
1948921a55d8Smrg                                 CLAMP(0));
1949921a55d8Smrg
1950921a55d8Smrg    /* 35 srcX.y DOT4 - non-mask */
1951921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1952ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1953ad43ddacSmrg                             SRC0_ELEM(ELEM_Y),
1954ad43ddacSmrg                             SRC0_NEG(0),
1955921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1956ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1957ad43ddacSmrg                             SRC1_ELEM(ELEM_Y),
1958ad43ddacSmrg                             SRC1_NEG(0),
1959ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1960ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1961921a55d8Smrg                             LAST(0));
1962921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1963921a55d8Smrg                                 SRC0_ABS(0),
1964921a55d8Smrg                                 SRC1_ABS(0),
1965921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1966921a55d8Smrg                                 UPDATE_PRED(0),
1967921a55d8Smrg                                 WRITE_MASK(0),
1968921a55d8Smrg                                 FOG_MERGE(0),
1969921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1970921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1971ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1972921a55d8Smrg                                 DST_GPR(2),
1973921a55d8Smrg                                 DST_REL(ABSOLUTE),
1974921a55d8Smrg                                 DST_ELEM(ELEM_Y),
1975921a55d8Smrg                                 CLAMP(0));
1976921a55d8Smrg
1977921a55d8Smrg    /* 36 srcX.z DOT4 - non-mask */
1978921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1979921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1980921a55d8Smrg                             SRC0_ELEM(ELEM_Z),
1981921a55d8Smrg                             SRC0_NEG(0),
1982921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1983921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1984921a55d8Smrg                             SRC1_ELEM(ELEM_Z),
1985921a55d8Smrg                             SRC1_NEG(0),
1986921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1987921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1988921a55d8Smrg                             LAST(0));
1989921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1990921a55d8Smrg                                 SRC0_ABS(0),
1991921a55d8Smrg                                 SRC1_ABS(0),
1992921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1993921a55d8Smrg                                 UPDATE_PRED(0),
1994921a55d8Smrg                                 WRITE_MASK(0),
1995921a55d8Smrg                                 FOG_MERGE(0),
1996921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1997921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1998921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1999921a55d8Smrg                                 DST_GPR(2),
2000ad43ddacSmrg                                 DST_REL(ABSOLUTE),
2001ad43ddacSmrg                                 DST_ELEM(ELEM_Z),
2002ad43ddacSmrg                                 CLAMP(0));
2003921a55d8Smrg
2004921a55d8Smrg    /* 37 srcX.w DOT4 - non-mask */
2005921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2006ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
2007921a55d8Smrg                             SRC0_ELEM(ELEM_W),
2008ad43ddacSmrg                             SRC0_NEG(0),
2009921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
2010ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
2011921a55d8Smrg                             SRC1_ELEM(ELEM_W),
2012ad43ddacSmrg                             SRC1_NEG(0),
2013ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
2014ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2015ad43ddacSmrg                             LAST(1));
2016921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2017921a55d8Smrg                                 SRC0_ABS(0),
2018921a55d8Smrg                                 SRC1_ABS(0),
2019921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2020921a55d8Smrg                                 UPDATE_PRED(0),
2021921a55d8Smrg                                 WRITE_MASK(0),
2022921a55d8Smrg                                 FOG_MERGE(0),
2023921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2024921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2025ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2026921a55d8Smrg                                 DST_GPR(2),
2027ad43ddacSmrg                                 DST_REL(ABSOLUTE),
2028ad43ddacSmrg                                 DST_ELEM(ELEM_W),
2029ad43ddacSmrg                                 CLAMP(0));
2030ad43ddacSmrg
2031921a55d8Smrg    /* 38 srcY.x DOT4 - non-mask */
2032921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2033ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
2034ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
2035ad43ddacSmrg                             SRC0_NEG(0),
2036921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2037ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
2038ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
2039ad43ddacSmrg                             SRC1_NEG(0),
2040ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
2041ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2042ad43ddacSmrg                             LAST(0));
2043921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2044921a55d8Smrg                                 SRC0_ABS(0),
2045921a55d8Smrg                                 SRC1_ABS(0),
2046921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2047921a55d8Smrg                                 UPDATE_PRED(0),
2048921a55d8Smrg                                 WRITE_MASK(0),
2049921a55d8Smrg                                 FOG_MERGE(0),
2050921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2051921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2052ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2053921a55d8Smrg                                 DST_GPR(2),
2054ad43ddacSmrg                                 DST_REL(ABSOLUTE),
2055ad43ddacSmrg                                 DST_ELEM(ELEM_X),
2056ad43ddacSmrg                                 CLAMP(0));
2057921a55d8Smrg
2058921a55d8Smrg    /* 39 srcY.y DOT4 - non-mask */
2059921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2060ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
2061921a55d8Smrg                             SRC0_ELEM(ELEM_Y),
2062ad43ddacSmrg                             SRC0_NEG(0),
2063921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2064ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
2065921a55d8Smrg                             SRC1_ELEM(ELEM_Y),
2066ad43ddacSmrg                             SRC1_NEG(0),
2067ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
2068ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2069921a55d8Smrg                             LAST(0));
2070921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2071921a55d8Smrg                                 SRC0_ABS(0),
2072921a55d8Smrg                                 SRC1_ABS(0),
2073921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2074921a55d8Smrg                                 UPDATE_PRED(0),
2075921a55d8Smrg                                 WRITE_MASK(1),
2076921a55d8Smrg                                 FOG_MERGE(0),
2077921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2078921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2079ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2080921a55d8Smrg                                 DST_GPR(2),
2081ad43ddacSmrg                                 DST_REL(ABSOLUTE),
2082ad43ddacSmrg                                 DST_ELEM(ELEM_Y),
2083ad43ddacSmrg                                 CLAMP(0));
2084921a55d8Smrg
2085921a55d8Smrg    /* 40 srcY.z DOT4 - non-mask */
2086921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2087921a55d8Smrg                             SRC0_REL(ABSOLUTE),
2088921a55d8Smrg                             SRC0_ELEM(ELEM_Z),
2089921a55d8Smrg                             SRC0_NEG(0),
2090921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2091921a55d8Smrg                             SRC1_REL(ABSOLUTE),
2092921a55d8Smrg                             SRC1_ELEM(ELEM_Z),
2093921a55d8Smrg                             SRC1_NEG(0),
2094921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
2095921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2096921a55d8Smrg                             LAST(0));
2097921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2098921a55d8Smrg                                 SRC0_ABS(0),
2099921a55d8Smrg                                 SRC1_ABS(0),
2100921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2101921a55d8Smrg                                 UPDATE_PRED(0),
2102921a55d8Smrg                                 WRITE_MASK(0),
2103921a55d8Smrg                                 FOG_MERGE(0),
2104921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2105921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2106921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2107921a55d8Smrg                                 DST_GPR(2),
2108921a55d8Smrg                                 DST_REL(ABSOLUTE),
2109921a55d8Smrg                                 DST_ELEM(ELEM_Z),
2110921a55d8Smrg                                 CLAMP(0));
2111921a55d8Smrg
2112921a55d8Smrg    /* 41 srcY.w DOT4 - non-mask */
2113921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2114921a55d8Smrg                             SRC0_REL(ABSOLUTE),
2115921a55d8Smrg                             SRC0_ELEM(ELEM_W),
2116921a55d8Smrg                             SRC0_NEG(0),
2117921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2118921a55d8Smrg                             SRC1_REL(ABSOLUTE),
2119921a55d8Smrg                             SRC1_ELEM(ELEM_W),
2120921a55d8Smrg                             SRC1_NEG(0),
2121921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
2122921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2123921a55d8Smrg                             LAST(1));
2124921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2125921a55d8Smrg                                 SRC0_ABS(0),
2126921a55d8Smrg                                 SRC1_ABS(0),
2127921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2128921a55d8Smrg                                 UPDATE_PRED(0),
2129921a55d8Smrg                                 WRITE_MASK(0),
2130921a55d8Smrg                                 FOG_MERGE(0),
2131921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2132921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2133921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2134921a55d8Smrg                                 DST_GPR(2),
2135921a55d8Smrg                                 DST_REL(ABSOLUTE),
2136921a55d8Smrg                                 DST_ELEM(ELEM_W),
2137921a55d8Smrg                                 CLAMP(0));
2138921a55d8Smrg
2139921a55d8Smrg    /* 42 srcX / w */
2140921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
2141ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
2142ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
2143ad43ddacSmrg                             SRC0_NEG(0),
2144921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
2145ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
2146ad43ddacSmrg                             SRC1_ELEM(ELEM_W),
2147ad43ddacSmrg                             SRC1_NEG(0),
2148ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
2149ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2150ad43ddacSmrg                             LAST(1));
2151ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2152ad43ddacSmrg                                 SRC0_ABS(0),
2153ad43ddacSmrg                                 SRC1_ABS(0),
2154ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
2155ad43ddacSmrg                                 UPDATE_PRED(0),
2156ad43ddacSmrg                                 WRITE_MASK(1),
2157ad43ddacSmrg                                 FOG_MERGE(0),
2158ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
2159ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
2160ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2161ad43ddacSmrg                                 DST_GPR(0),
2162ad43ddacSmrg                                 DST_REL(ABSOLUTE),
2163ad43ddacSmrg                                 DST_ELEM(ELEM_X),
2164ad43ddacSmrg                                 CLAMP(0));
2165ad43ddacSmrg
2166921a55d8Smrg    /* 43 srcY / h */
2167921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
2168ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
2169ad43ddacSmrg                             SRC0_ELEM(ELEM_Y),
2170ad43ddacSmrg                             SRC0_NEG(0),
2171921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2172ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
2173ad43ddacSmrg                             SRC1_ELEM(ELEM_W),
2174ad43ddacSmrg                             SRC1_NEG(0),
2175ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
2176ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2177ad43ddacSmrg                             LAST(1));
2178ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2179ad43ddacSmrg                                 SRC0_ABS(0),
2180ad43ddacSmrg                                 SRC1_ABS(0),
2181ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
2182ad43ddacSmrg                                 UPDATE_PRED(0),
2183ad43ddacSmrg                                 WRITE_MASK(1),
2184ad43ddacSmrg                                 FOG_MERGE(0),
2185ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
2186ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
2187ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2188ad43ddacSmrg                                 DST_GPR(0),
2189ad43ddacSmrg                                 DST_REL(ABSOLUTE),
2190ad43ddacSmrg                                 DST_ELEM(ELEM_Y),
2191ad43ddacSmrg                                 CLAMP(0));
2192ad43ddacSmrg
2193921a55d8Smrg    /* 44/45 - dst - mask */
2194b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2195b7e1c893Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2196b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
2197b7e1c893Smrg			     BUFFER_ID(0),
2198b7e1c893Smrg			     SRC_GPR(0),
2199b7e1c893Smrg			     SRC_REL(ABSOLUTE),
2200b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
22010974d292Smrg			     MEGA_FETCH_COUNT(24));
22020974d292Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(2),
2203b7e1c893Smrg				 DST_REL(0),
2204b7e1c893Smrg				 DST_SEL_X(SQ_SEL_X),
2205b7e1c893Smrg				 DST_SEL_Y(SQ_SEL_Y),
2206b7e1c893Smrg				 DST_SEL_Z(SQ_SEL_0),
2207b7e1c893Smrg				 DST_SEL_W(SQ_SEL_1),
2208b7e1c893Smrg				 USE_CONST_FIELDS(0),
2209ad43ddacSmrg				 DATA_FORMAT(FMT_32_32_FLOAT),
2210ad43ddacSmrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2211ad43ddacSmrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2212b7e1c893Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2213b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
2214b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
2215b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2216b13dfe66Smrg#else
2217b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2218b13dfe66Smrg#endif
2219b7e1c893Smrg			     CONST_BUF_NO_STRIDE(0),
2220b7e1c893Smrg			     MEGA_FETCH(1));
2221b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
2222921a55d8Smrg    /* 46/47 - src */
2223b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2224b7e1c893Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2225b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
2226b7e1c893Smrg			     BUFFER_ID(0),
2227b7e1c893Smrg			     SRC_GPR(0),
2228b7e1c893Smrg			     SRC_REL(ABSOLUTE),
2229b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
2230b7e1c893Smrg			     MEGA_FETCH_COUNT(8));
22310974d292Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
2232b7e1c893Smrg				 DST_REL(0),
2233b7e1c893Smrg				 DST_SEL_X(SQ_SEL_X),
2234b7e1c893Smrg				 DST_SEL_Y(SQ_SEL_Y),
2235ad43ddacSmrg				 DST_SEL_Z(SQ_SEL_1),
2236ad43ddacSmrg				 DST_SEL_W(SQ_SEL_0),
2237b7e1c893Smrg				 USE_CONST_FIELDS(0),
2238ad43ddacSmrg				 DATA_FORMAT(FMT_32_32_FLOAT),
2239ad43ddacSmrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2240ad43ddacSmrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2241b7e1c893Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2242b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
2243b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
2244b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2245b13dfe66Smrg#else
2246b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2247b13dfe66Smrg#endif
2248b7e1c893Smrg			     CONST_BUF_NO_STRIDE(0),
2249b7e1c893Smrg			     MEGA_FETCH(0));
2250b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
2251921a55d8Smrg    /* 48/49 - mask */
22520974d292Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
22530974d292Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
22540974d292Smrg			     FETCH_WHOLE_QUAD(0),
22550974d292Smrg			     BUFFER_ID(0),
22560974d292Smrg			     SRC_GPR(0),
22570974d292Smrg			     SRC_REL(ABSOLUTE),
22580974d292Smrg			     SRC_SEL_X(SQ_SEL_X),
22590974d292Smrg			     MEGA_FETCH_COUNT(8));
22600974d292Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
22610974d292Smrg				 DST_REL(0),
22620974d292Smrg				 DST_SEL_X(SQ_SEL_X),
22630974d292Smrg				 DST_SEL_Y(SQ_SEL_Y),
22640974d292Smrg				 DST_SEL_Z(SQ_SEL_1),
22650974d292Smrg				 DST_SEL_W(SQ_SEL_0),
22660974d292Smrg				 USE_CONST_FIELDS(0),
22670974d292Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
22680974d292Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
22690974d292Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
22700974d292Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
22710974d292Smrg    shader[i++] = VTX_DWORD2(OFFSET(16),
2272b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
2273b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2274b13dfe66Smrg#else
2275b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2276b13dfe66Smrg#endif
22770974d292Smrg			     CONST_BUF_NO_STRIDE(0),
22780974d292Smrg			     MEGA_FETCH(0));
22790974d292Smrg    shader[i++] = VTX_DWORD_PAD;
2280b7e1c893Smrg
2281921a55d8Smrg    /* 50/51 - dst - non-mask */
22820974d292Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
22830974d292Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
22840974d292Smrg			     FETCH_WHOLE_QUAD(0),
22850974d292Smrg			     BUFFER_ID(0),
22860974d292Smrg			     SRC_GPR(0),
22870974d292Smrg			     SRC_REL(ABSOLUTE),
22880974d292Smrg			     SRC_SEL_X(SQ_SEL_X),
22890974d292Smrg			     MEGA_FETCH_COUNT(16));
22900974d292Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
22910974d292Smrg				 DST_REL(0),
22920974d292Smrg				 DST_SEL_X(SQ_SEL_X),
22930974d292Smrg				 DST_SEL_Y(SQ_SEL_Y),
22940974d292Smrg				 DST_SEL_Z(SQ_SEL_0),
22950974d292Smrg				 DST_SEL_W(SQ_SEL_1),
22960974d292Smrg				 USE_CONST_FIELDS(0),
22970974d292Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
22980974d292Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
22990974d292Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
23000974d292Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
23010974d292Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
2302b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
2303b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2304b13dfe66Smrg#else
2305b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2306b13dfe66Smrg#endif
23070974d292Smrg			     CONST_BUF_NO_STRIDE(0),
23080974d292Smrg			     MEGA_FETCH(1));
23090974d292Smrg    shader[i++] = VTX_DWORD_PAD;
2310921a55d8Smrg    /* 52/53 - src */
23110974d292Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
23120974d292Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
23130974d292Smrg			     FETCH_WHOLE_QUAD(0),
23140974d292Smrg			     BUFFER_ID(0),
23150974d292Smrg			     SRC_GPR(0),
23160974d292Smrg			     SRC_REL(ABSOLUTE),
23170974d292Smrg			     SRC_SEL_X(SQ_SEL_X),
23180974d292Smrg			     MEGA_FETCH_COUNT(8));
23190974d292Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
23200974d292Smrg				 DST_REL(0),
23210974d292Smrg				 DST_SEL_X(SQ_SEL_X),
23220974d292Smrg				 DST_SEL_Y(SQ_SEL_Y),
23230974d292Smrg				 DST_SEL_Z(SQ_SEL_1),
23240974d292Smrg				 DST_SEL_W(SQ_SEL_0),
23250974d292Smrg				 USE_CONST_FIELDS(0),
23260974d292Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
23270974d292Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
23280974d292Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
23290974d292Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
23300974d292Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
2331b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
2332b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2333b13dfe66Smrg#else
2334b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2335b13dfe66Smrg#endif
23360974d292Smrg			     CONST_BUF_NO_STRIDE(0),
23370974d292Smrg			     MEGA_FETCH(0));
23380974d292Smrg    shader[i++] = VTX_DWORD_PAD;
23390974d292Smrg
23400974d292Smrg    return i;
23410974d292Smrg}
23420974d292Smrg
23430974d292Smrg/* comp ps --------------------------------------- */
/*
 * R600_comp_ps - write the composite pixel shader into a caller-supplied
 * buffer.
 *
 * ChipSet: chip family; in this function it is only forwarded to
 *          ALU_DWORD1_OP2().
 * shader:  destination array.  Exactly 40 dwords are stored
 *          (shader[0] .. shader[39]); no bounds checking is done here, so
 *          the caller must provide at least that much room.
 *
 * Returns the number of dwords written.
 *
 * The numbers in the comments below count two-dword slots from the start
 * of the buffer (slot n begins at shader[2 * n]).  The non-zero ADDR()
 * operands (3, 7, 10, 14, 18) use the same numbering, so inserting,
 * removing or reordering stores requires updating them as well.
 *
 * Layout:
 *   0-2    top level: CALL slot 3 on COND_BOOL, CALL slot 7 on
 *          COND_NOT_BOOL (both with CF_CONST(0)), then a NOP carrying
 *          END_OF_PROGRAM(1)
 *   3-6    mask sub: TEX clause, ALU clause, pixel export of gpr[2], RETURN
 *   7-9    non-mask sub: TEX clause, pixel export of gpr[0], RETURN
 *   10-13  ALU instructions: gpr[2] = gpr[1] * gpr[0] per channel, CLAMP(1)
 *   14-19  texture fetches referenced by the two TEX clauses
 *
 * NOTE(review): presumably the caller sets boolean constant 0 when a mask
 * picture is in use - confirm against the code that programs the constant.
 */
23440974d292Smrgint R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
23450974d292Smrg{
23460974d292Smrg    int i = 0;
23470974d292Smrg
23480974d292Smrg    /* 0 - CALL slot 3 (mask sub) on COND_BOOL */
23490974d292Smrg    shader[i++] = CF_DWORD0(ADDR(3));
23500974d292Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
23510974d292Smrg                            CF_CONST(0),
23520974d292Smrg                            COND(SQ_CF_COND_BOOL),
23530974d292Smrg                            I_COUNT(0),
23540974d292Smrg                            CALL_COUNT(0),
23550974d292Smrg                            END_OF_PROGRAM(0),
23560974d292Smrg                            VALID_PIXEL_MODE(0),
23570974d292Smrg                            CF_INST(SQ_CF_INST_CALL),
23580974d292Smrg                            WHOLE_QUAD_MODE(0),
23590974d292Smrg                            BARRIER(0));
23600974d292Smrg    /* 1 - CALL slot 7 (non-mask sub) on COND_NOT_BOOL */
23610974d292Smrg    shader[i++] = CF_DWORD0(ADDR(7));
23620974d292Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
23630974d292Smrg                            CF_CONST(0),
23640974d292Smrg                            COND(SQ_CF_COND_NOT_BOOL),
23650974d292Smrg                            I_COUNT(0),
23660974d292Smrg                            CALL_COUNT(0),
23670974d292Smrg                            END_OF_PROGRAM(0),
23680974d292Smrg                            VALID_PIXEL_MODE(0),
23690974d292Smrg                            CF_INST(SQ_CF_INST_CALL),
23700974d292Smrg                            WHOLE_QUAD_MODE(0),
23710974d292Smrg                            BARRIER(0));
23720974d292Smrg    /* 2 - NOP with END_OF_PROGRAM(1) */
23730974d292Smrg    shader[i++] = CF_DWORD0(ADDR(0));
23740974d292Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
23750974d292Smrg                            CF_CONST(0),
23760974d292Smrg                            COND(SQ_CF_COND_ACTIVE),
23770974d292Smrg                            I_COUNT(0),
23780974d292Smrg                            CALL_COUNT(0),
23790974d292Smrg                            END_OF_PROGRAM(1),
23800974d292Smrg                            VALID_PIXEL_MODE(0),
23810974d292Smrg                            CF_INST(SQ_CF_INST_NOP),
23820974d292Smrg                            WHOLE_QUAD_MODE(0),
23830974d292Smrg                            BARRIER(1));
23840974d292Smrg
23850974d292Smrg    /* 3 - mask sub: TEX clause at slot 14, I_COUNT(2) (src + mask fetches) */
23860974d292Smrg    shader[i++] = CF_DWORD0(ADDR(14));
23870974d292Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
23880974d292Smrg			    CF_CONST(0),
23890974d292Smrg			    COND(SQ_CF_COND_ACTIVE),
23900974d292Smrg			    I_COUNT(2),
23910974d292Smrg			    CALL_COUNT(0),
23920974d292Smrg			    END_OF_PROGRAM(0),
23930974d292Smrg			    VALID_PIXEL_MODE(0),
23940974d292Smrg			    CF_INST(SQ_CF_INST_TEX),
23950974d292Smrg			    WHOLE_QUAD_MODE(0),
23960974d292Smrg			    BARRIER(1));
23970974d292Smrg
23980974d292Smrg    /* 4 - ALU clause at slot 10, I_COUNT(4) (the four MULs below) */
23990974d292Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(10),
24000974d292Smrg				KCACHE_BANK0(0),
24010974d292Smrg				KCACHE_BANK1(0),
24020974d292Smrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
24030974d292Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
24040974d292Smrg				KCACHE_ADDR0(0),
24050974d292Smrg				KCACHE_ADDR1(0),
24060974d292Smrg				I_COUNT(4),
24070974d292Smrg				USES_WATERFALL(0),
24080974d292Smrg				CF_INST(SQ_CF_INST_ALU),
24090974d292Smrg				WHOLE_QUAD_MODE(0),
24100974d292Smrg				BARRIER(1));
24110974d292Smrg
24120974d292Smrg    /* 5 - export gpr[2] (the MUL result) to CF_PIXEL_MRT0, EXPORT_DONE */
24130974d292Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
24140974d292Smrg					  TYPE(SQ_EXPORT_PIXEL),
24150974d292Smrg					  RW_GPR(2),
24160974d292Smrg					  RW_REL(ABSOLUTE),
24170974d292Smrg					  INDEX_GPR(0),
24180974d292Smrg					  ELEM_SIZE(1));
24190974d292Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
24200974d292Smrg					       SRC_SEL_Y(SQ_SEL_Y),
24210974d292Smrg					       SRC_SEL_Z(SQ_SEL_Z),
24220974d292Smrg					       SRC_SEL_W(SQ_SEL_W),
24230974d292Smrg					       R6xx_ELEM_LOOP(0),
24240974d292Smrg					       BURST_COUNT(1),
24250974d292Smrg					       END_OF_PROGRAM(0),
24260974d292Smrg					       VALID_PIXEL_MODE(0),
24270974d292Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
24280974d292Smrg					       WHOLE_QUAD_MODE(0),
24290974d292Smrg					       BARRIER(1));
24300974d292Smrg    /* 6 - RETURN (end of mask sub) */
24310974d292Smrg    shader[i++] = CF_DWORD0(ADDR(0));
24320974d292Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
24330974d292Smrg			    CF_CONST(0),
24340974d292Smrg			    COND(SQ_CF_COND_ACTIVE),
24350974d292Smrg			    I_COUNT(0),
24360974d292Smrg			    CALL_COUNT(0),
24370974d292Smrg			    END_OF_PROGRAM(0),
24380974d292Smrg			    VALID_PIXEL_MODE(0),
24390974d292Smrg			    CF_INST(SQ_CF_INST_RETURN),
24400974d292Smrg			    WHOLE_QUAD_MODE(0),
24410974d292Smrg			    BARRIER(1));
24420974d292Smrg
24430974d292Smrg    /* 7 - non-mask sub: TEX clause at slot 18, I_COUNT(1) (src fetch only) */
24440974d292Smrg    shader[i++] = CF_DWORD0(ADDR(18));
2445b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
2446b7e1c893Smrg			    CF_CONST(0),
2447b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
2448b7e1c893Smrg			    I_COUNT(1),
2449b7e1c893Smrg			    CALL_COUNT(0),
2450b7e1c893Smrg			    END_OF_PROGRAM(0),
2451b7e1c893Smrg			    VALID_PIXEL_MODE(0),
2452b7e1c893Smrg			    CF_INST(SQ_CF_INST_TEX),
2453b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
2454b7e1c893Smrg			    BARRIER(1));
24550974d292Smrg    /* 8 - export gpr[0] (the fetched src texel) to CF_PIXEL_MRT0, EXPORT_DONE */
2456b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
2457b7e1c893Smrg					  TYPE(SQ_EXPORT_PIXEL),
2458b7e1c893Smrg					  RW_GPR(0),
2459b7e1c893Smrg					  RW_REL(ABSOLUTE),
2460b7e1c893Smrg					  INDEX_GPR(0),
2461b7e1c893Smrg					  ELEM_SIZE(1));
2462b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
2463b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
2464b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
2465b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
2466b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
2467b7e1c893Smrg					       BURST_COUNT(1),
24680974d292Smrg					       END_OF_PROGRAM(0),
2469b7e1c893Smrg					       VALID_PIXEL_MODE(0),
2470b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
2471b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
2472b7e1c893Smrg					       BARRIER(1));
24730974d292Smrg    /* 9 - RETURN (end of non-mask sub) */
24740974d292Smrg    shader[i++] = CF_DWORD0(ADDR(0));
24750974d292Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
24760974d292Smrg			    CF_CONST(0),
24770974d292Smrg			    COND(SQ_CF_COND_ACTIVE),
24780974d292Smrg			    I_COUNT(0),
24790974d292Smrg			    CALL_COUNT(0),
24800974d292Smrg			    END_OF_PROGRAM(0),
24810974d292Smrg			    VALID_PIXEL_MODE(0),
24820974d292Smrg			    CF_INST(SQ_CF_INST_RETURN),
24830974d292Smrg			    WHOLE_QUAD_MODE(0),
24840974d292Smrg			    BARRIER(1));
24850974d292Smrg
24860974d292Smrg    /* 10 - alu 0 */
24870974d292Smrg    /* MUL gpr[2].x = gpr[1].x * gpr[0].x, CLAMP(1) */
2488921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
24890974d292Smrg			     SRC0_REL(ABSOLUTE),
24900974d292Smrg			     SRC0_ELEM(ELEM_X),
24910974d292Smrg			     SRC0_NEG(0),
2492921a55d8Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
24930974d292Smrg			     SRC1_REL(ABSOLUTE),
24940974d292Smrg			     SRC1_ELEM(ELEM_X),
24950974d292Smrg			     SRC1_NEG(0),
24960974d292Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
24970974d292Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
24980974d292Smrg			     LAST(0));
24990974d292Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
25000974d292Smrg				 SRC0_ABS(0),
25010974d292Smrg				 SRC1_ABS(0),
25020974d292Smrg				 UPDATE_EXECUTE_MASK(0),
25030974d292Smrg				 UPDATE_PRED(0),
25040974d292Smrg				 WRITE_MASK(1),
25050974d292Smrg				 FOG_MERGE(0),
25060974d292Smrg				 OMOD(SQ_ALU_OMOD_OFF),
25070974d292Smrg				 ALU_INST(SQ_OP2_INST_MUL),
25080974d292Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
25090974d292Smrg				 DST_GPR(2),
25100974d292Smrg				 DST_REL(ABSOLUTE),
25110974d292Smrg				 DST_ELEM(ELEM_X),
25120974d292Smrg				 CLAMP(1));
25130974d292Smrg    /* 11 - alu 1 */
25140974d292Smrg    /* MUL gpr[2].y = gpr[1].y * gpr[0].y, CLAMP(1) */
2515921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
25160974d292Smrg			     SRC0_REL(ABSOLUTE),
25170974d292Smrg			     SRC0_ELEM(ELEM_Y),
25180974d292Smrg			     SRC0_NEG(0),
2519921a55d8Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
25200974d292Smrg			     SRC1_REL(ABSOLUTE),
25210974d292Smrg			     SRC1_ELEM(ELEM_Y),
25220974d292Smrg			     SRC1_NEG(0),
25230974d292Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
25240974d292Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
25250974d292Smrg			     LAST(0));
25260974d292Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
25270974d292Smrg				 SRC0_ABS(0),
25280974d292Smrg				 SRC1_ABS(0),
25290974d292Smrg				 UPDATE_EXECUTE_MASK(0),
25300974d292Smrg				 UPDATE_PRED(0),
25310974d292Smrg				 WRITE_MASK(1),
25320974d292Smrg				 FOG_MERGE(0),
25330974d292Smrg				 OMOD(SQ_ALU_OMOD_OFF),
25340974d292Smrg				 ALU_INST(SQ_OP2_INST_MUL),
25350974d292Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
25360974d292Smrg				 DST_GPR(2),
25370974d292Smrg				 DST_REL(ABSOLUTE),
25380974d292Smrg				 DST_ELEM(ELEM_Y),
25390974d292Smrg				 CLAMP(1));
25400974d292Smrg    /* 12 - alu 2 */
25410974d292Smrg    /* MUL gpr[2].z = gpr[1].z * gpr[0].z, CLAMP(1) */
2542921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
25430974d292Smrg			     SRC0_REL(ABSOLUTE),
25440974d292Smrg			     SRC0_ELEM(ELEM_Z),
25450974d292Smrg			     SRC0_NEG(0),
2546921a55d8Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
25470974d292Smrg			     SRC1_REL(ABSOLUTE),
25480974d292Smrg			     SRC1_ELEM(ELEM_Z),
25490974d292Smrg			     SRC1_NEG(0),
25500974d292Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
25510974d292Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
25520974d292Smrg			     LAST(0));
25530974d292Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
25540974d292Smrg				 SRC0_ABS(0),
25550974d292Smrg				 SRC1_ABS(0),
25560974d292Smrg				 UPDATE_EXECUTE_MASK(0),
25570974d292Smrg				 UPDATE_PRED(0),
25580974d292Smrg				 WRITE_MASK(1),
25590974d292Smrg				 FOG_MERGE(0),
25600974d292Smrg				 OMOD(SQ_ALU_OMOD_OFF),
25610974d292Smrg				 ALU_INST(SQ_OP2_INST_MUL),
25620974d292Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
25630974d292Smrg				 DST_GPR(2),
25640974d292Smrg				 DST_REL(ABSOLUTE),
25650974d292Smrg				 DST_ELEM(ELEM_Z),
25660974d292Smrg				 CLAMP(1));
25670974d292Smrg    /* 13 - alu 3 (the only MUL emitted with LAST(1)) */
25680974d292Smrg    /* MUL gpr[2].w = gpr[1].w * gpr[0].w, CLAMP(1) */
2569921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
25700974d292Smrg			     SRC0_REL(ABSOLUTE),
25710974d292Smrg			     SRC0_ELEM(ELEM_W),
25720974d292Smrg			     SRC0_NEG(0),
2573921a55d8Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
25740974d292Smrg			     SRC1_REL(ABSOLUTE),
25750974d292Smrg			     SRC1_ELEM(ELEM_W),
25760974d292Smrg			     SRC1_NEG(0),
25770974d292Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
25780974d292Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
25790974d292Smrg			     LAST(1));
25800974d292Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
25810974d292Smrg				 SRC0_ABS(0),
25820974d292Smrg				 SRC1_ABS(0),
25830974d292Smrg				 UPDATE_EXECUTE_MASK(0),
25840974d292Smrg				 UPDATE_PRED(0),
25850974d292Smrg				 WRITE_MASK(1),
25860974d292Smrg				 FOG_MERGE(0),
25870974d292Smrg				 OMOD(SQ_ALU_OMOD_OFF),
25880974d292Smrg				 ALU_INST(SQ_OP2_INST_MUL),
25890974d292Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
25900974d292Smrg				 DST_GPR(2),
25910974d292Smrg				 DST_REL(ABSOLUTE),
25920974d292Smrg				 DST_ELEM(ELEM_W),
25930974d292Smrg				 CLAMP(1));
2594b7e1c893Smrg
25950974d292Smrg    /* 14/15 - src (mask path): sample resource 0 / sampler 0, coords gpr[0] -> gpr[0] */
25960974d292Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
25970974d292Smrg			     BC_FRAC_MODE(0),
25980974d292Smrg			     FETCH_WHOLE_QUAD(0),
25990974d292Smrg			     RESOURCE_ID(0),
26000974d292Smrg			     SRC_GPR(0),
26010974d292Smrg			     SRC_REL(ABSOLUTE),
26020974d292Smrg			     R7xx_ALT_CONST(0));
26030974d292Smrg    shader[i++] = TEX_DWORD1(DST_GPR(0),
26040974d292Smrg			     DST_REL(ABSOLUTE),
26050974d292Smrg			     DST_SEL_X(SQ_SEL_X),
26060974d292Smrg			     DST_SEL_Y(SQ_SEL_Y),
26070974d292Smrg			     DST_SEL_Z(SQ_SEL_Z),
26080974d292Smrg			     DST_SEL_W(SQ_SEL_W),
26090974d292Smrg			     LOD_BIAS(0),
26100974d292Smrg			     COORD_TYPE_X(TEX_NORMALIZED),
26110974d292Smrg			     COORD_TYPE_Y(TEX_NORMALIZED),
26120974d292Smrg			     COORD_TYPE_Z(TEX_NORMALIZED),
26130974d292Smrg			     COORD_TYPE_W(TEX_NORMALIZED));
26140974d292Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
26150974d292Smrg			     OFFSET_Y(0),
26160974d292Smrg			     OFFSET_Z(0),
26170974d292Smrg			     SAMPLER_ID(0),
26180974d292Smrg			     SRC_SEL_X(SQ_SEL_X),
26190974d292Smrg			     SRC_SEL_Y(SQ_SEL_Y),
26200974d292Smrg			     SRC_SEL_Z(SQ_SEL_0),
26210974d292Smrg			     SRC_SEL_W(SQ_SEL_1));
26220974d292Smrg    shader[i++] = TEX_DWORD_PAD;
26230974d292Smrg    /* 16/17 - mask: sample resource 1 / sampler 1, coords gpr[1] -> gpr[1] */
26240974d292Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
26250974d292Smrg			     BC_FRAC_MODE(0),
26260974d292Smrg			     FETCH_WHOLE_QUAD(0),
26270974d292Smrg			     RESOURCE_ID(1),
26280974d292Smrg			     SRC_GPR(1),
26290974d292Smrg			     SRC_REL(ABSOLUTE),
26300974d292Smrg			     R7xx_ALT_CONST(0));
26310974d292Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
26320974d292Smrg			     DST_REL(ABSOLUTE),
26330974d292Smrg			     DST_SEL_X(SQ_SEL_X),
26340974d292Smrg			     DST_SEL_Y(SQ_SEL_Y),
26350974d292Smrg			     DST_SEL_Z(SQ_SEL_Z),
26360974d292Smrg			     DST_SEL_W(SQ_SEL_W),
26370974d292Smrg			     LOD_BIAS(0),
26380974d292Smrg			     COORD_TYPE_X(TEX_NORMALIZED),
26390974d292Smrg			     COORD_TYPE_Y(TEX_NORMALIZED),
26400974d292Smrg			     COORD_TYPE_Z(TEX_NORMALIZED),
26410974d292Smrg			     COORD_TYPE_W(TEX_NORMALIZED));
26420974d292Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
26430974d292Smrg			     OFFSET_Y(0),
26440974d292Smrg			     OFFSET_Z(0),
26450974d292Smrg			     SAMPLER_ID(1),
26460974d292Smrg			     SRC_SEL_X(SQ_SEL_X),
26470974d292Smrg			     SRC_SEL_Y(SQ_SEL_Y),
26480974d292Smrg			     SRC_SEL_Z(SQ_SEL_0),
26490974d292Smrg			     SRC_SEL_W(SQ_SEL_1));
26500974d292Smrg    shader[i++] = TEX_DWORD_PAD;
2651b7e1c893Smrg
26520974d292Smrg    /* 18/19 - src (non-mask path): sample resource 0 / sampler 0, coords gpr[0] -> gpr[0] */
2653b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
2654b7e1c893Smrg			     BC_FRAC_MODE(0),
2655b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
2656b7e1c893Smrg			     RESOURCE_ID(0),
2657b7e1c893Smrg			     SRC_GPR(0),
2658b7e1c893Smrg			     SRC_REL(ABSOLUTE),
2659b7e1c893Smrg			     R7xx_ALT_CONST(0));
2660b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(0),
2661b7e1c893Smrg			     DST_REL(ABSOLUTE),
2662b7e1c893Smrg			     DST_SEL_X(SQ_SEL_X),
2663b7e1c893Smrg			     DST_SEL_Y(SQ_SEL_Y),
2664b7e1c893Smrg			     DST_SEL_Z(SQ_SEL_Z),
2665b7e1c893Smrg			     DST_SEL_W(SQ_SEL_W),
2666b7e1c893Smrg			     LOD_BIAS(0),
2667b7e1c893Smrg			     COORD_TYPE_X(TEX_NORMALIZED),
2668b7e1c893Smrg			     COORD_TYPE_Y(TEX_NORMALIZED),
2669b7e1c893Smrg			     COORD_TYPE_Z(TEX_NORMALIZED),
2670b7e1c893Smrg			     COORD_TYPE_W(TEX_NORMALIZED));
2671b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
2672b7e1c893Smrg			     OFFSET_Y(0),
2673b7e1c893Smrg			     OFFSET_Z(0),
2674b7e1c893Smrg			     SAMPLER_ID(0),
2675b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
2676b7e1c893Smrg			     SRC_SEL_Y(SQ_SEL_Y),
2677b7e1c893Smrg			     SRC_SEL_Z(SQ_SEL_0),
2678b7e1c893Smrg			     SRC_SEL_W(SQ_SEL_1));
2679b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
2680b7e1c893Smrg
2681b7e1c893Smrg    return i;
2682b7e1c893Smrg}
2683