/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Author: Alex Deucher <alexander.deucher@amd.com>
 *
 */
26b7e1c893Smrg
27b7e1c893Smrg#ifdef HAVE_CONFIG_H
28b7e1c893Smrg#include "config.h"
29b7e1c893Smrg#endif
30b7e1c893Smrg
31b7e1c893Smrg#include "xf86.h"
32b7e1c893Smrg
33b7e1c893Smrg#include "radeon.h"
34b7e1c893Smrg#include "r600_shader.h"
35b7e1c893Smrg#include "r600_reg.h"
36b7e1c893Smrg
37b7e1c893Smrg/* solid vs --------------------------------------- */
38b7e1c893Smrgint R600_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader)
39b7e1c893Smrg{
40b7e1c893Smrg    int i = 0;
41b7e1c893Smrg
42b7e1c893Smrg    /* 0 */
43b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(4));
44b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
45b7e1c893Smrg			    CF_CONST(0),
46b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
47b7e1c893Smrg			    I_COUNT(1),
48b7e1c893Smrg			    CALL_COUNT(0),
49b7e1c893Smrg			    END_OF_PROGRAM(0),
50b7e1c893Smrg			    VALID_PIXEL_MODE(0),
51b7e1c893Smrg			    CF_INST(SQ_CF_INST_VTX),
52b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
53b7e1c893Smrg			    BARRIER(1));
54b7e1c893Smrg    /* 1 */
55b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
56b7e1c893Smrg					  TYPE(SQ_EXPORT_POS),
57b7e1c893Smrg					  RW_GPR(1),
58b7e1c893Smrg					  RW_REL(ABSOLUTE),
59b7e1c893Smrg					  INDEX_GPR(0),
60b7e1c893Smrg					  ELEM_SIZE(0));
61b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
62b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
63b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
64b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
65b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
66b7e1c893Smrg					       BURST_COUNT(1),
67b7e1c893Smrg					       END_OF_PROGRAM(0),
68b7e1c893Smrg					       VALID_PIXEL_MODE(0),
69b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
70b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
71b7e1c893Smrg					       BARRIER(1));
72b7e1c893Smrg    /* 2 - always export a param whether it's used or not */
73b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
74b7e1c893Smrg					  TYPE(SQ_EXPORT_PARAM),
75b7e1c893Smrg					  RW_GPR(0),
76b7e1c893Smrg					  RW_REL(ABSOLUTE),
77b7e1c893Smrg					  INDEX_GPR(0),
78b7e1c893Smrg					  ELEM_SIZE(0));
79b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
80b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
81b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
82b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
83b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
84b7e1c893Smrg					       BURST_COUNT(0),
85b7e1c893Smrg					       END_OF_PROGRAM(1),
86b7e1c893Smrg					       VALID_PIXEL_MODE(0),
87b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
88b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
89b7e1c893Smrg					       BARRIER(0));
90b7e1c893Smrg    /* 3 - padding */
91b7e1c893Smrg    shader[i++] = 0x00000000;
92b7e1c893Smrg    shader[i++] = 0x00000000;
93b7e1c893Smrg    /* 4/5 */
94b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
95b7e1c893Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
96b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
97b7e1c893Smrg			     BUFFER_ID(0),
98b7e1c893Smrg			     SRC_GPR(0),
99b7e1c893Smrg			     SRC_REL(ABSOLUTE),
100b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
101b7e1c893Smrg			     MEGA_FETCH_COUNT(8));
102b7e1c893Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
103b7e1c893Smrg				 DST_REL(0),
104b7e1c893Smrg				 DST_SEL_X(SQ_SEL_X),
105b7e1c893Smrg				 DST_SEL_Y(SQ_SEL_Y),
106b7e1c893Smrg				 DST_SEL_Z(SQ_SEL_0),
107b7e1c893Smrg				 DST_SEL_W(SQ_SEL_1),
108b7e1c893Smrg				 USE_CONST_FIELDS(0),
109ad43ddacSmrg				 DATA_FORMAT(FMT_32_32_FLOAT),
110ad43ddacSmrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
111ad43ddacSmrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
112b7e1c893Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
113b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
114b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
115b13dfe66Smrg			     ENDIAN_SWAP(SQ_ENDIAN_8IN32),
116b13dfe66Smrg#else
117b13dfe66Smrg			     ENDIAN_SWAP(SQ_ENDIAN_NONE),
118b13dfe66Smrg#endif
119b7e1c893Smrg			     CONST_BUF_NO_STRIDE(0),
120b7e1c893Smrg			     MEGA_FETCH(1));
121b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
122b7e1c893Smrg
123b7e1c893Smrg    return i;
124b7e1c893Smrg}
125b7e1c893Smrg
126b7e1c893Smrg/* solid ps --------------------------------------- */
127b7e1c893Smrgint R600_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader)
128b7e1c893Smrg{
129b7e1c893Smrg    int i = 0;
130b7e1c893Smrg
131b7e1c893Smrg    /* 0 */
132b7e1c893Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(2),
133b7e1c893Smrg				KCACHE_BANK0(0),
134b7e1c893Smrg				KCACHE_BANK1(0),
135b7e1c893Smrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
136b7e1c893Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
137b7e1c893Smrg				KCACHE_ADDR0(0),
138b7e1c893Smrg				KCACHE_ADDR1(0),
139b7e1c893Smrg				I_COUNT(4),
140b7e1c893Smrg				USES_WATERFALL(0),
141b7e1c893Smrg				CF_INST(SQ_CF_INST_ALU),
142b7e1c893Smrg				WHOLE_QUAD_MODE(0),
143b7e1c893Smrg				BARRIER(1));
144b7e1c893Smrg    /* 1 */
145b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
146b7e1c893Smrg					  TYPE(SQ_EXPORT_PIXEL),
147b7e1c893Smrg					  RW_GPR(0),
148b7e1c893Smrg					  RW_REL(ABSOLUTE),
149b7e1c893Smrg					  INDEX_GPR(0),
150b7e1c893Smrg					  ELEM_SIZE(1));
151b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
152b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
153b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
154b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
155b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
156b7e1c893Smrg					       BURST_COUNT(1),
157b7e1c893Smrg					       END_OF_PROGRAM(1),
158b7e1c893Smrg					       VALID_PIXEL_MODE(0),
159b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
160b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
161b7e1c893Smrg					       BARRIER(1));
162b7e1c893Smrg
163b7e1c893Smrg    /* 2 */
164921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
165b7e1c893Smrg			     SRC0_REL(ABSOLUTE),
166b7e1c893Smrg			     SRC0_ELEM(ELEM_X),
167b7e1c893Smrg			     SRC0_NEG(0),
168921a55d8Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
169b7e1c893Smrg			     SRC1_REL(ABSOLUTE),
170b7e1c893Smrg			     SRC1_ELEM(ELEM_X),
171b7e1c893Smrg			     SRC1_NEG(0),
172b7e1c893Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
173b7e1c893Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
174b7e1c893Smrg			     LAST(0));
175b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
176b7e1c893Smrg				 SRC0_ABS(0),
177b7e1c893Smrg				 SRC1_ABS(0),
178b7e1c893Smrg				 UPDATE_EXECUTE_MASK(0),
179b7e1c893Smrg				 UPDATE_PRED(0),
180b7e1c893Smrg				 WRITE_MASK(1),
181b7e1c893Smrg				 FOG_MERGE(0),
182b7e1c893Smrg				 OMOD(SQ_ALU_OMOD_OFF),
183b7e1c893Smrg				 ALU_INST(SQ_OP2_INST_MOV),
184b7e1c893Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
185b7e1c893Smrg				 DST_GPR(0),
186b7e1c893Smrg				 DST_REL(ABSOLUTE),
187b7e1c893Smrg				 DST_ELEM(ELEM_X),
188b7e1c893Smrg				 CLAMP(1));
189b7e1c893Smrg    /* 3 */
190921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
191b7e1c893Smrg			     SRC0_REL(ABSOLUTE),
192b7e1c893Smrg			     SRC0_ELEM(ELEM_Y),
193b7e1c893Smrg			     SRC0_NEG(0),
194921a55d8Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
195b7e1c893Smrg			     SRC1_REL(ABSOLUTE),
196b7e1c893Smrg			     SRC1_ELEM(ELEM_Y),
197b7e1c893Smrg			     SRC1_NEG(0),
198b7e1c893Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
199b7e1c893Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
200b7e1c893Smrg			     LAST(0));
201b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
202b7e1c893Smrg				 SRC0_ABS(0),
203b7e1c893Smrg				 SRC1_ABS(0),
204b7e1c893Smrg				 UPDATE_EXECUTE_MASK(0),
205b7e1c893Smrg				 UPDATE_PRED(0),
206b7e1c893Smrg				 WRITE_MASK(1),
207b7e1c893Smrg				 FOG_MERGE(0),
208b7e1c893Smrg				 OMOD(SQ_ALU_OMOD_OFF),
209b7e1c893Smrg				 ALU_INST(SQ_OP2_INST_MOV),
210b7e1c893Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
211b7e1c893Smrg				 DST_GPR(0),
212b7e1c893Smrg				 DST_REL(ABSOLUTE),
213b7e1c893Smrg				 DST_ELEM(ELEM_Y),
214b7e1c893Smrg				 CLAMP(1));
215b7e1c893Smrg    /* 4 */
216921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
217b7e1c893Smrg			     SRC0_REL(ABSOLUTE),
218b7e1c893Smrg			     SRC0_ELEM(ELEM_Z),
219b7e1c893Smrg			     SRC0_NEG(0),
220921a55d8Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
221b7e1c893Smrg			     SRC1_REL(ABSOLUTE),
222b7e1c893Smrg			     SRC1_ELEM(ELEM_Z),
223b7e1c893Smrg			     SRC1_NEG(0),
224b7e1c893Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
225b7e1c893Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
226b7e1c893Smrg			     LAST(0));
227b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
228b7e1c893Smrg				 SRC0_ABS(0),
229b7e1c893Smrg				 SRC1_ABS(0),
230b7e1c893Smrg				 UPDATE_EXECUTE_MASK(0),
231b7e1c893Smrg				 UPDATE_PRED(0),
232b7e1c893Smrg				 WRITE_MASK(1),
233b7e1c893Smrg				 FOG_MERGE(0),
234b7e1c893Smrg				 OMOD(SQ_ALU_OMOD_OFF),
235b7e1c893Smrg				 ALU_INST(SQ_OP2_INST_MOV),
236b7e1c893Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
237b7e1c893Smrg				 DST_GPR(0),
238b7e1c893Smrg				 DST_REL(ABSOLUTE),
239b7e1c893Smrg				 DST_ELEM(ELEM_Z),
240b7e1c893Smrg				 CLAMP(1));
241b7e1c893Smrg    /* 5 */
242921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
243b7e1c893Smrg			     SRC0_REL(ABSOLUTE),
244b7e1c893Smrg			     SRC0_ELEM(ELEM_W),
245b7e1c893Smrg			     SRC0_NEG(0),
246921a55d8Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
247b7e1c893Smrg			     SRC1_REL(ABSOLUTE),
248b7e1c893Smrg			     SRC1_ELEM(ELEM_W),
249b7e1c893Smrg			     SRC1_NEG(0),
250b7e1c893Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
251b7e1c893Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
252b7e1c893Smrg			     LAST(1));
253b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
254b7e1c893Smrg				 SRC0_ABS(0),
255b7e1c893Smrg				 SRC1_ABS(0),
256b7e1c893Smrg				 UPDATE_EXECUTE_MASK(0),
257b7e1c893Smrg				 UPDATE_PRED(0),
258b7e1c893Smrg				 WRITE_MASK(1),
259b7e1c893Smrg				 FOG_MERGE(0),
260b7e1c893Smrg				 OMOD(SQ_ALU_OMOD_OFF),
261b7e1c893Smrg				 ALU_INST(SQ_OP2_INST_MOV),
262b7e1c893Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
263b7e1c893Smrg				 DST_GPR(0),
264b7e1c893Smrg				 DST_REL(ABSOLUTE),
265b7e1c893Smrg				 DST_ELEM(ELEM_W),
266b7e1c893Smrg				 CLAMP(1));
267b7e1c893Smrg
268b7e1c893Smrg    return i;
269b7e1c893Smrg}
270b7e1c893Smrg
271b7e1c893Smrg/* copy vs --------------------------------------- */
272b7e1c893Smrgint R600_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader)
273b7e1c893Smrg{
274b7e1c893Smrg    int i = 0;
275b7e1c893Smrg
276b7e1c893Smrg    /* 0 */
277b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(4));
278b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
279b7e1c893Smrg			    CF_CONST(0),
280b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
281b7e1c893Smrg			    I_COUNT(2),
282b7e1c893Smrg			    CALL_COUNT(0),
283b7e1c893Smrg			    END_OF_PROGRAM(0),
284b7e1c893Smrg			    VALID_PIXEL_MODE(0),
285b7e1c893Smrg			    CF_INST(SQ_CF_INST_VTX),
286b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
287b7e1c893Smrg			    BARRIER(1));
288b7e1c893Smrg    /* 1 */
289b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
290b7e1c893Smrg					  TYPE(SQ_EXPORT_POS),
291b7e1c893Smrg					  RW_GPR(1),
292b7e1c893Smrg					  RW_REL(ABSOLUTE),
293b7e1c893Smrg					  INDEX_GPR(0),
294b7e1c893Smrg					  ELEM_SIZE(0));
295b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
296b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
297b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
298b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
299b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
300b7e1c893Smrg					       BURST_COUNT(0),
301b7e1c893Smrg					       END_OF_PROGRAM(0),
302b7e1c893Smrg					       VALID_PIXEL_MODE(0),
303b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
304b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
305b7e1c893Smrg					       BARRIER(1));
306b7e1c893Smrg    /* 2 */
307b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
308b7e1c893Smrg					  TYPE(SQ_EXPORT_PARAM),
309b7e1c893Smrg					  RW_GPR(0),
310b7e1c893Smrg					  RW_REL(ABSOLUTE),
311b7e1c893Smrg					  INDEX_GPR(0),
312b7e1c893Smrg					  ELEM_SIZE(0));
313b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
314b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
315b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
316b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
317b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
318b7e1c893Smrg					       BURST_COUNT(0),
319b7e1c893Smrg					       END_OF_PROGRAM(1),
320b7e1c893Smrg					       VALID_PIXEL_MODE(0),
321b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
322b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
323b7e1c893Smrg					       BARRIER(0));
324b7e1c893Smrg    /* 3 */
325b7e1c893Smrg    shader[i++] = 0x00000000;
326b7e1c893Smrg    shader[i++] = 0x00000000;
327b7e1c893Smrg    /* 4/5 */
328b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
329b7e1c893Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
330b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
331b7e1c893Smrg			     BUFFER_ID(0),
332b7e1c893Smrg			     SRC_GPR(0),
333b7e1c893Smrg			     SRC_REL(ABSOLUTE),
334b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
335b7e1c893Smrg			     MEGA_FETCH_COUNT(16));
336b7e1c893Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
337b7e1c893Smrg				 DST_REL(0),
338b7e1c893Smrg				 DST_SEL_X(SQ_SEL_X),
339b7e1c893Smrg				 DST_SEL_Y(SQ_SEL_Y),
340b7e1c893Smrg				 DST_SEL_Z(SQ_SEL_0),
341b7e1c893Smrg				 DST_SEL_W(SQ_SEL_1),
342b7e1c893Smrg				 USE_CONST_FIELDS(0),
343ad43ddacSmrg				 DATA_FORMAT(FMT_32_32_FLOAT),
344ad43ddacSmrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
345ad43ddacSmrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
346b7e1c893Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
347b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
348b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
349b13dfe66Smrg			     ENDIAN_SWAP(SQ_ENDIAN_8IN32),
350b13dfe66Smrg#else
351b13dfe66Smrg			     ENDIAN_SWAP(SQ_ENDIAN_NONE),
352b13dfe66Smrg#endif
353b7e1c893Smrg			     CONST_BUF_NO_STRIDE(0),
354b7e1c893Smrg			     MEGA_FETCH(1));
355b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
356b7e1c893Smrg    /* 6/7 */
357b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
358b7e1c893Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
359b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
360b7e1c893Smrg			     BUFFER_ID(0),
361b7e1c893Smrg			     SRC_GPR(0),
362b7e1c893Smrg			     SRC_REL(ABSOLUTE),
363b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
364b7e1c893Smrg			     MEGA_FETCH_COUNT(8));
365b7e1c893Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
366b7e1c893Smrg				 DST_REL(0),
367b7e1c893Smrg				 DST_SEL_X(SQ_SEL_X),
368b7e1c893Smrg				 DST_SEL_Y(SQ_SEL_Y),
369b7e1c893Smrg				 DST_SEL_Z(SQ_SEL_0),
370b7e1c893Smrg				 DST_SEL_W(SQ_SEL_1),
371b7e1c893Smrg				 USE_CONST_FIELDS(0),
372ad43ddacSmrg				 DATA_FORMAT(FMT_32_32_FLOAT),
373ad43ddacSmrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
374ad43ddacSmrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
375b7e1c893Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
376b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
377b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
378b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
379b13dfe66Smrg#else
380b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
381b13dfe66Smrg#endif
382b7e1c893Smrg			     CONST_BUF_NO_STRIDE(0),
383b7e1c893Smrg			     MEGA_FETCH(0));
384b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
385b7e1c893Smrg
386b7e1c893Smrg    return i;
387b7e1c893Smrg}
388b7e1c893Smrg
389b7e1c893Smrg/* copy ps --------------------------------------- */
390b7e1c893Smrgint R600_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader)
391b7e1c893Smrg{
392b7e1c893Smrg    int i=0;
393b7e1c893Smrg
394b7e1c893Smrg    /* CF INST 0 */
395b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(2));
396b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
397b7e1c893Smrg			    CF_CONST(0),
398b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
399b7e1c893Smrg			    I_COUNT(1),
400b7e1c893Smrg			    CALL_COUNT(0),
401b7e1c893Smrg			    END_OF_PROGRAM(0),
402b7e1c893Smrg			    VALID_PIXEL_MODE(0),
403b7e1c893Smrg			    CF_INST(SQ_CF_INST_TEX),
404b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
405b7e1c893Smrg			    BARRIER(1));
406b7e1c893Smrg    /* CF INST 1 */
407b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
408b7e1c893Smrg					  TYPE(SQ_EXPORT_PIXEL),
409b7e1c893Smrg					  RW_GPR(0),
410b7e1c893Smrg					  RW_REL(ABSOLUTE),
411b7e1c893Smrg					  INDEX_GPR(0),
412b7e1c893Smrg					  ELEM_SIZE(1));
413b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
414b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
415b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
416b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
417b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
418b7e1c893Smrg					       BURST_COUNT(1),
419b7e1c893Smrg					       END_OF_PROGRAM(1),
420b7e1c893Smrg					       VALID_PIXEL_MODE(0),
421b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
422b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
423b7e1c893Smrg					       BARRIER(1));
424b7e1c893Smrg    /* TEX INST 0 */
425b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
426b7e1c893Smrg			     BC_FRAC_MODE(0),
427b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
428b7e1c893Smrg			     RESOURCE_ID(0),
429b7e1c893Smrg			     SRC_GPR(0),
430b7e1c893Smrg			     SRC_REL(ABSOLUTE),
431b7e1c893Smrg			     R7xx_ALT_CONST(0));
432b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(0),
433b7e1c893Smrg			     DST_REL(ABSOLUTE),
434b7e1c893Smrg			     DST_SEL_X(SQ_SEL_X), /* R */
435b7e1c893Smrg			     DST_SEL_Y(SQ_SEL_Y), /* G */
436b7e1c893Smrg			     DST_SEL_Z(SQ_SEL_Z), /* B */
437b7e1c893Smrg			     DST_SEL_W(SQ_SEL_W), /* A */
438b7e1c893Smrg			     LOD_BIAS(0),
439b7e1c893Smrg			     COORD_TYPE_X(TEX_UNNORMALIZED),
440b7e1c893Smrg			     COORD_TYPE_Y(TEX_UNNORMALIZED),
441b7e1c893Smrg			     COORD_TYPE_Z(TEX_UNNORMALIZED),
442b7e1c893Smrg			     COORD_TYPE_W(TEX_UNNORMALIZED));
443b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
444b7e1c893Smrg			     OFFSET_Y(0),
445b7e1c893Smrg			     OFFSET_Z(0),
446b7e1c893Smrg			     SAMPLER_ID(0),
447b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
448b7e1c893Smrg			     SRC_SEL_Y(SQ_SEL_Y),
449b7e1c893Smrg			     SRC_SEL_Z(SQ_SEL_0),
450b7e1c893Smrg			     SRC_SEL_W(SQ_SEL_1));
451b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
452b7e1c893Smrg
453b7e1c893Smrg    return i;
454b7e1c893Smrg}
455b7e1c893Smrg
456b7e1c893Smrg/*
457b7e1c893Smrg * ; xv vertex shader
458b7e1c893Smrg * 00 VTX: ADDR(4) CNT(2)
459b7e1c893Smrg *       0  VFETCH R1.xy01, R0.x, fc0  MEGA(16) FORMAT(32_32_FLOAT)
460b7e1c893Smrg *          FORMAT_COMP(SIGNED)
461b7e1c893Smrg *       1  VFETCH R0.xy01, R0.x, fc0  MINI(8) OFFSET(8) FORMAT(32_32_FLOAT)
462b7e1c893Smrg *          FORMAT_COMP(SIGNED)
463b7e1c893Smrg * 01 EXP_DONE: POS0, R1
464b7e1c893Smrg * 02 EXP_DONE: PARAM0, R0  NO_BARRIER
465b7e1c893Smrg * END_OF_PROGRAM
466b7e1c893Smrg */
467b7e1c893Smrgint R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
468b7e1c893Smrg{
469b7e1c893Smrg    int i = 0;
470b7e1c893Smrg
471b7e1c893Smrg    /* 0 */
472ad43ddacSmrg    shader[i++] = CF_DWORD0(ADDR(6));
473b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
474b7e1c893Smrg                            CF_CONST(0),
475b7e1c893Smrg                            COND(SQ_CF_COND_ACTIVE),
476b7e1c893Smrg                            I_COUNT(2),
477b7e1c893Smrg                            CALL_COUNT(0),
478b7e1c893Smrg                            END_OF_PROGRAM(0),
479b7e1c893Smrg                            VALID_PIXEL_MODE(0),
480b7e1c893Smrg                            CF_INST(SQ_CF_INST_VTX),
481b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
482b7e1c893Smrg                            BARRIER(1));
483ad43ddacSmrg
484ad43ddacSmrg    /* 1 - ALU */
485ad43ddacSmrg    shader[i++] = CF_ALU_DWORD0(ADDR(4),
486ad43ddacSmrg				KCACHE_BANK0(0),
487ad43ddacSmrg				KCACHE_BANK1(0),
488ad43ddacSmrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
489ad43ddacSmrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
490ad43ddacSmrg				KCACHE_ADDR0(0),
491ad43ddacSmrg				KCACHE_ADDR1(0),
492ad43ddacSmrg				I_COUNT(2),
493ad43ddacSmrg				USES_WATERFALL(0),
494ad43ddacSmrg				CF_INST(SQ_CF_INST_ALU),
495ad43ddacSmrg				WHOLE_QUAD_MODE(0),
496ad43ddacSmrg				BARRIER(1));
497ad43ddacSmrg
498ad43ddacSmrg    /* 2 */
499b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
500b7e1c893Smrg                                          TYPE(SQ_EXPORT_POS),
501b7e1c893Smrg                                          RW_GPR(1),
502b7e1c893Smrg                                          RW_REL(ABSOLUTE),
503b7e1c893Smrg                                          INDEX_GPR(0),
504b7e1c893Smrg                                          ELEM_SIZE(3));
505b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
506b7e1c893Smrg                                               SRC_SEL_Y(SQ_SEL_Y),
507b7e1c893Smrg                                               SRC_SEL_Z(SQ_SEL_Z),
508b7e1c893Smrg                                               SRC_SEL_W(SQ_SEL_W),
509b7e1c893Smrg                                               R6xx_ELEM_LOOP(0),
510b7e1c893Smrg                                               BURST_COUNT(1),
511b7e1c893Smrg                                               END_OF_PROGRAM(0),
512b7e1c893Smrg                                               VALID_PIXEL_MODE(0),
513b7e1c893Smrg                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
514b7e1c893Smrg                                               WHOLE_QUAD_MODE(0),
515b7e1c893Smrg                                               BARRIER(1));
516ad43ddacSmrg    /* 3 */
517b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
518b7e1c893Smrg                                          TYPE(SQ_EXPORT_PARAM),
519b7e1c893Smrg                                          RW_GPR(0),
520b7e1c893Smrg                                          RW_REL(ABSOLUTE),
521b7e1c893Smrg                                          INDEX_GPR(0),
522b7e1c893Smrg                                          ELEM_SIZE(3));
523b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
524b7e1c893Smrg                                               SRC_SEL_Y(SQ_SEL_Y),
525b7e1c893Smrg                                               SRC_SEL_Z(SQ_SEL_Z),
526b7e1c893Smrg                                               SRC_SEL_W(SQ_SEL_W),
527b7e1c893Smrg                                               R6xx_ELEM_LOOP(0),
528b7e1c893Smrg                                               BURST_COUNT(1),
529b7e1c893Smrg                                               END_OF_PROGRAM(1),
530b7e1c893Smrg                                               VALID_PIXEL_MODE(0),
531b7e1c893Smrg                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
532b7e1c893Smrg                                               WHOLE_QUAD_MODE(0),
533b7e1c893Smrg                                               BARRIER(0));
534ad43ddacSmrg
535ad43ddacSmrg
536ad43ddacSmrg    /* 4 texX / w */
537921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
538ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
539ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
540ad43ddacSmrg                             SRC0_NEG(0),
541921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
542ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
543ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
544ad43ddacSmrg                             SRC1_NEG(0),
545ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
546ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
547ad43ddacSmrg                             LAST(0));
548ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
549ad43ddacSmrg                                 SRC0_ABS(0),
550ad43ddacSmrg                                 SRC1_ABS(0),
551ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
552ad43ddacSmrg                                 UPDATE_PRED(0),
553ad43ddacSmrg                                 WRITE_MASK(1),
554ad43ddacSmrg                                 FOG_MERGE(0),
555ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
556ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
557ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
558ad43ddacSmrg                                 DST_GPR(0),
559ad43ddacSmrg                                 DST_REL(ABSOLUTE),
560ad43ddacSmrg                                 DST_ELEM(ELEM_X),
561ad43ddacSmrg                                 CLAMP(0));
562ad43ddacSmrg
563ad43ddacSmrg    /* 5 texY / h */
564921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
565ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
566ad43ddacSmrg                             SRC0_ELEM(ELEM_Y),
567ad43ddacSmrg                             SRC0_NEG(0),
568921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
569ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
570ad43ddacSmrg                             SRC1_ELEM(ELEM_Y),
571ad43ddacSmrg                             SRC1_NEG(0),
572ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
573ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
574ad43ddacSmrg                             LAST(1));
575ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
576ad43ddacSmrg                                 SRC0_ABS(0),
577ad43ddacSmrg                                 SRC1_ABS(0),
578ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
579ad43ddacSmrg                                 UPDATE_PRED(0),
580ad43ddacSmrg                                 WRITE_MASK(1),
581ad43ddacSmrg                                 FOG_MERGE(0),
582ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
583ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
584ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
585ad43ddacSmrg                                 DST_GPR(0),
586ad43ddacSmrg                                 DST_REL(ABSOLUTE),
587ad43ddacSmrg                                 DST_ELEM(ELEM_Y),
588ad43ddacSmrg                                 CLAMP(0));
589ad43ddacSmrg
590ad43ddacSmrg    /* 6/7 */
591b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
592b7e1c893Smrg                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
593b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
594b7e1c893Smrg                             BUFFER_ID(0),
595b7e1c893Smrg                             SRC_GPR(0),
596b7e1c893Smrg                             SRC_REL(ABSOLUTE),
597b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
598b7e1c893Smrg                             MEGA_FETCH_COUNT(16));
599b7e1c893Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
600b7e1c893Smrg                                 DST_REL(ABSOLUTE),
601b7e1c893Smrg                                 DST_SEL_X(SQ_SEL_X),
602b7e1c893Smrg                                 DST_SEL_Y(SQ_SEL_Y),
603b7e1c893Smrg                                 DST_SEL_Z(SQ_SEL_0),
604b7e1c893Smrg                                 DST_SEL_W(SQ_SEL_1),
605b7e1c893Smrg                                 USE_CONST_FIELDS(0),
606b7e1c893Smrg                                 DATA_FORMAT(FMT_32_32_FLOAT),
607ad43ddacSmrg                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
608b7e1c893Smrg                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
609b7e1c893Smrg                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
610b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
611b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
612b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
613b13dfe66Smrg#else
614b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
615b13dfe66Smrg#endif
616b7e1c893Smrg                             CONST_BUF_NO_STRIDE(0),
617b7e1c893Smrg                             MEGA_FETCH(1));
618b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
619ad43ddacSmrg    /* 8/9 */
620b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
621b7e1c893Smrg                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
622b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
623b7e1c893Smrg                             BUFFER_ID(0),
624b7e1c893Smrg                             SRC_GPR(0),
625b7e1c893Smrg                             SRC_REL(ABSOLUTE),
626b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
627b7e1c893Smrg                             MEGA_FETCH_COUNT(8));
628b7e1c893Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
629b7e1c893Smrg                                 DST_REL(ABSOLUTE),
630b7e1c893Smrg                                 DST_SEL_X(SQ_SEL_X),
631b7e1c893Smrg                                 DST_SEL_Y(SQ_SEL_Y),
632b7e1c893Smrg                                 DST_SEL_Z(SQ_SEL_0),
633b7e1c893Smrg                                 DST_SEL_W(SQ_SEL_1),
634b7e1c893Smrg                                 USE_CONST_FIELDS(0),
635b7e1c893Smrg                                 DATA_FORMAT(FMT_32_32_FLOAT),
636ad43ddacSmrg                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
637b7e1c893Smrg                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
638b7e1c893Smrg                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
639b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
640b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
641b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
642b13dfe66Smrg#else
643b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
644b13dfe66Smrg#endif
645b7e1c893Smrg                             CONST_BUF_NO_STRIDE(0),
646b7e1c893Smrg                             MEGA_FETCH(0));
647b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
648b7e1c893Smrg
649b7e1c893Smrg    return i;
650b7e1c893Smrg}
651b7e1c893Smrg
/* xv ps --------------------------------------- */
/*
 * Write the Xv pixel shader into shader[] as a sequence of 32-bit words and
 * return the number of words written (56 for the sequence below).
 *
 * ChipSet: chip family.  It is not referenced directly in this body.
 *          NOTE(review): the R6xx_/R7xx_ prefixed macros presumably consult
 *          it -- confirm in r600_shader.h.
 * shader:  destination buffer.  Nothing here checks its size, so the caller
 *          has to provide room for every word emitted below.
 *
 * The numbers in the comments below count slots of two 32-bit words, and the
 * ADDR() arguments of the control-flow words refer to those slot numbers:
 *   0-3    control flow: CALL 16 (COND_BOOL), CALL 24 (COND_NOT_BOOL),
 *          one ALU clause covering slots 4-15, then the pixel export of r2
 *   4-15   twelve ALU slots in three groups of four MULADDs
 *   16-23  subroutine: three SAMPLEs (resources 0, 1, 2) into r1.x, r1.z, r1.y
 *   24-27  subroutine: one SAMPLE (resource 0) into r1.xyz
 *
 * NOTE(review): the two subroutines look like the planar and packed source
 * paths, selected by boolean constant 0 -- confirm against the Xv setup code.
 */
652b7e1c893Smrgint R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
653b7e1c893Smrg{
654b7e1c893Smrg    int i = 0;
655b7e1c893Smrg
656b7e1c893Smrg    /* 0: CALL to slot 16 under COND_BOOL, CF_CONST(0) */
657ad43ddacSmrg    shader[i++] = CF_DWORD0(ADDR(16));
658b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
659b7e1c893Smrg                            CF_CONST(0),
660b7e1c893Smrg                            COND(SQ_CF_COND_BOOL),
661b7e1c893Smrg                            I_COUNT(0),
662b7e1c893Smrg                            CALL_COUNT(0),
663b7e1c893Smrg                            END_OF_PROGRAM(0),
664b7e1c893Smrg                            VALID_PIXEL_MODE(0),
665b7e1c893Smrg                            CF_INST(SQ_CF_INST_CALL),
666b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
667b7e1c893Smrg                            BARRIER(0));
668b7e1c893Smrg    /* 1: CALL to slot 24 under COND_NOT_BOOL, CF_CONST(0) */
669ad43ddacSmrg    shader[i++] = CF_DWORD0(ADDR(24));
670b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
671b7e1c893Smrg                            CF_CONST(0),
672b7e1c893Smrg                            COND(SQ_CF_COND_NOT_BOOL),
673b7e1c893Smrg                            I_COUNT(0),
674b7e1c893Smrg                            CALL_COUNT(0),
675b7e1c893Smrg                            END_OF_PROGRAM(0),
676b7e1c893Smrg                            VALID_PIXEL_MODE(0),
677b7e1c893Smrg                            CF_INST(SQ_CF_INST_CALL),
678b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
679b7e1c893Smrg                            BARRIER(0));
680b7e1c893Smrg    /* 2: ALU clause, 12 instructions starting at slot 4 */
681b7e1c893Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(4),
682b7e1c893Smrg                                KCACHE_BANK0(0),
683b7e1c893Smrg                                KCACHE_BANK1(0),
684b7e1c893Smrg                                KCACHE_MODE0(SQ_CF_KCACHE_NOP));
685b7e1c893Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
686b7e1c893Smrg                                KCACHE_ADDR0(0),
687b7e1c893Smrg                                KCACHE_ADDR1(0),
688ad43ddacSmrg                                I_COUNT(12),
689b7e1c893Smrg                                USES_WATERFALL(0),
690b7e1c893Smrg                                CF_INST(SQ_CF_INST_ALU),
691b7e1c893Smrg                                WHOLE_QUAD_MODE(0),
692b7e1c893Smrg                                BARRIER(1));
693b7e1c893Smrg    /* 3: export r2 to CF_PIXEL_MRT0 and end the program */
694b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
695b7e1c893Smrg                                          TYPE(SQ_EXPORT_PIXEL),
696b7e1c893Smrg                                          RW_GPR(2),
697b7e1c893Smrg                                          RW_REL(ABSOLUTE),
698b7e1c893Smrg                                          INDEX_GPR(0),
699b7e1c893Smrg                                          ELEM_SIZE(3));
700b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
701b7e1c893Smrg                                               SRC_SEL_Y(SQ_SEL_Y),
702b7e1c893Smrg                                               SRC_SEL_Z(SQ_SEL_Z),
703b7e1c893Smrg                                               SRC_SEL_W(SQ_SEL_W),
704b7e1c893Smrg                                               R6xx_ELEM_LOOP(0),
705b7e1c893Smrg                                               BURST_COUNT(1),
706b7e1c893Smrg                                               END_OF_PROGRAM(1),
707b7e1c893Smrg                                               VALID_PIXEL_MODE(0),
708b7e1c893Smrg                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
709b7e1c893Smrg                                               WHOLE_QUAD_MODE(0),
710b7e1c893Smrg                                               BARRIER(1));
711ad43ddacSmrg    /* 4,5,6,7: first MULADD group, r2.xyz = c0.w * r1.x + c0.xyz, r2.w = 1 */
712ad43ddacSmrg    /* r2.x = MAD(c0.w, r1.x, c0.x) */
713921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
714b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
715ad43ddacSmrg                             SRC0_ELEM(ELEM_W),
716b7e1c893Smrg                             SRC0_NEG(0),
717921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
718b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
719b7e1c893Smrg                             SRC1_ELEM(ELEM_X),
720b7e1c893Smrg                             SRC1_NEG(0),
721b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
722b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
723b7e1c893Smrg                             LAST(0));
724921a55d8Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0),
725b7e1c893Smrg                                 SRC2_REL(ABSOLUTE),
726ad43ddacSmrg                                 SRC2_ELEM(ELEM_X),
727b7e1c893Smrg                                 SRC2_NEG(0),
728b7e1c893Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
729b7e1c893Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
730ad43ddacSmrg                                 DST_GPR(2),
731b7e1c893Smrg                                 DST_REL(ABSOLUTE),
732b7e1c893Smrg                                 DST_ELEM(ELEM_X),
733ad43ddacSmrg                                 CLAMP(0));
734ad43ddacSmrg    /* r2.y = MAD(c0.w, r1.x, c0.y) */
735921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
736b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
737ad43ddacSmrg                             SRC0_ELEM(ELEM_W),
738b7e1c893Smrg                             SRC0_NEG(0),
739921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
740b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
741ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
742b7e1c893Smrg                             SRC1_NEG(0),
743b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
744b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
745b7e1c893Smrg                             LAST(0));
746921a55d8Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0),
747b7e1c893Smrg                                 SRC2_REL(ABSOLUTE),
748ad43ddacSmrg                                 SRC2_ELEM(ELEM_Y),
749b7e1c893Smrg                                 SRC2_NEG(0),
750b7e1c893Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
751b7e1c893Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
752ad43ddacSmrg                                 DST_GPR(2),
753b7e1c893Smrg                                 DST_REL(ABSOLUTE),
754b7e1c893Smrg                                 DST_ELEM(ELEM_Y),
755b7e1c893Smrg                                 CLAMP(0));
756ad43ddacSmrg    /* r2.z = MAD(c0.w, r1.x, c0.z) */
757921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
758b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
759ad43ddacSmrg                             SRC0_ELEM(ELEM_W),
760b7e1c893Smrg                             SRC0_NEG(0),
761921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
762b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
763ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
764b7e1c893Smrg                             SRC1_NEG(0),
765b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
766b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
767b7e1c893Smrg                             LAST(0));
768921a55d8Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0),
769b7e1c893Smrg                                 SRC2_REL(ABSOLUTE),
770ad43ddacSmrg                                 SRC2_ELEM(ELEM_Z),
771b7e1c893Smrg                                 SRC2_NEG(0),
772b7e1c893Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
773b7e1c893Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
774ad43ddacSmrg                                 DST_GPR(2),
775b7e1c893Smrg                                 DST_REL(ABSOLUTE),
776b7e1c893Smrg                                 DST_ELEM(ELEM_Z),
777b7e1c893Smrg                                 CLAMP(0));
778ad43ddacSmrg    /* r2.w = MAD(0, 0, 1); LAST(1) closes this group of four */
779b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
780b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
781b7e1c893Smrg                             SRC0_ELEM(ELEM_X),
782b7e1c893Smrg                             SRC0_NEG(0),
783b7e1c893Smrg                             SRC1_SEL(SQ_ALU_SRC_0),
784b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
785b7e1c893Smrg                             SRC1_ELEM(ELEM_X),
786b7e1c893Smrg                             SRC1_NEG(0),
787b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
788b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
789b7e1c893Smrg                             LAST(1));
790ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
791ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
792ad43ddacSmrg                                 SRC2_ELEM(ELEM_X),
793ad43ddacSmrg                                 SRC2_NEG(0),
794ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
795b7e1c893Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
796ad43ddacSmrg                                 DST_GPR(2),
797b7e1c893Smrg                                 DST_REL(ABSOLUTE),
798b7e1c893Smrg                                 DST_ELEM(ELEM_W),
799b7e1c893Smrg                                 CLAMP(0));
800ad43ddacSmrg
801ad43ddacSmrg    /* 8,9,10,11: second MULADD group, adds c1.xyz * r1.y to the previous result (pv) */
802ad43ddacSmrg    /* r2.x = MAD(c1.x, r1.y, pv.x) */
803921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
804b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
805b7e1c893Smrg                             SRC0_ELEM(ELEM_X),
806b7e1c893Smrg                             SRC0_NEG(0),
807921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
808b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
809ad43ddacSmrg                             SRC1_ELEM(ELEM_Y),
810b7e1c893Smrg                             SRC1_NEG(0),
811b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
812b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
813b7e1c893Smrg                             LAST(0));
814ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
815ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
816ad43ddacSmrg                                 SRC2_ELEM(ELEM_X),
817ad43ddacSmrg                                 SRC2_NEG(0),
818ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
819ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
820b7e1c893Smrg                                 DST_GPR(2),
821b7e1c893Smrg                                 DST_REL(ABSOLUTE),
822b7e1c893Smrg                                 DST_ELEM(ELEM_X),
823ad43ddacSmrg                                 CLAMP(0));
824ad43ddacSmrg    /* r2.y = MAD(c1.y, r1.y, pv.y) */
825921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
826b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
827b7e1c893Smrg                             SRC0_ELEM(ELEM_Y),
828b7e1c893Smrg                             SRC0_NEG(0),
829921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
830b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
831b7e1c893Smrg                             SRC1_ELEM(ELEM_Y),
832b7e1c893Smrg                             SRC1_NEG(0),
833b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
834b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
835b7e1c893Smrg                             LAST(0));
836ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
837ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
838ad43ddacSmrg                                 SRC2_ELEM(ELEM_Y),
839ad43ddacSmrg                                 SRC2_NEG(0),
840ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
841ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
842ad43ddacSmrg                                 DST_GPR(2),
843b7e1c893Smrg                                 DST_REL(ABSOLUTE),
844b7e1c893Smrg                                 DST_ELEM(ELEM_Y),
845ad43ddacSmrg                                 CLAMP(0));
846ad43ddacSmrg    /* r2.z = MAD(c1.z, r1.y, pv.z) */
847921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
848b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
849b7e1c893Smrg                             SRC0_ELEM(ELEM_Z),
850b7e1c893Smrg                             SRC0_NEG(0),
851921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
852b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
853ad43ddacSmrg                             SRC1_ELEM(ELEM_Y),
854b7e1c893Smrg                             SRC1_NEG(0),
855b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
856b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
857b7e1c893Smrg                             LAST(0));
858ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
859ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
860ad43ddacSmrg                                 SRC2_ELEM(ELEM_Z),
861ad43ddacSmrg                                 SRC2_NEG(0),
862ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
863ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
864ad43ddacSmrg                                 DST_GPR(2),
865b7e1c893Smrg                                 DST_REL(ABSOLUTE),
866b7e1c893Smrg                                 DST_ELEM(ELEM_Z),
867ad43ddacSmrg                                 CLAMP(0));
868ad43ddacSmrg    /* r2.w = MAD(0, 0, 1); NOTE(review): SRC2_ELEM is W here but X in the other two r2.w slots -- presumably irrelevant for the constant source, confirm */
869ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
870b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
871ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
872b7e1c893Smrg                             SRC0_NEG(0),
873ad43ddacSmrg                             SRC1_SEL(SQ_ALU_SRC_0),
874b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
875ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
876b7e1c893Smrg                             SRC1_NEG(0),
877b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
878b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
879b7e1c893Smrg                             LAST(1));
880ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
881ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
882ad43ddacSmrg                                 SRC2_ELEM(ELEM_W),
883ad43ddacSmrg                                 SRC2_NEG(0),
884ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
885ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
886ad43ddacSmrg                                 DST_GPR(2),
887b7e1c893Smrg                                 DST_REL(ABSOLUTE),
888b7e1c893Smrg                                 DST_ELEM(ELEM_W),
889ad43ddacSmrg                                 CLAMP(0));
890ad43ddacSmrg    /* 12,13,14,15: third MULADD group, adds c2.xyz * r1.z to pv; all four slots set CLAMP(1) */
891ad43ddacSmrg    /* r2.x = MAD(c2.x, r1.z, pv.x) */
892921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2),
893b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
894b7e1c893Smrg                             SRC0_ELEM(ELEM_X),
895b7e1c893Smrg                             SRC0_NEG(0),
896921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
897b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
898ad43ddacSmrg                             SRC1_ELEM(ELEM_Z),
899b7e1c893Smrg                             SRC1_NEG(0),
900b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
901b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
902b7e1c893Smrg                             LAST(0));
903ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
904ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
905ad43ddacSmrg                                 SRC2_ELEM(ELEM_X),
906ad43ddacSmrg                                 SRC2_NEG(0),
907ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
908ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
909ad43ddacSmrg                                 DST_GPR(2),
910b7e1c893Smrg                                 DST_REL(ABSOLUTE),
911b7e1c893Smrg                                 DST_ELEM(ELEM_X),
912b7e1c893Smrg                                 CLAMP(1));
913ad43ddacSmrg    /* r2.y = MAD(c2.y, r1.z, pv.y) */
914921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2),
915b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
916b7e1c893Smrg                             SRC0_ELEM(ELEM_Y),
917b7e1c893Smrg                             SRC0_NEG(0),
918921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
919b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
920ad43ddacSmrg                             SRC1_ELEM(ELEM_Z),
921b7e1c893Smrg                             SRC1_NEG(0),
922b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
923b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
924b7e1c893Smrg                             LAST(0));
925ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
926ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
927ad43ddacSmrg                                 SRC2_ELEM(ELEM_Y),
928ad43ddacSmrg                                 SRC2_NEG(0),
929ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
930ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
931b7e1c893Smrg                                 DST_GPR(2),
932b7e1c893Smrg                                 DST_REL(ABSOLUTE),
933b7e1c893Smrg                                 DST_ELEM(ELEM_Y),
934b7e1c893Smrg                                 CLAMP(1));
935ad43ddacSmrg    /* r2.z = MAD(c2.z, r1.z, pv.z) */
936921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2),
937b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
938b7e1c893Smrg                             SRC0_ELEM(ELEM_Z),
939b7e1c893Smrg                             SRC0_NEG(0),
940921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
941b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
942b7e1c893Smrg                             SRC1_ELEM(ELEM_Z),
943b7e1c893Smrg                             SRC1_NEG(0),
944b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
945b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
946b7e1c893Smrg                             LAST(0));
947ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
948ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
949ad43ddacSmrg                                 SRC2_ELEM(ELEM_Z),
950ad43ddacSmrg                                 SRC2_NEG(0),
951ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
952ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
953ad43ddacSmrg                                 DST_GPR(2),
954b7e1c893Smrg                                 DST_REL(ABSOLUTE),
955b7e1c893Smrg                                 DST_ELEM(ELEM_Z),
956b7e1c893Smrg                                 CLAMP(1));
957ad43ddacSmrg    /* r2.w = MAD(0, 0, 1); LAST(1) ends the final group of the ALU clause */
958ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
959b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
960b7e1c893Smrg                             SRC0_ELEM(ELEM_X),
961b7e1c893Smrg                             SRC0_NEG(0),
962ad43ddacSmrg                             SRC1_SEL(SQ_ALU_SRC_0),
963b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
964b7e1c893Smrg                             SRC1_ELEM(ELEM_X),
965b7e1c893Smrg                             SRC1_NEG(0),
966b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
967b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
968b7e1c893Smrg                             LAST(1));
969ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
970ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
971ad43ddacSmrg                                 SRC2_ELEM(ELEM_X),
972ad43ddacSmrg                                 SRC2_NEG(0),
973ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
974ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
975ad43ddacSmrg                                 DST_GPR(2),
976b7e1c893Smrg                                 DST_REL(ABSOLUTE),
977b7e1c893Smrg                                 DST_ELEM(ELEM_W),
978b7e1c893Smrg                                 CLAMP(1));
979ad43ddacSmrg
980ad43ddacSmrg    /* 16: first subroutine (target of the CALL in slot 0): TEX clause, 3 instructions starting at slot 18 */
981ad43ddacSmrg    shader[i++] = CF_DWORD0(ADDR(18));
982b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
983b7e1c893Smrg                            CF_CONST(0),
984b7e1c893Smrg                            COND(SQ_CF_COND_ACTIVE),
985b7e1c893Smrg                            I_COUNT(3),
986b7e1c893Smrg                            CALL_COUNT(0),
987b7e1c893Smrg                            END_OF_PROGRAM(0),
988b7e1c893Smrg                            VALID_PIXEL_MODE(0),
989b7e1c893Smrg                            CF_INST(SQ_CF_INST_TEX),
990b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
991b7e1c893Smrg                            BARRIER(1));
992ad43ddacSmrg    /* 17: RETURN from the first subroutine */
993b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(0));
994b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
995b7e1c893Smrg			    CF_CONST(0),
996b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
997b7e1c893Smrg			    I_COUNT(0),
998b7e1c893Smrg			    CALL_COUNT(0),
999b7e1c893Smrg			    END_OF_PROGRAM(0),
1000b7e1c893Smrg			    VALID_PIXEL_MODE(0),
1001b7e1c893Smrg			    CF_INST(SQ_CF_INST_RETURN),
1002b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
1003b7e1c893Smrg			    BARRIER(1));
1004ad43ddacSmrg    /* 18/19: SAMPLE resource 0 / sampler 0 at r0.xy into r1.x (y and z masked, w = 1) */
1005b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1006b7e1c893Smrg                             BC_FRAC_MODE(0),
1007b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
1008b7e1c893Smrg                             RESOURCE_ID(0),
1009b7e1c893Smrg                             SRC_GPR(0),
1010b7e1c893Smrg                             SRC_REL(ABSOLUTE),
1011b7e1c893Smrg                             R7xx_ALT_CONST(0));
1012b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1013b7e1c893Smrg                             DST_REL(ABSOLUTE),
1014b7e1c893Smrg                             DST_SEL_X(SQ_SEL_X),
1015b7e1c893Smrg                             DST_SEL_Y(SQ_SEL_MASK),
1016b7e1c893Smrg                             DST_SEL_Z(SQ_SEL_MASK),
1017b7e1c893Smrg                             DST_SEL_W(SQ_SEL_1),
1018b7e1c893Smrg                             LOD_BIAS(0),
1019b7e1c893Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1020b7e1c893Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1021b7e1c893Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1022b7e1c893Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1023b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1024b7e1c893Smrg                             OFFSET_Y(0),
1025b7e1c893Smrg                             OFFSET_Z(0),
1026b7e1c893Smrg                             SAMPLER_ID(0),
1027b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
1028b7e1c893Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1029b7e1c893Smrg                             SRC_SEL_Z(SQ_SEL_0),
1030b7e1c893Smrg                             SRC_SEL_W(SQ_SEL_1));
1031b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
1032ad43ddacSmrg    /* 20/21: SAMPLE resource 1 / sampler 1 at r0.xy, result x into r1.z (x, y, w masked) */
1033b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1034b7e1c893Smrg                             BC_FRAC_MODE(0),
1035b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
1036b7e1c893Smrg                             RESOURCE_ID(1),
1037b7e1c893Smrg                             SRC_GPR(0),
1038b7e1c893Smrg                             SRC_REL(ABSOLUTE),
1039b7e1c893Smrg                             R7xx_ALT_CONST(0));
1040b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1041b7e1c893Smrg                             DST_REL(ABSOLUTE),
1042b7e1c893Smrg                             DST_SEL_X(SQ_SEL_MASK),
1043b7e1c893Smrg                             DST_SEL_Y(SQ_SEL_MASK),
1044b7e1c893Smrg                             DST_SEL_Z(SQ_SEL_X),
1045b7e1c893Smrg                             DST_SEL_W(SQ_SEL_MASK),
1046b7e1c893Smrg                             LOD_BIAS(0),
1047b7e1c893Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1048b7e1c893Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1049b7e1c893Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1050b7e1c893Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1051b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1052b7e1c893Smrg                             OFFSET_Y(0),
1053b7e1c893Smrg                             OFFSET_Z(0),
1054b7e1c893Smrg                             SAMPLER_ID(1),
1055b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
1056b7e1c893Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1057b7e1c893Smrg                             SRC_SEL_Z(SQ_SEL_0),
1058b7e1c893Smrg                             SRC_SEL_W(SQ_SEL_1));
1059b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
1060ad43ddacSmrg    /* 22/23: SAMPLE resource 2 / sampler 2 at r0.xy, result x into r1.y (x, z, w masked) */
1061b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1062b7e1c893Smrg                             BC_FRAC_MODE(0),
1063b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
1064b7e1c893Smrg                             RESOURCE_ID(2),
1065b7e1c893Smrg                             SRC_GPR(0),
1066b7e1c893Smrg                             SRC_REL(ABSOLUTE),
1067b7e1c893Smrg                             R7xx_ALT_CONST(0));
1068b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1069b7e1c893Smrg                             DST_REL(ABSOLUTE),
1070b7e1c893Smrg                             DST_SEL_X(SQ_SEL_MASK),
1071b7e1c893Smrg                             DST_SEL_Y(SQ_SEL_X),
1072b7e1c893Smrg                             DST_SEL_Z(SQ_SEL_MASK),
1073b7e1c893Smrg                             DST_SEL_W(SQ_SEL_MASK),
1074b7e1c893Smrg                             LOD_BIAS(0),
1075b7e1c893Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1076b7e1c893Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1077b7e1c893Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1078b7e1c893Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1079b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1080b7e1c893Smrg                             OFFSET_Y(0),
1081b7e1c893Smrg                             OFFSET_Z(0),
1082b7e1c893Smrg                             SAMPLER_ID(2),
1083b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
1084b7e1c893Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1085b7e1c893Smrg                             SRC_SEL_Z(SQ_SEL_0),
1086b7e1c893Smrg                             SRC_SEL_W(SQ_SEL_1));
1087b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
1088ad43ddacSmrg    /* 24: second subroutine (target of the CALL in slot 1): TEX clause, 1 instruction starting at slot 26 */
1089ad43ddacSmrg    shader[i++] = CF_DWORD0(ADDR(26));
1090b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1091b7e1c893Smrg                            CF_CONST(0),
1092b7e1c893Smrg                            COND(SQ_CF_COND_ACTIVE),
109368105dcbSveego                            I_COUNT(1),
1094b7e1c893Smrg                            CALL_COUNT(0),
1095b7e1c893Smrg                            END_OF_PROGRAM(0),
1096b7e1c893Smrg                            VALID_PIXEL_MODE(0),
1097b7e1c893Smrg                            CF_INST(SQ_CF_INST_TEX),
1098b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
1099b7e1c893Smrg                            BARRIER(1));
1100ad43ddacSmrg    /* 25: RETURN from the second subroutine */
1101b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(0));
1102b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1103b7e1c893Smrg			    CF_CONST(0),
1104b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
1105b7e1c893Smrg			    I_COUNT(0),
1106b7e1c893Smrg			    CALL_COUNT(0),
1107b7e1c893Smrg			    END_OF_PROGRAM(0),
1108b7e1c893Smrg			    VALID_PIXEL_MODE(0),
1109b7e1c893Smrg			    CF_INST(SQ_CF_INST_RETURN),
1110b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
1111b7e1c893Smrg			    BARRIER(1));
1112ad43ddacSmrg    /* 26/27: SAMPLE resource 0 / sampler 0 at r0.xy into r1.xyz (w = 1) */
1113b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1114b7e1c893Smrg                             BC_FRAC_MODE(0),
1115b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
1116b7e1c893Smrg                             RESOURCE_ID(0),
1117b7e1c893Smrg                             SRC_GPR(0),
1118b7e1c893Smrg                             SRC_REL(ABSOLUTE),
1119b7e1c893Smrg                             R7xx_ALT_CONST(0));
1120b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1121b7e1c893Smrg                             DST_REL(ABSOLUTE),
1122b7e1c893Smrg                             DST_SEL_X(SQ_SEL_X),
112368105dcbSveego                             DST_SEL_Y(SQ_SEL_Y),
112468105dcbSveego                             DST_SEL_Z(SQ_SEL_Z),
1125b7e1c893Smrg                             DST_SEL_W(SQ_SEL_1),
1126b7e1c893Smrg                             LOD_BIAS(0),
1127b7e1c893Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1128b7e1c893Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1129b7e1c893Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1130b7e1c893Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1131b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1132b7e1c893Smrg                             OFFSET_Y(0),
1133b7e1c893Smrg                             OFFSET_Z(0),
1134b7e1c893Smrg                             SAMPLER_ID(0),
1135b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
1136b7e1c893Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1137b7e1c893Smrg                             SRC_SEL_Z(SQ_SEL_0),
1138b7e1c893Smrg                             SRC_SEL_W(SQ_SEL_1));
1139b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
1140b7e1c893Smrg
1141b7e1c893Smrg    return i;
1142b7e1c893Smrg}
1143b7e1c893Smrg
1144b7e1c893Smrg/* comp vs --------------------------------------- */
1145b7e1c893Smrgint R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
1146b7e1c893Smrg{
1147b7e1c893Smrg    int i = 0;
1148b7e1c893Smrg
1149b7e1c893Smrg    /* 0 */
1150b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(3));
1151b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1152b7e1c893Smrg                            CF_CONST(0),
1153b7e1c893Smrg                            COND(SQ_CF_COND_BOOL),
1154b7e1c893Smrg                            I_COUNT(0),
1155b7e1c893Smrg                            CALL_COUNT(0),
1156b7e1c893Smrg                            END_OF_PROGRAM(0),
1157b7e1c893Smrg                            VALID_PIXEL_MODE(0),
1158b7e1c893Smrg                            CF_INST(SQ_CF_INST_CALL),
1159b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
1160b7e1c893Smrg                            BARRIER(0));
1161b7e1c893Smrg    /* 1 */
11620974d292Smrg    shader[i++] = CF_DWORD0(ADDR(9));
1163b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1164b7e1c893Smrg                            CF_CONST(0),
1165b7e1c893Smrg                            COND(SQ_CF_COND_NOT_BOOL),
1166b7e1c893Smrg                            I_COUNT(0),
1167b7e1c893Smrg                            CALL_COUNT(0),
1168b7e1c893Smrg                            END_OF_PROGRAM(0),
1169b7e1c893Smrg                            VALID_PIXEL_MODE(0),
1170b7e1c893Smrg                            CF_INST(SQ_CF_INST_CALL),
1171b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
1172b7e1c893Smrg                            BARRIER(0));
1173b7e1c893Smrg    /* 2 */
11742f39173dSmrg    shader[i++] = CF_DWORD0(ADDR(0));
1175b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1176b7e1c893Smrg                            CF_CONST(0),
1177b7e1c893Smrg                            COND(SQ_CF_COND_ACTIVE),
1178b7e1c893Smrg                            I_COUNT(0),
1179b7e1c893Smrg                            CALL_COUNT(0),
1180b7e1c893Smrg                            END_OF_PROGRAM(1),
1181b7e1c893Smrg                            VALID_PIXEL_MODE(0),
1182b7e1c893Smrg                            CF_INST(SQ_CF_INST_NOP),
1183b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
1184b7e1c893Smrg                            BARRIER(1));
1185b7e1c893Smrg    /* 3 - mask sub */
1186921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(44));
1187b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1188b7e1c893Smrg			    CF_CONST(0),
1189b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
1190b7e1c893Smrg			    I_COUNT(3),
1191b7e1c893Smrg			    CALL_COUNT(0),
1192b7e1c893Smrg			    END_OF_PROGRAM(0),
1193b7e1c893Smrg			    VALID_PIXEL_MODE(0),
1194b7e1c893Smrg			    CF_INST(SQ_CF_INST_VTX),
1195b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
1196b7e1c893Smrg			    BARRIER(1));
1197ad43ddacSmrg
1198ad43ddacSmrg    /* 4 - ALU */
11990974d292Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(14),
1200ad43ddacSmrg				KCACHE_BANK0(0),
1201ad43ddacSmrg				KCACHE_BANK1(0),
1202ad43ddacSmrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
1203ad43ddacSmrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1204ad43ddacSmrg				KCACHE_ADDR0(0),
1205ad43ddacSmrg				KCACHE_ADDR1(0),
1206921a55d8Smrg				I_COUNT(20),
1207ad43ddacSmrg				USES_WATERFALL(0),
1208ad43ddacSmrg				CF_INST(SQ_CF_INST_ALU),
1209ad43ddacSmrg				WHOLE_QUAD_MODE(0),
1210ad43ddacSmrg				BARRIER(1));
1211ad43ddacSmrg
1212ad43ddacSmrg    /* 5 - dst */
1213b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1214b7e1c893Smrg					  TYPE(SQ_EXPORT_POS),
1215b7e1c893Smrg					  RW_GPR(2),
1216b7e1c893Smrg					  RW_REL(ABSOLUTE),
1217b7e1c893Smrg					  INDEX_GPR(0),
1218b7e1c893Smrg					  ELEM_SIZE(0));
1219b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1220b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1221ad43ddacSmrg					       SRC_SEL_Z(SQ_SEL_0),
1222ad43ddacSmrg					       SRC_SEL_W(SQ_SEL_1),
1223b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
1224b7e1c893Smrg					       BURST_COUNT(1),
1225b7e1c893Smrg					       END_OF_PROGRAM(0),
1226b7e1c893Smrg					       VALID_PIXEL_MODE(0),
1227b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1228b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
1229b7e1c893Smrg					       BARRIER(1));
1230ad43ddacSmrg    /* 6 - src */
1231b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1232b7e1c893Smrg					  TYPE(SQ_EXPORT_PARAM),
1233b7e1c893Smrg					  RW_GPR(1),
1234b7e1c893Smrg					  RW_REL(ABSOLUTE),
1235b7e1c893Smrg					  INDEX_GPR(0),
1236b7e1c893Smrg					  ELEM_SIZE(0));
1237b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1238b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1239ad43ddacSmrg					       SRC_SEL_Z(SQ_SEL_0),
1240ad43ddacSmrg					       SRC_SEL_W(SQ_SEL_1),
1241b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
1242b7e1c893Smrg					       BURST_COUNT(1),
1243b7e1c893Smrg					       END_OF_PROGRAM(0),
1244b7e1c893Smrg					       VALID_PIXEL_MODE(0),
1245b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT),
1246b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
1247b7e1c893Smrg					       BARRIER(0));
1248ad43ddacSmrg    /* 7 - mask */
1249b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
1250b7e1c893Smrg					  TYPE(SQ_EXPORT_PARAM),
1251b7e1c893Smrg					  RW_GPR(0),
1252b7e1c893Smrg					  RW_REL(ABSOLUTE),
1253b7e1c893Smrg					  INDEX_GPR(0),
1254b7e1c893Smrg					  ELEM_SIZE(0));
1255b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1256b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1257ad43ddacSmrg					       SRC_SEL_Z(SQ_SEL_0),
1258ad43ddacSmrg					       SRC_SEL_W(SQ_SEL_1),
1259b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
1260b7e1c893Smrg					       BURST_COUNT(1),
1261b7e1c893Smrg					       END_OF_PROGRAM(0),
1262b7e1c893Smrg					       VALID_PIXEL_MODE(0),
1263b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1264b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
1265b7e1c893Smrg					       BARRIER(0));
1266ad43ddacSmrg    /* 8 */
1267b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(0));
1268b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1269b7e1c893Smrg			    CF_CONST(0),
1270b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
1271b7e1c893Smrg			    I_COUNT(0),
1272b7e1c893Smrg			    CALL_COUNT(0),
1273b7e1c893Smrg			    END_OF_PROGRAM(0),
1274b7e1c893Smrg			    VALID_PIXEL_MODE(0),
1275b7e1c893Smrg			    CF_INST(SQ_CF_INST_RETURN),
1276b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
1277b7e1c893Smrg			    BARRIER(1));
12780974d292Smrg    /* 9 - non-mask sub */
1279921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(50));
12800974d292Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
12810974d292Smrg			    CF_CONST(0),
12820974d292Smrg			    COND(SQ_CF_COND_ACTIVE),
12830974d292Smrg			    I_COUNT(2),
12840974d292Smrg			    CALL_COUNT(0),
12850974d292Smrg			    END_OF_PROGRAM(0),
12860974d292Smrg			    VALID_PIXEL_MODE(0),
12870974d292Smrg			    CF_INST(SQ_CF_INST_VTX),
12880974d292Smrg			    WHOLE_QUAD_MODE(0),
12890974d292Smrg			    BARRIER(1));
1290b7e1c893Smrg
12910974d292Smrg    /* 10 - ALU */
1292921a55d8Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(34),
12930974d292Smrg				KCACHE_BANK0(0),
12940974d292Smrg				KCACHE_BANK1(0),
12950974d292Smrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
12960974d292Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
12970974d292Smrg				KCACHE_ADDR0(0),
12980974d292Smrg				KCACHE_ADDR1(0),
1299921a55d8Smrg				I_COUNT(10),
13000974d292Smrg				USES_WATERFALL(0),
13010974d292Smrg				CF_INST(SQ_CF_INST_ALU),
13020974d292Smrg				WHOLE_QUAD_MODE(0),
13030974d292Smrg				BARRIER(1));
1304ad43ddacSmrg
13050974d292Smrg    /* 11 - dst */
13060974d292Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
13070974d292Smrg					  TYPE(SQ_EXPORT_POS),
13080974d292Smrg					  RW_GPR(1),
13090974d292Smrg					  RW_REL(ABSOLUTE),
13100974d292Smrg					  INDEX_GPR(0),
13110974d292Smrg					  ELEM_SIZE(0));
13120974d292Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
13130974d292Smrg					       SRC_SEL_Y(SQ_SEL_Y),
13140974d292Smrg					       SRC_SEL_Z(SQ_SEL_0),
13150974d292Smrg					       SRC_SEL_W(SQ_SEL_1),
13160974d292Smrg					       R6xx_ELEM_LOOP(0),
13170974d292Smrg					       BURST_COUNT(0),
13180974d292Smrg					       END_OF_PROGRAM(0),
13190974d292Smrg					       VALID_PIXEL_MODE(0),
13200974d292Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
13210974d292Smrg					       WHOLE_QUAD_MODE(0),
13220974d292Smrg					       BARRIER(1));
13230974d292Smrg    /* 12 - src */
13240974d292Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
13250974d292Smrg					  TYPE(SQ_EXPORT_PARAM),
13260974d292Smrg					  RW_GPR(0),
13270974d292Smrg					  RW_REL(ABSOLUTE),
13280974d292Smrg					  INDEX_GPR(0),
13290974d292Smrg					  ELEM_SIZE(0));
13300974d292Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
13310974d292Smrg					       SRC_SEL_Y(SQ_SEL_Y),
13320974d292Smrg					       SRC_SEL_Z(SQ_SEL_0),
13330974d292Smrg					       SRC_SEL_W(SQ_SEL_1),
13340974d292Smrg					       R6xx_ELEM_LOOP(0),
13350974d292Smrg					       BURST_COUNT(0),
13360974d292Smrg					       END_OF_PROGRAM(0),
13370974d292Smrg					       VALID_PIXEL_MODE(0),
13380974d292Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
13390974d292Smrg					       WHOLE_QUAD_MODE(0),
13400974d292Smrg					       BARRIER(0));
13410974d292Smrg    /* 13 */
13420974d292Smrg    shader[i++] = CF_DWORD0(ADDR(0));
13430974d292Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
13440974d292Smrg			    CF_CONST(0),
13450974d292Smrg			    COND(SQ_CF_COND_ACTIVE),
13460974d292Smrg			    I_COUNT(0),
13470974d292Smrg			    CALL_COUNT(0),
13480974d292Smrg			    END_OF_PROGRAM(0),
13490974d292Smrg			    VALID_PIXEL_MODE(0),
13500974d292Smrg			    CF_INST(SQ_CF_INST_RETURN),
13510974d292Smrg			    WHOLE_QUAD_MODE(0),
13520974d292Smrg			    BARRIER(1));
13530974d292Smrg
13540974d292Smrg
1355921a55d8Smrg    /* 14 srcX.x DOT4 - mask */
1356921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1357921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1358921a55d8Smrg                             SRC0_ELEM(ELEM_X),
1359921a55d8Smrg                             SRC0_NEG(0),
1360921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1361921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1362921a55d8Smrg                             SRC1_ELEM(ELEM_X),
1363921a55d8Smrg                             SRC1_NEG(0),
1364921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1365921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1366921a55d8Smrg                             LAST(0));
1367921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1368921a55d8Smrg                                 SRC0_ABS(0),
1369921a55d8Smrg                                 SRC1_ABS(0),
1370921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1371921a55d8Smrg                                 UPDATE_PRED(0),
1372921a55d8Smrg                                 WRITE_MASK(1),
1373921a55d8Smrg                                 FOG_MERGE(0),
1374921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1375921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1376921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1377921a55d8Smrg                                 DST_GPR(3),
1378921a55d8Smrg                                 DST_REL(ABSOLUTE),
1379921a55d8Smrg                                 DST_ELEM(ELEM_X),
1380921a55d8Smrg                                 CLAMP(0));
1381921a55d8Smrg
1382921a55d8Smrg    /* 15 srcX.y DOT4 - mask */
1383921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
13840974d292Smrg                             SRC0_REL(ABSOLUTE),
13850974d292Smrg                             SRC0_ELEM(ELEM_Y),
13860974d292Smrg                             SRC0_NEG(0),
1387921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
13880974d292Smrg                             SRC1_REL(ABSOLUTE),
13890974d292Smrg                             SRC1_ELEM(ELEM_Y),
13900974d292Smrg                             SRC1_NEG(0),
13910974d292Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
13920974d292Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1393921a55d8Smrg                             LAST(0));
1394921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1395921a55d8Smrg                                 SRC0_ABS(0),
1396921a55d8Smrg                                 SRC1_ABS(0),
1397921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1398921a55d8Smrg                                 UPDATE_PRED(0),
1399921a55d8Smrg                                 WRITE_MASK(0),
1400921a55d8Smrg                                 FOG_MERGE(0),
1401921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1402921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
14030974d292Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1404921a55d8Smrg                                 DST_GPR(3),
1405921a55d8Smrg                                 DST_REL(ABSOLUTE),
1406921a55d8Smrg                                 DST_ELEM(ELEM_Y),
1407921a55d8Smrg                                 CLAMP(0));
1408921a55d8Smrg
1409921a55d8Smrg    /* 16 srcX.z DOT4 - mask */
1410921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1411921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1412921a55d8Smrg                             SRC0_ELEM(ELEM_Z),
1413921a55d8Smrg                             SRC0_NEG(0),
1414921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1415921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1416921a55d8Smrg                             SRC1_ELEM(ELEM_Z),
1417921a55d8Smrg                             SRC1_NEG(0),
1418921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1419921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1420921a55d8Smrg                             LAST(0));
1421921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1422921a55d8Smrg                                 SRC0_ABS(0),
1423921a55d8Smrg                                 SRC1_ABS(0),
1424921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1425921a55d8Smrg                                 UPDATE_PRED(0),
1426921a55d8Smrg                                 WRITE_MASK(0),
1427921a55d8Smrg                                 FOG_MERGE(0),
1428921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1429921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1430921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1431921a55d8Smrg                                 DST_GPR(3),
14320974d292Smrg                                 DST_REL(ABSOLUTE),
14330974d292Smrg                                 DST_ELEM(ELEM_Z),
14340974d292Smrg                                 CLAMP(0));
1435921a55d8Smrg
1436921a55d8Smrg    /* 17 srcX.w DOT4 - mask */
1437921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1438ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1439921a55d8Smrg                             SRC0_ELEM(ELEM_W),
1440ad43ddacSmrg                             SRC0_NEG(0),
1441921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1442ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1443921a55d8Smrg                             SRC1_ELEM(ELEM_W),
1444ad43ddacSmrg                             SRC1_NEG(0),
1445ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1446ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1447ad43ddacSmrg                             LAST(1));
1448921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1449921a55d8Smrg                                 SRC0_ABS(0),
1450921a55d8Smrg                                 SRC1_ABS(0),
1451921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1452921a55d8Smrg                                 UPDATE_PRED(0),
1453921a55d8Smrg                                 WRITE_MASK(0),
1454921a55d8Smrg                                 FOG_MERGE(0),
1455921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1456921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1457ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1458921a55d8Smrg                                 DST_GPR(3),
1459ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1460ad43ddacSmrg                                 DST_ELEM(ELEM_W),
1461ad43ddacSmrg                                 CLAMP(0));
1462ad43ddacSmrg
1463921a55d8Smrg    /* 18 srcY.x DOT4 - mask */
1464921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1465ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1466ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
1467ad43ddacSmrg                             SRC0_NEG(0),
1468921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1469ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1470ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
1471ad43ddacSmrg                             SRC1_NEG(0),
1472ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1473ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1474ad43ddacSmrg                             LAST(0));
1475921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1476921a55d8Smrg                                 SRC0_ABS(0),
1477921a55d8Smrg                                 SRC1_ABS(0),
1478921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1479921a55d8Smrg                                 UPDATE_PRED(0),
1480921a55d8Smrg                                 WRITE_MASK(0),
1481921a55d8Smrg                                 FOG_MERGE(0),
1482921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1483921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1484ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1485921a55d8Smrg                                 DST_GPR(3),
1486ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1487ad43ddacSmrg                                 DST_ELEM(ELEM_X),
1488ad43ddacSmrg                                 CLAMP(0));
1489921a55d8Smrg
1490921a55d8Smrg    /* 19 srcY.y DOT4 - mask */
1491921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1492921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1493921a55d8Smrg                             SRC0_ELEM(ELEM_Y),
1494921a55d8Smrg                             SRC0_NEG(0),
1495921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1496921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1497921a55d8Smrg                             SRC1_ELEM(ELEM_Y),
1498921a55d8Smrg                             SRC1_NEG(0),
1499921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1500921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1501921a55d8Smrg                             LAST(0));
1502921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1503921a55d8Smrg                                 SRC0_ABS(0),
1504921a55d8Smrg                                 SRC1_ABS(0),
1505921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1506921a55d8Smrg                                 UPDATE_PRED(0),
1507921a55d8Smrg                                 WRITE_MASK(1),
1508921a55d8Smrg                                 FOG_MERGE(0),
1509921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1510921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1511921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1512921a55d8Smrg                                 DST_GPR(3),
1513921a55d8Smrg                                 DST_REL(ABSOLUTE),
1514921a55d8Smrg                                 DST_ELEM(ELEM_Y),
1515921a55d8Smrg                                 CLAMP(0));
1516921a55d8Smrg
1517921a55d8Smrg    /* 20 srcY.z DOT4 - mask */
1518921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1519921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1520921a55d8Smrg                             SRC0_ELEM(ELEM_Z),
1521921a55d8Smrg                             SRC0_NEG(0),
1522921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1523921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1524921a55d8Smrg                             SRC1_ELEM(ELEM_Z),
1525921a55d8Smrg                             SRC1_NEG(0),
1526921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1527921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1528921a55d8Smrg                             LAST(0));
1529921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1530921a55d8Smrg                                 SRC0_ABS(0),
1531921a55d8Smrg                                 SRC1_ABS(0),
1532921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1533921a55d8Smrg                                 UPDATE_PRED(0),
1534921a55d8Smrg                                 WRITE_MASK(0),
1535921a55d8Smrg                                 FOG_MERGE(0),
1536921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1537921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1538921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1539921a55d8Smrg                                 DST_GPR(3),
1540921a55d8Smrg                                 DST_REL(ABSOLUTE),
1541921a55d8Smrg                                 DST_ELEM(ELEM_Z),
1542921a55d8Smrg                                 CLAMP(0));
1543921a55d8Smrg
1544921a55d8Smrg    /* 21 srcY.w DOT4 - mask */
1545921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1546921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1547921a55d8Smrg                             SRC0_ELEM(ELEM_W),
1548921a55d8Smrg                             SRC0_NEG(0),
1549921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1550921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1551921a55d8Smrg                             SRC1_ELEM(ELEM_W),
1552921a55d8Smrg                             SRC1_NEG(0),
1553921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1554921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1555921a55d8Smrg                             LAST(1));
1556921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1557921a55d8Smrg                                 SRC0_ABS(0),
1558921a55d8Smrg                                 SRC1_ABS(0),
1559921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1560921a55d8Smrg                                 UPDATE_PRED(0),
1561921a55d8Smrg                                 WRITE_MASK(0),
1562921a55d8Smrg                                 FOG_MERGE(0),
1563921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1564921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1565921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1566921a55d8Smrg                                 DST_GPR(3),
1567921a55d8Smrg                                 DST_REL(ABSOLUTE),
1568921a55d8Smrg                                 DST_ELEM(ELEM_W),
1569921a55d8Smrg                                 CLAMP(0));
1570921a55d8Smrg
1571921a55d8Smrg    /* 22 maskX.x DOT4 - mask */
1572921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1573ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1574ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
1575ad43ddacSmrg                             SRC0_NEG(0),
1576921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1577ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1578ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
1579ad43ddacSmrg                             SRC1_NEG(0),
1580ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1581ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1582921a55d8Smrg                             LAST(0));
1583921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1584921a55d8Smrg                                 SRC0_ABS(0),
1585921a55d8Smrg                                 SRC1_ABS(0),
1586921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1587921a55d8Smrg                                 UPDATE_PRED(0),
1588921a55d8Smrg                                 WRITE_MASK(1),
1589921a55d8Smrg                                 FOG_MERGE(0),
1590921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1591921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1592ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1593921a55d8Smrg                                 DST_GPR(4),
1594ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1595921a55d8Smrg                                 DST_ELEM(ELEM_X),
1596ad43ddacSmrg                                 CLAMP(0));
1597ad43ddacSmrg
1598921a55d8Smrg    /* 23 maskX.y DOT4 - mask */
1599921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1600ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1601ad43ddacSmrg                             SRC0_ELEM(ELEM_Y),
1602ad43ddacSmrg                             SRC0_NEG(0),
1603921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1604ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1605ad43ddacSmrg                             SRC1_ELEM(ELEM_Y),
1606ad43ddacSmrg                             SRC1_NEG(0),
1607ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1608ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1609921a55d8Smrg                             LAST(0));
1610921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1611921a55d8Smrg                                 SRC0_ABS(0),
1612921a55d8Smrg                                 SRC1_ABS(0),
1613921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1614921a55d8Smrg                                 UPDATE_PRED(0),
1615921a55d8Smrg                                 WRITE_MASK(0),
1616921a55d8Smrg                                 FOG_MERGE(0),
1617921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1618921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1619ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1620921a55d8Smrg                                 DST_GPR(4),
1621921a55d8Smrg                                 DST_REL(ABSOLUTE),
1622921a55d8Smrg                                 DST_ELEM(ELEM_Y),
1623921a55d8Smrg                                 CLAMP(0));
1624921a55d8Smrg
1625921a55d8Smrg    /* 24 maskX.z DOT4 - mask */
1626921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1627921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1628921a55d8Smrg                             SRC0_ELEM(ELEM_Z),
1629921a55d8Smrg                             SRC0_NEG(0),
1630921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1631921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1632921a55d8Smrg                             SRC1_ELEM(ELEM_Z),
1633921a55d8Smrg                             SRC1_NEG(0),
1634921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1635921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1636921a55d8Smrg                             LAST(0));
1637921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1638921a55d8Smrg                                 SRC0_ABS(0),
1639921a55d8Smrg                                 SRC1_ABS(0),
1640921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1641921a55d8Smrg                                 UPDATE_PRED(0),
1642921a55d8Smrg                                 WRITE_MASK(0),
1643921a55d8Smrg                                 FOG_MERGE(0),
1644921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1645921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1646921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1647921a55d8Smrg                                 DST_GPR(4),
1648ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1649ad43ddacSmrg                                 DST_ELEM(ELEM_Z),
1650ad43ddacSmrg                                 CLAMP(0));
1651ad43ddacSmrg
1652921a55d8Smrg    /* 25 maskX.w DOT4 - mask */
1653921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1654ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1655921a55d8Smrg                             SRC0_ELEM(ELEM_W),
1656ad43ddacSmrg                             SRC0_NEG(0),
1657921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1658ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1659921a55d8Smrg                             SRC1_ELEM(ELEM_W),
1660ad43ddacSmrg                             SRC1_NEG(0),
1661ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1662ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1663ad43ddacSmrg                             LAST(1));
1664921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1665921a55d8Smrg                                 SRC0_ABS(0),
1666921a55d8Smrg                                 SRC1_ABS(0),
1667921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1668921a55d8Smrg                                 UPDATE_PRED(0),
1669921a55d8Smrg                                 WRITE_MASK(0),
1670921a55d8Smrg                                 FOG_MERGE(0),
1671921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1672921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1673ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1674921a55d8Smrg                                 DST_GPR(4),
1675ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1676ad43ddacSmrg                                 DST_ELEM(ELEM_W),
1677ad43ddacSmrg                                 CLAMP(0));
1678ad43ddacSmrg
1679921a55d8Smrg    /* 26 maskY.x DOT4 - mask */
1680921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1681ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1682ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
1683ad43ddacSmrg                             SRC0_NEG(0),
1684921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1685ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1686ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
1687ad43ddacSmrg                             SRC1_NEG(0),
1688ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1689ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1690ad43ddacSmrg                             LAST(0));
1691921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1692921a55d8Smrg                                 SRC0_ABS(0),
1693921a55d8Smrg                                 SRC1_ABS(0),
1694921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1695921a55d8Smrg                                 UPDATE_PRED(0),
1696921a55d8Smrg                                 WRITE_MASK(0),
1697921a55d8Smrg                                 FOG_MERGE(0),
1698921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1699921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1700ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1701921a55d8Smrg                                 DST_GPR(4),
1702ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1703ad43ddacSmrg                                 DST_ELEM(ELEM_X),
1704ad43ddacSmrg                                 CLAMP(0));
1705921a55d8Smrg
1706921a55d8Smrg    /* 27 maskY.y DOT4 - mask */
1707921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1708ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1709921a55d8Smrg                             SRC0_ELEM(ELEM_Y),
1710ad43ddacSmrg                             SRC0_NEG(0),
1711921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1712ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1713921a55d8Smrg                             SRC1_ELEM(ELEM_Y),
1714ad43ddacSmrg                             SRC1_NEG(0),
1715ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1716ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1717921a55d8Smrg                             LAST(0));
1718921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1719921a55d8Smrg                                 SRC0_ABS(0),
1720921a55d8Smrg                                 SRC1_ABS(0),
1721921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1722921a55d8Smrg                                 UPDATE_PRED(0),
1723921a55d8Smrg                                 WRITE_MASK(1),
1724921a55d8Smrg                                 FOG_MERGE(0),
1725921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1726921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1727ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1728921a55d8Smrg                                 DST_GPR(4),
1729ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1730ad43ddacSmrg                                 DST_ELEM(ELEM_Y),
1731ad43ddacSmrg                                 CLAMP(0));
1732ad43ddacSmrg
1733921a55d8Smrg    /* 28 maskY.z DOT4 - mask */
1734921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1735921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1736921a55d8Smrg                             SRC0_ELEM(ELEM_Z),
1737921a55d8Smrg                             SRC0_NEG(0),
1738921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1739921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1740921a55d8Smrg                             SRC1_ELEM(ELEM_Z),
1741921a55d8Smrg                             SRC1_NEG(0),
1742921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1743921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1744921a55d8Smrg                             LAST(0));
1745921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1746921a55d8Smrg                                 SRC0_ABS(0),
1747921a55d8Smrg                                 SRC1_ABS(0),
1748921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1749921a55d8Smrg                                 UPDATE_PRED(0),
1750921a55d8Smrg                                 WRITE_MASK(0),
1751921a55d8Smrg                                 FOG_MERGE(0),
1752921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1753921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1754921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1755921a55d8Smrg                                 DST_GPR(4),
1756921a55d8Smrg                                 DST_REL(ABSOLUTE),
1757921a55d8Smrg                                 DST_ELEM(ELEM_Z),
1758921a55d8Smrg                                 CLAMP(0));
1759921a55d8Smrg
1760921a55d8Smrg    /* 29 maskY.w DOT4 - mask */
1761921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1762921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1763921a55d8Smrg                             SRC0_ELEM(ELEM_W),
1764921a55d8Smrg                             SRC0_NEG(0),
1765921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1766921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1767921a55d8Smrg                             SRC1_ELEM(ELEM_W),
1768921a55d8Smrg                             SRC1_NEG(0),
1769921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1770921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1771921a55d8Smrg                             LAST(1));
1772921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1773921a55d8Smrg                                 SRC0_ABS(0),
1774921a55d8Smrg                                 SRC1_ABS(0),
1775921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1776921a55d8Smrg                                 UPDATE_PRED(0),
1777921a55d8Smrg                                 WRITE_MASK(0),
1778921a55d8Smrg                                 FOG_MERGE(0),
1779921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1780921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1781921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1782921a55d8Smrg                                 DST_GPR(4),
1783921a55d8Smrg                                 DST_REL(ABSOLUTE),
1784921a55d8Smrg                                 DST_ELEM(ELEM_W),
1785921a55d8Smrg                                 CLAMP(0));
1786921a55d8Smrg
1787921a55d8Smrg    /* 30 srcX / w */
1788921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
1789ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1790ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
1791ad43ddacSmrg                             SRC0_NEG(0),
1792921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1793ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1794ad43ddacSmrg                             SRC1_ELEM(ELEM_W),
1795ad43ddacSmrg                             SRC1_NEG(0),
1796ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
1797ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1798ad43ddacSmrg                             LAST(1));
1799ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1800ad43ddacSmrg                                 SRC0_ABS(0),
1801ad43ddacSmrg                                 SRC1_ABS(0),
1802ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
1803ad43ddacSmrg                                 UPDATE_PRED(0),
1804ad43ddacSmrg                                 WRITE_MASK(1),
1805ad43ddacSmrg                                 FOG_MERGE(0),
1806ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
1807ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
1808ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1809ad43ddacSmrg                                 DST_GPR(1),
1810ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1811ad43ddacSmrg                                 DST_ELEM(ELEM_X),
1812ad43ddacSmrg                                 CLAMP(0));
1813ad43ddacSmrg
1814921a55d8Smrg    /* 31 srcY / h */
1815921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
1816ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1817ad43ddacSmrg                             SRC0_ELEM(ELEM_Y),
1818ad43ddacSmrg                             SRC0_NEG(0),
1819921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1820ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1821ad43ddacSmrg                             SRC1_ELEM(ELEM_W),
1822ad43ddacSmrg                             SRC1_NEG(0),
1823ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
1824ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1825ad43ddacSmrg                             LAST(1));
1826ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1827ad43ddacSmrg                                 SRC0_ABS(0),
1828ad43ddacSmrg                                 SRC1_ABS(0),
1829ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
1830ad43ddacSmrg                                 UPDATE_PRED(0),
1831ad43ddacSmrg                                 WRITE_MASK(1),
1832ad43ddacSmrg                                 FOG_MERGE(0),
1833ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
1834ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
1835ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1836ad43ddacSmrg                                 DST_GPR(1),
1837ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1838ad43ddacSmrg                                 DST_ELEM(ELEM_Y),
1839ad43ddacSmrg                                 CLAMP(0));
1840ad43ddacSmrg
1841921a55d8Smrg    /* 32 maskX / w */
1842921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
1843ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1844ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
1845ad43ddacSmrg                             SRC0_NEG(0),
1846921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1847ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1848ad43ddacSmrg                             SRC1_ELEM(ELEM_W),
1849ad43ddacSmrg                             SRC1_NEG(0),
1850ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
1851ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1852ad43ddacSmrg                             LAST(1));
1853ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1854ad43ddacSmrg                                 SRC0_ABS(0),
1855ad43ddacSmrg                                 SRC1_ABS(0),
1856ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
1857ad43ddacSmrg                                 UPDATE_PRED(0),
1858ad43ddacSmrg                                 WRITE_MASK(1),
1859ad43ddacSmrg                                 FOG_MERGE(0),
1860ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
1861ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
1862ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1863ad43ddacSmrg                                 DST_GPR(0),
1864ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1865ad43ddacSmrg                                 DST_ELEM(ELEM_X),
1866ad43ddacSmrg                                 CLAMP(0));
1867ad43ddacSmrg
1868921a55d8Smrg    /* 33 maskY / h */
1869921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
1870ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1871ad43ddacSmrg                             SRC0_ELEM(ELEM_Y),
1872ad43ddacSmrg                             SRC0_NEG(0),
1873921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1874ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1875ad43ddacSmrg                             SRC1_ELEM(ELEM_W),
1876ad43ddacSmrg                             SRC1_NEG(0),
1877ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
1878ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1879ad43ddacSmrg                             LAST(1));
1880ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1881ad43ddacSmrg                                 SRC0_ABS(0),
1882ad43ddacSmrg                                 SRC1_ABS(0),
1883ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
1884ad43ddacSmrg                                 UPDATE_PRED(0),
1885ad43ddacSmrg                                 WRITE_MASK(1),
1886ad43ddacSmrg                                 FOG_MERGE(0),
1887ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
1888ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
1889ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1890ad43ddacSmrg                                 DST_GPR(0),
1891ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1892ad43ddacSmrg                                 DST_ELEM(ELEM_Y),
1893ad43ddacSmrg                                 CLAMP(0));
1894ad43ddacSmrg
1895921a55d8Smrg    /* 34 srcX.x DOT4 - non-mask */
1896921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1897921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1898921a55d8Smrg                             SRC0_ELEM(ELEM_X),
1899921a55d8Smrg                             SRC0_NEG(0),
1900921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1901921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1902921a55d8Smrg                             SRC1_ELEM(ELEM_X),
1903921a55d8Smrg                             SRC1_NEG(0),
1904921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1905921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1906921a55d8Smrg                             LAST(0));
1907921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1908921a55d8Smrg                                 SRC0_ABS(0),
1909921a55d8Smrg                                 SRC1_ABS(0),
1910921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1911921a55d8Smrg                                 UPDATE_PRED(0),
1912921a55d8Smrg                                 WRITE_MASK(1),
1913921a55d8Smrg                                 FOG_MERGE(0),
1914921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1915921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1916921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1917921a55d8Smrg                                 DST_GPR(2),
1918921a55d8Smrg                                 DST_REL(ABSOLUTE),
1919921a55d8Smrg                                 DST_ELEM(ELEM_X),
1920921a55d8Smrg                                 CLAMP(0));
1921921a55d8Smrg
1922921a55d8Smrg    /* 35 srcX.y DOT4 - non-mask */
1923921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1924ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1925ad43ddacSmrg                             SRC0_ELEM(ELEM_Y),
1926ad43ddacSmrg                             SRC0_NEG(0),
1927921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1928ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1929ad43ddacSmrg                             SRC1_ELEM(ELEM_Y),
1930ad43ddacSmrg                             SRC1_NEG(0),
1931ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1932ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1933921a55d8Smrg                             LAST(0));
1934921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1935921a55d8Smrg                                 SRC0_ABS(0),
1936921a55d8Smrg                                 SRC1_ABS(0),
1937921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1938921a55d8Smrg                                 UPDATE_PRED(0),
1939921a55d8Smrg                                 WRITE_MASK(0),
1940921a55d8Smrg                                 FOG_MERGE(0),
1941921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1942921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1943ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1944921a55d8Smrg                                 DST_GPR(2),
1945921a55d8Smrg                                 DST_REL(ABSOLUTE),
1946921a55d8Smrg                                 DST_ELEM(ELEM_Y),
1947921a55d8Smrg                                 CLAMP(0));
1948921a55d8Smrg
1949921a55d8Smrg    /* 36 srcX.z DOT4 - non-mask */
1950921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1951921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1952921a55d8Smrg                             SRC0_ELEM(ELEM_Z),
1953921a55d8Smrg                             SRC0_NEG(0),
1954921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1955921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1956921a55d8Smrg                             SRC1_ELEM(ELEM_Z),
1957921a55d8Smrg                             SRC1_NEG(0),
1958921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1959921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1960921a55d8Smrg                             LAST(0));
1961921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1962921a55d8Smrg                                 SRC0_ABS(0),
1963921a55d8Smrg                                 SRC1_ABS(0),
1964921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1965921a55d8Smrg                                 UPDATE_PRED(0),
1966921a55d8Smrg                                 WRITE_MASK(0),
1967921a55d8Smrg                                 FOG_MERGE(0),
1968921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1969921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1970921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1971921a55d8Smrg                                 DST_GPR(2),
1972ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1973ad43ddacSmrg                                 DST_ELEM(ELEM_Z),
1974ad43ddacSmrg                                 CLAMP(0));
1975921a55d8Smrg
1976921a55d8Smrg    /* 37 srcX.w DOT4 - non-mask */
1977921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1978ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1979921a55d8Smrg                             SRC0_ELEM(ELEM_W),
1980ad43ddacSmrg                             SRC0_NEG(0),
1981921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1982ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1983921a55d8Smrg                             SRC1_ELEM(ELEM_W),
1984ad43ddacSmrg                             SRC1_NEG(0),
1985ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1986ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1987ad43ddacSmrg                             LAST(1));
1988921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1989921a55d8Smrg                                 SRC0_ABS(0),
1990921a55d8Smrg                                 SRC1_ABS(0),
1991921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1992921a55d8Smrg                                 UPDATE_PRED(0),
1993921a55d8Smrg                                 WRITE_MASK(0),
1994921a55d8Smrg                                 FOG_MERGE(0),
1995921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1996921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1997ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1998921a55d8Smrg                                 DST_GPR(2),
1999ad43ddacSmrg                                 DST_REL(ABSOLUTE),
2000ad43ddacSmrg                                 DST_ELEM(ELEM_W),
2001ad43ddacSmrg                                 CLAMP(0));
2002ad43ddacSmrg
2003921a55d8Smrg    /* 38 srcY.x DOT4 - non-mask */
2004921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2005ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
2006ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
2007ad43ddacSmrg                             SRC0_NEG(0),
2008921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2009ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
2010ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
2011ad43ddacSmrg                             SRC1_NEG(0),
2012ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
2013ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2014ad43ddacSmrg                             LAST(0));
2015921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2016921a55d8Smrg                                 SRC0_ABS(0),
2017921a55d8Smrg                                 SRC1_ABS(0),
2018921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2019921a55d8Smrg                                 UPDATE_PRED(0),
2020921a55d8Smrg                                 WRITE_MASK(0),
2021921a55d8Smrg                                 FOG_MERGE(0),
2022921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2023921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2024ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2025921a55d8Smrg                                 DST_GPR(2),
2026ad43ddacSmrg                                 DST_REL(ABSOLUTE),
2027ad43ddacSmrg                                 DST_ELEM(ELEM_X),
2028ad43ddacSmrg                                 CLAMP(0));
2029921a55d8Smrg
2030921a55d8Smrg    /* 39 srcY.y DOT4 - non-mask */
2031921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2032ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
2033921a55d8Smrg                             SRC0_ELEM(ELEM_Y),
2034ad43ddacSmrg                             SRC0_NEG(0),
2035921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2036ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
2037921a55d8Smrg                             SRC1_ELEM(ELEM_Y),
2038ad43ddacSmrg                             SRC1_NEG(0),
2039ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
2040ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2041921a55d8Smrg                             LAST(0));
2042921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2043921a55d8Smrg                                 SRC0_ABS(0),
2044921a55d8Smrg                                 SRC1_ABS(0),
2045921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2046921a55d8Smrg                                 UPDATE_PRED(0),
2047921a55d8Smrg                                 WRITE_MASK(1),
2048921a55d8Smrg                                 FOG_MERGE(0),
2049921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2050921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2051ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2052921a55d8Smrg                                 DST_GPR(2),
2053ad43ddacSmrg                                 DST_REL(ABSOLUTE),
2054ad43ddacSmrg                                 DST_ELEM(ELEM_Y),
2055ad43ddacSmrg                                 CLAMP(0));
2056921a55d8Smrg
2057921a55d8Smrg    /* 40 srcY.z DOT4 - non-mask */
2058921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2059921a55d8Smrg                             SRC0_REL(ABSOLUTE),
2060921a55d8Smrg                             SRC0_ELEM(ELEM_Z),
2061921a55d8Smrg                             SRC0_NEG(0),
2062921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2063921a55d8Smrg                             SRC1_REL(ABSOLUTE),
2064921a55d8Smrg                             SRC1_ELEM(ELEM_Z),
2065921a55d8Smrg                             SRC1_NEG(0),
2066921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
2067921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2068921a55d8Smrg                             LAST(0));
2069921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2070921a55d8Smrg                                 SRC0_ABS(0),
2071921a55d8Smrg                                 SRC1_ABS(0),
2072921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2073921a55d8Smrg                                 UPDATE_PRED(0),
2074921a55d8Smrg                                 WRITE_MASK(0),
2075921a55d8Smrg                                 FOG_MERGE(0),
2076921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2077921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2078921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2079921a55d8Smrg                                 DST_GPR(2),
2080921a55d8Smrg                                 DST_REL(ABSOLUTE),
2081921a55d8Smrg                                 DST_ELEM(ELEM_Z),
2082921a55d8Smrg                                 CLAMP(0));
2083921a55d8Smrg
2084921a55d8Smrg    /* 41 srcY.w DOT4 - non-mask */
2085921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2086921a55d8Smrg                             SRC0_REL(ABSOLUTE),
2087921a55d8Smrg                             SRC0_ELEM(ELEM_W),
2088921a55d8Smrg                             SRC0_NEG(0),
2089921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2090921a55d8Smrg                             SRC1_REL(ABSOLUTE),
2091921a55d8Smrg                             SRC1_ELEM(ELEM_W),
2092921a55d8Smrg                             SRC1_NEG(0),
2093921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
2094921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2095921a55d8Smrg                             LAST(1));
2096921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2097921a55d8Smrg                                 SRC0_ABS(0),
2098921a55d8Smrg                                 SRC1_ABS(0),
2099921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2100921a55d8Smrg                                 UPDATE_PRED(0),
2101921a55d8Smrg                                 WRITE_MASK(0),
2102921a55d8Smrg                                 FOG_MERGE(0),
2103921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2104921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2105921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2106921a55d8Smrg                                 DST_GPR(2),
2107921a55d8Smrg                                 DST_REL(ABSOLUTE),
2108921a55d8Smrg                                 DST_ELEM(ELEM_W),
2109921a55d8Smrg                                 CLAMP(0));
2110921a55d8Smrg
2111921a55d8Smrg    /* 42 srcX / w */
2112921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
2113ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
2114ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
2115ad43ddacSmrg                             SRC0_NEG(0),
2116921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
2117ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
2118ad43ddacSmrg                             SRC1_ELEM(ELEM_W),
2119ad43ddacSmrg                             SRC1_NEG(0),
2120ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
2121ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2122ad43ddacSmrg                             LAST(1));
2123ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2124ad43ddacSmrg                                 SRC0_ABS(0),
2125ad43ddacSmrg                                 SRC1_ABS(0),
2126ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
2127ad43ddacSmrg                                 UPDATE_PRED(0),
2128ad43ddacSmrg                                 WRITE_MASK(1),
2129ad43ddacSmrg                                 FOG_MERGE(0),
2130ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
2131ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
2132ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2133ad43ddacSmrg                                 DST_GPR(0),
2134ad43ddacSmrg                                 DST_REL(ABSOLUTE),
2135ad43ddacSmrg                                 DST_ELEM(ELEM_X),
2136ad43ddacSmrg                                 CLAMP(0));
2137ad43ddacSmrg
2138921a55d8Smrg    /* 43 srcY / h */
2139921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
2140ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
2141ad43ddacSmrg                             SRC0_ELEM(ELEM_Y),
2142ad43ddacSmrg                             SRC0_NEG(0),
2143921a55d8Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2144ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
2145ad43ddacSmrg                             SRC1_ELEM(ELEM_W),
2146ad43ddacSmrg                             SRC1_NEG(0),
2147ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
2148ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2149ad43ddacSmrg                             LAST(1));
2150ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2151ad43ddacSmrg                                 SRC0_ABS(0),
2152ad43ddacSmrg                                 SRC1_ABS(0),
2153ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
2154ad43ddacSmrg                                 UPDATE_PRED(0),
2155ad43ddacSmrg                                 WRITE_MASK(1),
2156ad43ddacSmrg                                 FOG_MERGE(0),
2157ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
2158ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
2159ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2160ad43ddacSmrg                                 DST_GPR(0),
2161ad43ddacSmrg                                 DST_REL(ABSOLUTE),
2162ad43ddacSmrg                                 DST_ELEM(ELEM_Y),
2163ad43ddacSmrg                                 CLAMP(0));
2164ad43ddacSmrg
2165921a55d8Smrg    /* 44/45 - dst - mask */
2166b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2167b7e1c893Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2168b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
2169b7e1c893Smrg			     BUFFER_ID(0),
2170b7e1c893Smrg			     SRC_GPR(0),
2171b7e1c893Smrg			     SRC_REL(ABSOLUTE),
2172b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
21730974d292Smrg			     MEGA_FETCH_COUNT(24));
21740974d292Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(2),
2175b7e1c893Smrg				 DST_REL(0),
2176b7e1c893Smrg				 DST_SEL_X(SQ_SEL_X),
2177b7e1c893Smrg				 DST_SEL_Y(SQ_SEL_Y),
2178b7e1c893Smrg				 DST_SEL_Z(SQ_SEL_0),
2179b7e1c893Smrg				 DST_SEL_W(SQ_SEL_1),
2180b7e1c893Smrg				 USE_CONST_FIELDS(0),
2181ad43ddacSmrg				 DATA_FORMAT(FMT_32_32_FLOAT),
2182ad43ddacSmrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2183ad43ddacSmrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2184b7e1c893Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2185b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
2186b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
2187b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2188b13dfe66Smrg#else
2189b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2190b13dfe66Smrg#endif
2191b7e1c893Smrg			     CONST_BUF_NO_STRIDE(0),
2192b7e1c893Smrg			     MEGA_FETCH(1));
2193b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
2194921a55d8Smrg    /* 46/47 - src */
2195b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2196b7e1c893Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2197b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
2198b7e1c893Smrg			     BUFFER_ID(0),
2199b7e1c893Smrg			     SRC_GPR(0),
2200b7e1c893Smrg			     SRC_REL(ABSOLUTE),
2201b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
2202b7e1c893Smrg			     MEGA_FETCH_COUNT(8));
22030974d292Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
2204b7e1c893Smrg				 DST_REL(0),
2205b7e1c893Smrg				 DST_SEL_X(SQ_SEL_X),
2206b7e1c893Smrg				 DST_SEL_Y(SQ_SEL_Y),
2207ad43ddacSmrg				 DST_SEL_Z(SQ_SEL_1),
2208ad43ddacSmrg				 DST_SEL_W(SQ_SEL_0),
2209b7e1c893Smrg				 USE_CONST_FIELDS(0),
2210ad43ddacSmrg				 DATA_FORMAT(FMT_32_32_FLOAT),
2211ad43ddacSmrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2212ad43ddacSmrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2213b7e1c893Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2214b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
2215b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
2216b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2217b13dfe66Smrg#else
2218b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2219b13dfe66Smrg#endif
2220b7e1c893Smrg			     CONST_BUF_NO_STRIDE(0),
2221b7e1c893Smrg			     MEGA_FETCH(0));
2222b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
2223921a55d8Smrg    /* 48/49 - mask */
22240974d292Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
22250974d292Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
22260974d292Smrg			     FETCH_WHOLE_QUAD(0),
22270974d292Smrg			     BUFFER_ID(0),
22280974d292Smrg			     SRC_GPR(0),
22290974d292Smrg			     SRC_REL(ABSOLUTE),
22300974d292Smrg			     SRC_SEL_X(SQ_SEL_X),
22310974d292Smrg			     MEGA_FETCH_COUNT(8));
22320974d292Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
22330974d292Smrg				 DST_REL(0),
22340974d292Smrg				 DST_SEL_X(SQ_SEL_X),
22350974d292Smrg				 DST_SEL_Y(SQ_SEL_Y),
22360974d292Smrg				 DST_SEL_Z(SQ_SEL_1),
22370974d292Smrg				 DST_SEL_W(SQ_SEL_0),
22380974d292Smrg				 USE_CONST_FIELDS(0),
22390974d292Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
22400974d292Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
22410974d292Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
22420974d292Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
22430974d292Smrg    shader[i++] = VTX_DWORD2(OFFSET(16),
2244b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
2245b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2246b13dfe66Smrg#else
2247b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2248b13dfe66Smrg#endif
22490974d292Smrg			     CONST_BUF_NO_STRIDE(0),
22500974d292Smrg			     MEGA_FETCH(0));
22510974d292Smrg    shader[i++] = VTX_DWORD_PAD;
2252b7e1c893Smrg
2253921a55d8Smrg    /* 50/51 - dst - non-mask */
22540974d292Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
22550974d292Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
22560974d292Smrg			     FETCH_WHOLE_QUAD(0),
22570974d292Smrg			     BUFFER_ID(0),
22580974d292Smrg			     SRC_GPR(0),
22590974d292Smrg			     SRC_REL(ABSOLUTE),
22600974d292Smrg			     SRC_SEL_X(SQ_SEL_X),
22610974d292Smrg			     MEGA_FETCH_COUNT(16));
22620974d292Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
22630974d292Smrg				 DST_REL(0),
22640974d292Smrg				 DST_SEL_X(SQ_SEL_X),
22650974d292Smrg				 DST_SEL_Y(SQ_SEL_Y),
22660974d292Smrg				 DST_SEL_Z(SQ_SEL_0),
22670974d292Smrg				 DST_SEL_W(SQ_SEL_1),
22680974d292Smrg				 USE_CONST_FIELDS(0),
22690974d292Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
22700974d292Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
22710974d292Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
22720974d292Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
22730974d292Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
2274b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
2275b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2276b13dfe66Smrg#else
2277b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2278b13dfe66Smrg#endif
22790974d292Smrg			     CONST_BUF_NO_STRIDE(0),
22800974d292Smrg			     MEGA_FETCH(1));
22810974d292Smrg    shader[i++] = VTX_DWORD_PAD;
2282921a55d8Smrg    /* 52/53 - src */
22830974d292Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
22840974d292Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
22850974d292Smrg			     FETCH_WHOLE_QUAD(0),
22860974d292Smrg			     BUFFER_ID(0),
22870974d292Smrg			     SRC_GPR(0),
22880974d292Smrg			     SRC_REL(ABSOLUTE),
22890974d292Smrg			     SRC_SEL_X(SQ_SEL_X),
22900974d292Smrg			     MEGA_FETCH_COUNT(8));
22910974d292Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
22920974d292Smrg				 DST_REL(0),
22930974d292Smrg				 DST_SEL_X(SQ_SEL_X),
22940974d292Smrg				 DST_SEL_Y(SQ_SEL_Y),
22950974d292Smrg				 DST_SEL_Z(SQ_SEL_1),
22960974d292Smrg				 DST_SEL_W(SQ_SEL_0),
22970974d292Smrg				 USE_CONST_FIELDS(0),
22980974d292Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
22990974d292Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
23000974d292Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
23010974d292Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
23020974d292Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
2303b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
2304b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2305b13dfe66Smrg#else
2306b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2307b13dfe66Smrg#endif
23080974d292Smrg			     CONST_BUF_NO_STRIDE(0),
23090974d292Smrg			     MEGA_FETCH(0));
23100974d292Smrg    shader[i++] = VTX_DWORD_PAD;
23110974d292Smrg
23120974d292Smrg    return i;
23130974d292Smrg}
23140974d292Smrg
23150974d292Smrg/* comp ps --------------------------------------- */
23160974d292Smrgint R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
23170974d292Smrg{
23180974d292Smrg    int i = 0;
23190974d292Smrg
23200974d292Smrg    /* 0 */
23214c00f4dcSmrg    shader[i++] = CF_DWORD0(ADDR(3));
23220974d292Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
23234c00f4dcSmrg                            CF_CONST(0),
23240974d292Smrg                            COND(SQ_CF_COND_BOOL),
23250974d292Smrg                            I_COUNT(0),
23260974d292Smrg                            CALL_COUNT(0),
23270974d292Smrg                            END_OF_PROGRAM(0),
23280974d292Smrg                            VALID_PIXEL_MODE(0),
23290974d292Smrg                            CF_INST(SQ_CF_INST_CALL),
23300974d292Smrg                            WHOLE_QUAD_MODE(0),
23310974d292Smrg                            BARRIER(0));
23320974d292Smrg    /* 1 */
23334c00f4dcSmrg    shader[i++] = CF_DWORD0(ADDR(7));
23340974d292Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
23354c00f4dcSmrg                            CF_CONST(0),
23360974d292Smrg                            COND(SQ_CF_COND_NOT_BOOL),
23370974d292Smrg                            I_COUNT(0),
23380974d292Smrg                            CALL_COUNT(0),
23390974d292Smrg                            END_OF_PROGRAM(0),
23400974d292Smrg                            VALID_PIXEL_MODE(0),
23410974d292Smrg                            CF_INST(SQ_CF_INST_CALL),
23420974d292Smrg                            WHOLE_QUAD_MODE(0),
23430974d292Smrg                            BARRIER(0));
23440974d292Smrg    /* 2 */
23454c00f4dcSmrg    shader[i++] = CF_DWORD0(ADDR(0));
23460974d292Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
23470974d292Smrg                            CF_CONST(0),
23484c00f4dcSmrg                            COND(SQ_CF_COND_ACTIVE),
23490974d292Smrg                            I_COUNT(0),
23500974d292Smrg                            CALL_COUNT(0),
23514c00f4dcSmrg                            END_OF_PROGRAM(1),
23520974d292Smrg                            VALID_PIXEL_MODE(0),
23534c00f4dcSmrg                            CF_INST(SQ_CF_INST_NOP),
23540974d292Smrg                            WHOLE_QUAD_MODE(0),
23554c00f4dcSmrg                            BARRIER(1));
23560974d292Smrg
23574c00f4dcSmrg    /* 3 - mask sub */
23584c00f4dcSmrg    shader[i++] = CF_DWORD0(ADDR(14));
23590974d292Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
23600974d292Smrg			    CF_CONST(0),
23614c00f4dcSmrg			    COND(SQ_CF_COND_ACTIVE),
23624c00f4dcSmrg			    I_COUNT(2),
23630974d292Smrg			    CALL_COUNT(0),
23640974d292Smrg			    END_OF_PROGRAM(0),
23650974d292Smrg			    VALID_PIXEL_MODE(0),
23664c00f4dcSmrg			    CF_INST(SQ_CF_INST_TEX),
23670974d292Smrg			    WHOLE_QUAD_MODE(0),
23684c00f4dcSmrg			    BARRIER(1));
23690974d292Smrg
23700974d292Smrg    /* 4 */
23714c00f4dcSmrg    shader[i++] = CF_ALU_DWORD0(ADDR(10),
23720974d292Smrg				KCACHE_BANK0(0),
23730974d292Smrg				KCACHE_BANK1(0),
23740974d292Smrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
23750974d292Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
23760974d292Smrg				KCACHE_ADDR0(0),
23770974d292Smrg				KCACHE_ADDR1(0),
23780974d292Smrg				I_COUNT(4),
23790974d292Smrg				USES_WATERFALL(0),
23800974d292Smrg				CF_INST(SQ_CF_INST_ALU),
23810974d292Smrg				WHOLE_QUAD_MODE(0),
23820974d292Smrg				BARRIER(1));
23830974d292Smrg
23840974d292Smrg    /* 5 */
23850974d292Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
23860974d292Smrg					  TYPE(SQ_EXPORT_PIXEL),
23874c00f4dcSmrg					  RW_GPR(2),
23880974d292Smrg					  RW_REL(ABSOLUTE),
23890974d292Smrg					  INDEX_GPR(0),
23900974d292Smrg					  ELEM_SIZE(1));
23910974d292Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
23920974d292Smrg					       SRC_SEL_Y(SQ_SEL_Y),
23930974d292Smrg					       SRC_SEL_Z(SQ_SEL_Z),
23940974d292Smrg					       SRC_SEL_W(SQ_SEL_W),
23950974d292Smrg					       R6xx_ELEM_LOOP(0),
23960974d292Smrg					       BURST_COUNT(1),
23974c00f4dcSmrg					       END_OF_PROGRAM(0),
23980974d292Smrg					       VALID_PIXEL_MODE(0),
23990974d292Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
24000974d292Smrg					       WHOLE_QUAD_MODE(0),
24010974d292Smrg					       BARRIER(1));
24020974d292Smrg    /* 6 */
24034c00f4dcSmrg    shader[i++] = CF_DWORD0(ADDR(0));
24040974d292Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
24050974d292Smrg			    CF_CONST(0),
24060974d292Smrg			    COND(SQ_CF_COND_ACTIVE),
24074c00f4dcSmrg			    I_COUNT(0),
24080974d292Smrg			    CALL_COUNT(0),
24090974d292Smrg			    END_OF_PROGRAM(0),
24100974d292Smrg			    VALID_PIXEL_MODE(0),
24114c00f4dcSmrg			    CF_INST(SQ_CF_INST_RETURN),
24120974d292Smrg			    WHOLE_QUAD_MODE(0),
24130974d292Smrg			    BARRIER(1));
24140974d292Smrg
24154c00f4dcSmrg    /* 7 non-mask sub */
24164c00f4dcSmrg    shader[i++] = CF_DWORD0(ADDR(18));
2417b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
2418b7e1c893Smrg			    CF_CONST(0),
2419b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
24204c00f4dcSmrg			    I_COUNT(1),
2421b7e1c893Smrg			    CALL_COUNT(0),
2422b7e1c893Smrg			    END_OF_PROGRAM(0),
2423b7e1c893Smrg			    VALID_PIXEL_MODE(0),
24244c00f4dcSmrg			    CF_INST(SQ_CF_INST_TEX),
2425b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
2426b7e1c893Smrg			    BARRIER(1));
24270974d292Smrg    /* 8 */
24284c00f4dcSmrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
24294c00f4dcSmrg					  TYPE(SQ_EXPORT_PIXEL),
24304c00f4dcSmrg					  RW_GPR(0),
24314c00f4dcSmrg					  RW_REL(ABSOLUTE),
24324c00f4dcSmrg					  INDEX_GPR(0),
24334c00f4dcSmrg					  ELEM_SIZE(1));
24344c00f4dcSmrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
24354c00f4dcSmrg					       SRC_SEL_Y(SQ_SEL_Y),
24364c00f4dcSmrg					       SRC_SEL_Z(SQ_SEL_Z),
24374c00f4dcSmrg					       SRC_SEL_W(SQ_SEL_W),
24384c00f4dcSmrg					       R6xx_ELEM_LOOP(0),
24394c00f4dcSmrg					       BURST_COUNT(1),
24404c00f4dcSmrg					       END_OF_PROGRAM(0),
24414c00f4dcSmrg					       VALID_PIXEL_MODE(0),
24424c00f4dcSmrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
24434c00f4dcSmrg					       WHOLE_QUAD_MODE(0),
24444c00f4dcSmrg					       BARRIER(1));
24450974d292Smrg    /* 9 */
24460974d292Smrg    shader[i++] = CF_DWORD0(ADDR(0));
24470974d292Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
24480974d292Smrg			    CF_CONST(0),
24490974d292Smrg			    COND(SQ_CF_COND_ACTIVE),
24500974d292Smrg			    I_COUNT(0),
24510974d292Smrg			    CALL_COUNT(0),
24520974d292Smrg			    END_OF_PROGRAM(0),
24530974d292Smrg			    VALID_PIXEL_MODE(0),
24540974d292Smrg			    CF_INST(SQ_CF_INST_RETURN),
24550974d292Smrg			    WHOLE_QUAD_MODE(0),
24560974d292Smrg			    BARRIER(1));
24570974d292Smrg
24584c00f4dcSmrg    /* 10 - alu 0 */
24594c00f4dcSmrg    /* MUL gpr[2].x gpr[1].x gpr[0].x */
2460921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
24610974d292Smrg			     SRC0_REL(ABSOLUTE),
24620974d292Smrg			     SRC0_ELEM(ELEM_X),
24630974d292Smrg			     SRC0_NEG(0),
2464921a55d8Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
24650974d292Smrg			     SRC1_REL(ABSOLUTE),
24660974d292Smrg			     SRC1_ELEM(ELEM_X),
24670974d292Smrg			     SRC1_NEG(0),
24680974d292Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
24690974d292Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
24700974d292Smrg			     LAST(0));
24710974d292Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
24720974d292Smrg				 SRC0_ABS(0),
24730974d292Smrg				 SRC1_ABS(0),
24740974d292Smrg				 UPDATE_EXECUTE_MASK(0),
24750974d292Smrg				 UPDATE_PRED(0),
24760974d292Smrg				 WRITE_MASK(1),
24770974d292Smrg				 FOG_MERGE(0),
24780974d292Smrg				 OMOD(SQ_ALU_OMOD_OFF),
24790974d292Smrg				 ALU_INST(SQ_OP2_INST_MUL),
24800974d292Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
24814c00f4dcSmrg				 DST_GPR(2),
24820974d292Smrg				 DST_REL(ABSOLUTE),
24830974d292Smrg				 DST_ELEM(ELEM_X),
24840974d292Smrg				 CLAMP(1));
24854c00f4dcSmrg    /* 11 - alu 1 */
24864c00f4dcSmrg    /* MUL gpr[2].y gpr[1].y gpr[0].y */
2487921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
24880974d292Smrg			     SRC0_REL(ABSOLUTE),
24890974d292Smrg			     SRC0_ELEM(ELEM_Y),
24900974d292Smrg			     SRC0_NEG(0),
2491921a55d8Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
24920974d292Smrg			     SRC1_REL(ABSOLUTE),
24930974d292Smrg			     SRC1_ELEM(ELEM_Y),
24940974d292Smrg			     SRC1_NEG(0),
24950974d292Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
24960974d292Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
24970974d292Smrg			     LAST(0));
24980974d292Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
24990974d292Smrg				 SRC0_ABS(0),
25000974d292Smrg				 SRC1_ABS(0),
25010974d292Smrg				 UPDATE_EXECUTE_MASK(0),
25020974d292Smrg				 UPDATE_PRED(0),
25030974d292Smrg				 WRITE_MASK(1),
25040974d292Smrg				 FOG_MERGE(0),
25050974d292Smrg				 OMOD(SQ_ALU_OMOD_OFF),
25060974d292Smrg				 ALU_INST(SQ_OP2_INST_MUL),
25070974d292Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
25084c00f4dcSmrg				 DST_GPR(2),
25090974d292Smrg				 DST_REL(ABSOLUTE),
25100974d292Smrg				 DST_ELEM(ELEM_Y),
25110974d292Smrg				 CLAMP(1));
25124c00f4dcSmrg    /* 12 - alu 2 */
25134c00f4dcSmrg    /* MUL gpr[2].z gpr[1].z gpr[0].z */
2514921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
25150974d292Smrg			     SRC0_REL(ABSOLUTE),
25160974d292Smrg			     SRC0_ELEM(ELEM_Z),
25170974d292Smrg			     SRC0_NEG(0),
2518921a55d8Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
25190974d292Smrg			     SRC1_REL(ABSOLUTE),
25200974d292Smrg			     SRC1_ELEM(ELEM_Z),
25210974d292Smrg			     SRC1_NEG(0),
25220974d292Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
25230974d292Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
25240974d292Smrg			     LAST(0));
25250974d292Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
25260974d292Smrg				 SRC0_ABS(0),
25270974d292Smrg				 SRC1_ABS(0),
25280974d292Smrg				 UPDATE_EXECUTE_MASK(0),
25290974d292Smrg				 UPDATE_PRED(0),
25300974d292Smrg				 WRITE_MASK(1),
25310974d292Smrg				 FOG_MERGE(0),
25320974d292Smrg				 OMOD(SQ_ALU_OMOD_OFF),
25330974d292Smrg				 ALU_INST(SQ_OP2_INST_MUL),
25340974d292Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
25354c00f4dcSmrg				 DST_GPR(2),
25360974d292Smrg				 DST_REL(ABSOLUTE),
25370974d292Smrg				 DST_ELEM(ELEM_Z),
25380974d292Smrg				 CLAMP(1));
25394c00f4dcSmrg    /* 13 - alu 3 */
25404c00f4dcSmrg    /* MUL gpr[2].w gpr[1].w gpr[0].w */
2541921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
25420974d292Smrg			     SRC0_REL(ABSOLUTE),
25430974d292Smrg			     SRC0_ELEM(ELEM_W),
25440974d292Smrg			     SRC0_NEG(0),
2545921a55d8Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
25460974d292Smrg			     SRC1_REL(ABSOLUTE),
25470974d292Smrg			     SRC1_ELEM(ELEM_W),
25480974d292Smrg			     SRC1_NEG(0),
25490974d292Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
25500974d292Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
25510974d292Smrg			     LAST(1));
25520974d292Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
25530974d292Smrg				 SRC0_ABS(0),
25540974d292Smrg				 SRC1_ABS(0),
25550974d292Smrg				 UPDATE_EXECUTE_MASK(0),
25560974d292Smrg				 UPDATE_PRED(0),
25570974d292Smrg				 WRITE_MASK(1),
25580974d292Smrg				 FOG_MERGE(0),
25590974d292Smrg				 OMOD(SQ_ALU_OMOD_OFF),
25600974d292Smrg				 ALU_INST(SQ_OP2_INST_MUL),
25610974d292Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
25624c00f4dcSmrg				 DST_GPR(2),
25630974d292Smrg				 DST_REL(ABSOLUTE),
25640974d292Smrg				 DST_ELEM(ELEM_W),
25650974d292Smrg				 CLAMP(1));
2566b7e1c893Smrg
25674c00f4dcSmrg    /* 14/15 - src - mask */
25680974d292Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
25690974d292Smrg			     BC_FRAC_MODE(0),
25700974d292Smrg			     FETCH_WHOLE_QUAD(0),
25710974d292Smrg			     RESOURCE_ID(0),
25720974d292Smrg			     SRC_GPR(0),
25730974d292Smrg			     SRC_REL(ABSOLUTE),
25740974d292Smrg			     R7xx_ALT_CONST(0));
25750974d292Smrg    shader[i++] = TEX_DWORD1(DST_GPR(0),
25760974d292Smrg			     DST_REL(ABSOLUTE),
25770974d292Smrg			     DST_SEL_X(SQ_SEL_X),
25780974d292Smrg			     DST_SEL_Y(SQ_SEL_Y),
25790974d292Smrg			     DST_SEL_Z(SQ_SEL_Z),
25800974d292Smrg			     DST_SEL_W(SQ_SEL_W),
25810974d292Smrg			     LOD_BIAS(0),
25820974d292Smrg			     COORD_TYPE_X(TEX_NORMALIZED),
25830974d292Smrg			     COORD_TYPE_Y(TEX_NORMALIZED),
25840974d292Smrg			     COORD_TYPE_Z(TEX_NORMALIZED),
25850974d292Smrg			     COORD_TYPE_W(TEX_NORMALIZED));
25860974d292Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
25870974d292Smrg			     OFFSET_Y(0),
25880974d292Smrg			     OFFSET_Z(0),
25890974d292Smrg			     SAMPLER_ID(0),
25900974d292Smrg			     SRC_SEL_X(SQ_SEL_X),
25910974d292Smrg			     SRC_SEL_Y(SQ_SEL_Y),
25920974d292Smrg			     SRC_SEL_Z(SQ_SEL_0),
25930974d292Smrg			     SRC_SEL_W(SQ_SEL_1));
25940974d292Smrg    shader[i++] = TEX_DWORD_PAD;
25954c00f4dcSmrg    /* 16/17 - mask */
25960974d292Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
25970974d292Smrg			     BC_FRAC_MODE(0),
25980974d292Smrg			     FETCH_WHOLE_QUAD(0),
25990974d292Smrg			     RESOURCE_ID(1),
26000974d292Smrg			     SRC_GPR(1),
26010974d292Smrg			     SRC_REL(ABSOLUTE),
26020974d292Smrg			     R7xx_ALT_CONST(0));
26030974d292Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
26040974d292Smrg			     DST_REL(ABSOLUTE),
26050974d292Smrg			     DST_SEL_X(SQ_SEL_X),
26060974d292Smrg			     DST_SEL_Y(SQ_SEL_Y),
26070974d292Smrg			     DST_SEL_Z(SQ_SEL_Z),
26080974d292Smrg			     DST_SEL_W(SQ_SEL_W),
26090974d292Smrg			     LOD_BIAS(0),
26100974d292Smrg			     COORD_TYPE_X(TEX_NORMALIZED),
26110974d292Smrg			     COORD_TYPE_Y(TEX_NORMALIZED),
26120974d292Smrg			     COORD_TYPE_Z(TEX_NORMALIZED),
26130974d292Smrg			     COORD_TYPE_W(TEX_NORMALIZED));
26140974d292Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
26150974d292Smrg			     OFFSET_Y(0),
26160974d292Smrg			     OFFSET_Z(0),
26170974d292Smrg			     SAMPLER_ID(1),
26180974d292Smrg			     SRC_SEL_X(SQ_SEL_X),
26190974d292Smrg			     SRC_SEL_Y(SQ_SEL_Y),
26200974d292Smrg			     SRC_SEL_Z(SQ_SEL_0),
26210974d292Smrg			     SRC_SEL_W(SQ_SEL_1));
26220974d292Smrg    shader[i++] = TEX_DWORD_PAD;
2623b7e1c893Smrg
26244c00f4dcSmrg    /* 18/19 - src - non-mask */
26254c00f4dcSmrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
26264c00f4dcSmrg			     BC_FRAC_MODE(0),
26274c00f4dcSmrg			     FETCH_WHOLE_QUAD(0),
26284c00f4dcSmrg			     RESOURCE_ID(0),
26294c00f4dcSmrg			     SRC_GPR(0),
26304c00f4dcSmrg			     SRC_REL(ABSOLUTE),
26314c00f4dcSmrg			     R7xx_ALT_CONST(0));
26324c00f4dcSmrg    shader[i++] = TEX_DWORD1(DST_GPR(0),
26334c00f4dcSmrg			     DST_REL(ABSOLUTE),
26344c00f4dcSmrg			     DST_SEL_X(SQ_SEL_X),
26354c00f4dcSmrg			     DST_SEL_Y(SQ_SEL_Y),
26364c00f4dcSmrg			     DST_SEL_Z(SQ_SEL_Z),
26374c00f4dcSmrg			     DST_SEL_W(SQ_SEL_W),
26384c00f4dcSmrg			     LOD_BIAS(0),
26394c00f4dcSmrg			     COORD_TYPE_X(TEX_NORMALIZED),
26404c00f4dcSmrg			     COORD_TYPE_Y(TEX_NORMALIZED),
26414c00f4dcSmrg			     COORD_TYPE_Z(TEX_NORMALIZED),
26424c00f4dcSmrg			     COORD_TYPE_W(TEX_NORMALIZED));
26434c00f4dcSmrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
26444c00f4dcSmrg			     OFFSET_Y(0),
26454c00f4dcSmrg			     OFFSET_Z(0),
26464c00f4dcSmrg			     SAMPLER_ID(0),
26474c00f4dcSmrg			     SRC_SEL_X(SQ_SEL_X),
26484c00f4dcSmrg			     SRC_SEL_Y(SQ_SEL_Y),
26494c00f4dcSmrg			     SRC_SEL_Z(SQ_SEL_0),
26504c00f4dcSmrg			     SRC_SEL_W(SQ_SEL_1));
26514c00f4dcSmrg    shader[i++] = TEX_DWORD_PAD;
26524c00f4dcSmrg
2653b7e1c893Smrg    return i;
2654b7e1c893Smrg}
2655