1de2362d3Smrg/*
2de2362d3Smrg * Copyright 2008 Advanced Micro Devices, Inc.
3de2362d3Smrg *
4de2362d3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5de2362d3Smrg * copy of this software and associated documentation files (the "Software"),
6de2362d3Smrg * to deal in the Software without restriction, including without limitation
7de2362d3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8de2362d3Smrg * and/or sell copies of the Software, and to permit persons to whom the
9de2362d3Smrg * Software is furnished to do so, subject to the following conditions:
10de2362d3Smrg *
11de2362d3Smrg * The above copyright notice and this permission notice (including the next
12de2362d3Smrg * paragraph) shall be included in all copies or substantial portions of the
13de2362d3Smrg * Software.
14de2362d3Smrg *
15de2362d3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16de2362d3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17de2362d3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18de2362d3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19de2362d3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20de2362d3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21de2362d3Smrg * SOFTWARE.
22de2362d3Smrg *
23de2362d3Smrg * Author: Alex Deucher <alexander.deucher@amd.com>
24de2362d3Smrg *
25de2362d3Smrg */
26de2362d3Smrg
27de2362d3Smrg#ifdef HAVE_CONFIG_H
28de2362d3Smrg#include "config.h"
29de2362d3Smrg#endif
30de2362d3Smrg
31de2362d3Smrg#include "xf86.h"
32de2362d3Smrg
33de2362d3Smrg#include "radeon.h"
34de2362d3Smrg#include "r600_shader.h"
35de2362d3Smrg#include "r600_reg.h"
36de2362d3Smrg
37de2362d3Smrg/* solid vs --------------------------------------- */
38de2362d3Smrgint R600_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader)
39de2362d3Smrg{
40de2362d3Smrg    int i = 0;
41de2362d3Smrg
42de2362d3Smrg    /* 0 */
43de2362d3Smrg    shader[i++] = CF_DWORD0(ADDR(4));
44de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
45de2362d3Smrg			    CF_CONST(0),
46de2362d3Smrg			    COND(SQ_CF_COND_ACTIVE),
47de2362d3Smrg			    I_COUNT(1),
48de2362d3Smrg			    CALL_COUNT(0),
49de2362d3Smrg			    END_OF_PROGRAM(0),
50de2362d3Smrg			    VALID_PIXEL_MODE(0),
51de2362d3Smrg			    CF_INST(SQ_CF_INST_VTX),
52de2362d3Smrg			    WHOLE_QUAD_MODE(0),
53de2362d3Smrg			    BARRIER(1));
54de2362d3Smrg    /* 1 */
55de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
56de2362d3Smrg					  TYPE(SQ_EXPORT_POS),
57de2362d3Smrg					  RW_GPR(1),
58de2362d3Smrg					  RW_REL(ABSOLUTE),
59de2362d3Smrg					  INDEX_GPR(0),
60de2362d3Smrg					  ELEM_SIZE(0));
61de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
62de2362d3Smrg					       SRC_SEL_Y(SQ_SEL_Y),
63de2362d3Smrg					       SRC_SEL_Z(SQ_SEL_Z),
64de2362d3Smrg					       SRC_SEL_W(SQ_SEL_W),
65de2362d3Smrg					       R6xx_ELEM_LOOP(0),
66de2362d3Smrg					       BURST_COUNT(1),
67de2362d3Smrg					       END_OF_PROGRAM(0),
68de2362d3Smrg					       VALID_PIXEL_MODE(0),
69de2362d3Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
70de2362d3Smrg					       WHOLE_QUAD_MODE(0),
71de2362d3Smrg					       BARRIER(1));
72de2362d3Smrg    /* 2 - always export a param whether it's used or not */
73de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
74de2362d3Smrg					  TYPE(SQ_EXPORT_PARAM),
75de2362d3Smrg					  RW_GPR(0),
76de2362d3Smrg					  RW_REL(ABSOLUTE),
77de2362d3Smrg					  INDEX_GPR(0),
78de2362d3Smrg					  ELEM_SIZE(0));
79de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
80de2362d3Smrg					       SRC_SEL_Y(SQ_SEL_Y),
81de2362d3Smrg					       SRC_SEL_Z(SQ_SEL_Z),
82de2362d3Smrg					       SRC_SEL_W(SQ_SEL_W),
83de2362d3Smrg					       R6xx_ELEM_LOOP(0),
84de2362d3Smrg					       BURST_COUNT(0),
85de2362d3Smrg					       END_OF_PROGRAM(1),
86de2362d3Smrg					       VALID_PIXEL_MODE(0),
87de2362d3Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
88de2362d3Smrg					       WHOLE_QUAD_MODE(0),
89de2362d3Smrg					       BARRIER(0));
90de2362d3Smrg    /* 3 - padding */
91de2362d3Smrg    shader[i++] = 0x00000000;
92de2362d3Smrg    shader[i++] = 0x00000000;
93de2362d3Smrg    /* 4/5 */
94de2362d3Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
95de2362d3Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
96de2362d3Smrg			     FETCH_WHOLE_QUAD(0),
97de2362d3Smrg			     BUFFER_ID(0),
98de2362d3Smrg			     SRC_GPR(0),
99de2362d3Smrg			     SRC_REL(ABSOLUTE),
100de2362d3Smrg			     SRC_SEL_X(SQ_SEL_X),
101de2362d3Smrg			     MEGA_FETCH_COUNT(8));
102de2362d3Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
103de2362d3Smrg				 DST_REL(0),
104de2362d3Smrg				 DST_SEL_X(SQ_SEL_X),
105de2362d3Smrg				 DST_SEL_Y(SQ_SEL_Y),
106de2362d3Smrg				 DST_SEL_Z(SQ_SEL_0),
107de2362d3Smrg				 DST_SEL_W(SQ_SEL_1),
108de2362d3Smrg				 USE_CONST_FIELDS(0),
109de2362d3Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
110de2362d3Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
111de2362d3Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
112de2362d3Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
113de2362d3Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
114de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
115de2362d3Smrg			     ENDIAN_SWAP(SQ_ENDIAN_8IN32),
116de2362d3Smrg#else
117de2362d3Smrg			     ENDIAN_SWAP(SQ_ENDIAN_NONE),
118de2362d3Smrg#endif
119de2362d3Smrg			     CONST_BUF_NO_STRIDE(0),
120de2362d3Smrg			     MEGA_FETCH(1));
121de2362d3Smrg    shader[i++] = VTX_DWORD_PAD;
122de2362d3Smrg
123de2362d3Smrg    return i;
124de2362d3Smrg}
125de2362d3Smrg
126de2362d3Smrg/* solid ps --------------------------------------- */
127de2362d3Smrgint R600_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader)
128de2362d3Smrg{
129de2362d3Smrg    int i = 0;
130de2362d3Smrg
131de2362d3Smrg    /* 0 */
132de2362d3Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(2),
133de2362d3Smrg				KCACHE_BANK0(0),
134de2362d3Smrg				KCACHE_BANK1(0),
135de2362d3Smrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
136de2362d3Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
137de2362d3Smrg				KCACHE_ADDR0(0),
138de2362d3Smrg				KCACHE_ADDR1(0),
139de2362d3Smrg				I_COUNT(4),
140de2362d3Smrg				USES_WATERFALL(0),
141de2362d3Smrg				CF_INST(SQ_CF_INST_ALU),
142de2362d3Smrg				WHOLE_QUAD_MODE(0),
143de2362d3Smrg				BARRIER(1));
144de2362d3Smrg    /* 1 */
145de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
146de2362d3Smrg					  TYPE(SQ_EXPORT_PIXEL),
147de2362d3Smrg					  RW_GPR(0),
148de2362d3Smrg					  RW_REL(ABSOLUTE),
149de2362d3Smrg					  INDEX_GPR(0),
150de2362d3Smrg					  ELEM_SIZE(1));
151de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
152de2362d3Smrg					       SRC_SEL_Y(SQ_SEL_Y),
153de2362d3Smrg					       SRC_SEL_Z(SQ_SEL_Z),
154de2362d3Smrg					       SRC_SEL_W(SQ_SEL_W),
155de2362d3Smrg					       R6xx_ELEM_LOOP(0),
156de2362d3Smrg					       BURST_COUNT(1),
157de2362d3Smrg					       END_OF_PROGRAM(1),
158de2362d3Smrg					       VALID_PIXEL_MODE(0),
159de2362d3Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
160de2362d3Smrg					       WHOLE_QUAD_MODE(0),
161de2362d3Smrg					       BARRIER(1));
162de2362d3Smrg
163de2362d3Smrg    /* 2 */
164de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
165de2362d3Smrg			     SRC0_REL(ABSOLUTE),
166de2362d3Smrg			     SRC0_ELEM(ELEM_X),
167de2362d3Smrg			     SRC0_NEG(0),
168de2362d3Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
169de2362d3Smrg			     SRC1_REL(ABSOLUTE),
170de2362d3Smrg			     SRC1_ELEM(ELEM_X),
171de2362d3Smrg			     SRC1_NEG(0),
172de2362d3Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
173de2362d3Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
174de2362d3Smrg			     LAST(0));
175de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
176de2362d3Smrg				 SRC0_ABS(0),
177de2362d3Smrg				 SRC1_ABS(0),
178de2362d3Smrg				 UPDATE_EXECUTE_MASK(0),
179de2362d3Smrg				 UPDATE_PRED(0),
180de2362d3Smrg				 WRITE_MASK(1),
181de2362d3Smrg				 FOG_MERGE(0),
182de2362d3Smrg				 OMOD(SQ_ALU_OMOD_OFF),
183de2362d3Smrg				 ALU_INST(SQ_OP2_INST_MOV),
184de2362d3Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
185de2362d3Smrg				 DST_GPR(0),
186de2362d3Smrg				 DST_REL(ABSOLUTE),
187de2362d3Smrg				 DST_ELEM(ELEM_X),
188de2362d3Smrg				 CLAMP(1));
189de2362d3Smrg    /* 3 */
190de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
191de2362d3Smrg			     SRC0_REL(ABSOLUTE),
192de2362d3Smrg			     SRC0_ELEM(ELEM_Y),
193de2362d3Smrg			     SRC0_NEG(0),
194de2362d3Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
195de2362d3Smrg			     SRC1_REL(ABSOLUTE),
196de2362d3Smrg			     SRC1_ELEM(ELEM_Y),
197de2362d3Smrg			     SRC1_NEG(0),
198de2362d3Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
199de2362d3Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
200de2362d3Smrg			     LAST(0));
201de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
202de2362d3Smrg				 SRC0_ABS(0),
203de2362d3Smrg				 SRC1_ABS(0),
204de2362d3Smrg				 UPDATE_EXECUTE_MASK(0),
205de2362d3Smrg				 UPDATE_PRED(0),
206de2362d3Smrg				 WRITE_MASK(1),
207de2362d3Smrg				 FOG_MERGE(0),
208de2362d3Smrg				 OMOD(SQ_ALU_OMOD_OFF),
209de2362d3Smrg				 ALU_INST(SQ_OP2_INST_MOV),
210de2362d3Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
211de2362d3Smrg				 DST_GPR(0),
212de2362d3Smrg				 DST_REL(ABSOLUTE),
213de2362d3Smrg				 DST_ELEM(ELEM_Y),
214de2362d3Smrg				 CLAMP(1));
215de2362d3Smrg    /* 4 */
216de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
217de2362d3Smrg			     SRC0_REL(ABSOLUTE),
218de2362d3Smrg			     SRC0_ELEM(ELEM_Z),
219de2362d3Smrg			     SRC0_NEG(0),
220de2362d3Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
221de2362d3Smrg			     SRC1_REL(ABSOLUTE),
222de2362d3Smrg			     SRC1_ELEM(ELEM_Z),
223de2362d3Smrg			     SRC1_NEG(0),
224de2362d3Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
225de2362d3Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
226de2362d3Smrg			     LAST(0));
227de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
228de2362d3Smrg				 SRC0_ABS(0),
229de2362d3Smrg				 SRC1_ABS(0),
230de2362d3Smrg				 UPDATE_EXECUTE_MASK(0),
231de2362d3Smrg				 UPDATE_PRED(0),
232de2362d3Smrg				 WRITE_MASK(1),
233de2362d3Smrg				 FOG_MERGE(0),
234de2362d3Smrg				 OMOD(SQ_ALU_OMOD_OFF),
235de2362d3Smrg				 ALU_INST(SQ_OP2_INST_MOV),
236de2362d3Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
237de2362d3Smrg				 DST_GPR(0),
238de2362d3Smrg				 DST_REL(ABSOLUTE),
239de2362d3Smrg				 DST_ELEM(ELEM_Z),
240de2362d3Smrg				 CLAMP(1));
241de2362d3Smrg    /* 5 */
242de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
243de2362d3Smrg			     SRC0_REL(ABSOLUTE),
244de2362d3Smrg			     SRC0_ELEM(ELEM_W),
245de2362d3Smrg			     SRC0_NEG(0),
246de2362d3Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
247de2362d3Smrg			     SRC1_REL(ABSOLUTE),
248de2362d3Smrg			     SRC1_ELEM(ELEM_W),
249de2362d3Smrg			     SRC1_NEG(0),
250de2362d3Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
251de2362d3Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
252de2362d3Smrg			     LAST(1));
253de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
254de2362d3Smrg				 SRC0_ABS(0),
255de2362d3Smrg				 SRC1_ABS(0),
256de2362d3Smrg				 UPDATE_EXECUTE_MASK(0),
257de2362d3Smrg				 UPDATE_PRED(0),
258de2362d3Smrg				 WRITE_MASK(1),
259de2362d3Smrg				 FOG_MERGE(0),
260de2362d3Smrg				 OMOD(SQ_ALU_OMOD_OFF),
261de2362d3Smrg				 ALU_INST(SQ_OP2_INST_MOV),
262de2362d3Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
263de2362d3Smrg				 DST_GPR(0),
264de2362d3Smrg				 DST_REL(ABSOLUTE),
265de2362d3Smrg				 DST_ELEM(ELEM_W),
266de2362d3Smrg				 CLAMP(1));
267de2362d3Smrg
268de2362d3Smrg    return i;
269de2362d3Smrg}
270de2362d3Smrg
271de2362d3Smrg/* copy vs --------------------------------------- */
272de2362d3Smrgint R600_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader)
273de2362d3Smrg{
274de2362d3Smrg    int i = 0;
275de2362d3Smrg
276de2362d3Smrg    /* 0 */
277de2362d3Smrg    shader[i++] = CF_DWORD0(ADDR(4));
278de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
279de2362d3Smrg			    CF_CONST(0),
280de2362d3Smrg			    COND(SQ_CF_COND_ACTIVE),
281de2362d3Smrg			    I_COUNT(2),
282de2362d3Smrg			    CALL_COUNT(0),
283de2362d3Smrg			    END_OF_PROGRAM(0),
284de2362d3Smrg			    VALID_PIXEL_MODE(0),
285de2362d3Smrg			    CF_INST(SQ_CF_INST_VTX),
286de2362d3Smrg			    WHOLE_QUAD_MODE(0),
287de2362d3Smrg			    BARRIER(1));
288de2362d3Smrg    /* 1 */
289de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
290de2362d3Smrg					  TYPE(SQ_EXPORT_POS),
291de2362d3Smrg					  RW_GPR(1),
292de2362d3Smrg					  RW_REL(ABSOLUTE),
293de2362d3Smrg					  INDEX_GPR(0),
294de2362d3Smrg					  ELEM_SIZE(0));
295de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
296de2362d3Smrg					       SRC_SEL_Y(SQ_SEL_Y),
297de2362d3Smrg					       SRC_SEL_Z(SQ_SEL_Z),
298de2362d3Smrg					       SRC_SEL_W(SQ_SEL_W),
299de2362d3Smrg					       R6xx_ELEM_LOOP(0),
300de2362d3Smrg					       BURST_COUNT(0),
301de2362d3Smrg					       END_OF_PROGRAM(0),
302de2362d3Smrg					       VALID_PIXEL_MODE(0),
303de2362d3Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
304de2362d3Smrg					       WHOLE_QUAD_MODE(0),
305de2362d3Smrg					       BARRIER(1));
306de2362d3Smrg    /* 2 */
307de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
308de2362d3Smrg					  TYPE(SQ_EXPORT_PARAM),
309de2362d3Smrg					  RW_GPR(0),
310de2362d3Smrg					  RW_REL(ABSOLUTE),
311de2362d3Smrg					  INDEX_GPR(0),
312de2362d3Smrg					  ELEM_SIZE(0));
313de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
314de2362d3Smrg					       SRC_SEL_Y(SQ_SEL_Y),
315de2362d3Smrg					       SRC_SEL_Z(SQ_SEL_Z),
316de2362d3Smrg					       SRC_SEL_W(SQ_SEL_W),
317de2362d3Smrg					       R6xx_ELEM_LOOP(0),
318de2362d3Smrg					       BURST_COUNT(0),
319de2362d3Smrg					       END_OF_PROGRAM(1),
320de2362d3Smrg					       VALID_PIXEL_MODE(0),
321de2362d3Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
322de2362d3Smrg					       WHOLE_QUAD_MODE(0),
323de2362d3Smrg					       BARRIER(0));
324de2362d3Smrg    /* 3 */
325de2362d3Smrg    shader[i++] = 0x00000000;
326de2362d3Smrg    shader[i++] = 0x00000000;
327de2362d3Smrg    /* 4/5 */
328de2362d3Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
329de2362d3Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
330de2362d3Smrg			     FETCH_WHOLE_QUAD(0),
331de2362d3Smrg			     BUFFER_ID(0),
332de2362d3Smrg			     SRC_GPR(0),
333de2362d3Smrg			     SRC_REL(ABSOLUTE),
334de2362d3Smrg			     SRC_SEL_X(SQ_SEL_X),
335de2362d3Smrg			     MEGA_FETCH_COUNT(16));
336de2362d3Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
337de2362d3Smrg				 DST_REL(0),
338de2362d3Smrg				 DST_SEL_X(SQ_SEL_X),
339de2362d3Smrg				 DST_SEL_Y(SQ_SEL_Y),
340de2362d3Smrg				 DST_SEL_Z(SQ_SEL_0),
341de2362d3Smrg				 DST_SEL_W(SQ_SEL_1),
342de2362d3Smrg				 USE_CONST_FIELDS(0),
343de2362d3Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
344de2362d3Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
345de2362d3Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
346de2362d3Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
347de2362d3Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
348de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
349de2362d3Smrg			     ENDIAN_SWAP(SQ_ENDIAN_8IN32),
350de2362d3Smrg#else
351de2362d3Smrg			     ENDIAN_SWAP(SQ_ENDIAN_NONE),
352de2362d3Smrg#endif
353de2362d3Smrg			     CONST_BUF_NO_STRIDE(0),
354de2362d3Smrg			     MEGA_FETCH(1));
355de2362d3Smrg    shader[i++] = VTX_DWORD_PAD;
356de2362d3Smrg    /* 6/7 */
357de2362d3Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
358de2362d3Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
359de2362d3Smrg			     FETCH_WHOLE_QUAD(0),
360de2362d3Smrg			     BUFFER_ID(0),
361de2362d3Smrg			     SRC_GPR(0),
362de2362d3Smrg			     SRC_REL(ABSOLUTE),
363de2362d3Smrg			     SRC_SEL_X(SQ_SEL_X),
364de2362d3Smrg			     MEGA_FETCH_COUNT(8));
365de2362d3Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
366de2362d3Smrg				 DST_REL(0),
367de2362d3Smrg				 DST_SEL_X(SQ_SEL_X),
368de2362d3Smrg				 DST_SEL_Y(SQ_SEL_Y),
369de2362d3Smrg				 DST_SEL_Z(SQ_SEL_0),
370de2362d3Smrg				 DST_SEL_W(SQ_SEL_1),
371de2362d3Smrg				 USE_CONST_FIELDS(0),
372de2362d3Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
373de2362d3Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
374de2362d3Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
375de2362d3Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
376de2362d3Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
377de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
378de2362d3Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
379de2362d3Smrg#else
380de2362d3Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
381de2362d3Smrg#endif
382de2362d3Smrg			     CONST_BUF_NO_STRIDE(0),
383de2362d3Smrg			     MEGA_FETCH(0));
384de2362d3Smrg    shader[i++] = VTX_DWORD_PAD;
385de2362d3Smrg
386de2362d3Smrg    return i;
387de2362d3Smrg}
388de2362d3Smrg
389de2362d3Smrg/* copy ps --------------------------------------- */
390de2362d3Smrgint R600_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader)
391de2362d3Smrg{
392de2362d3Smrg    int i=0;
393de2362d3Smrg
394de2362d3Smrg    /* CF INST 0 */
395de2362d3Smrg    shader[i++] = CF_DWORD0(ADDR(2));
396de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
397de2362d3Smrg			    CF_CONST(0),
398de2362d3Smrg			    COND(SQ_CF_COND_ACTIVE),
399de2362d3Smrg			    I_COUNT(1),
400de2362d3Smrg			    CALL_COUNT(0),
401de2362d3Smrg			    END_OF_PROGRAM(0),
402de2362d3Smrg			    VALID_PIXEL_MODE(0),
403de2362d3Smrg			    CF_INST(SQ_CF_INST_TEX),
404de2362d3Smrg			    WHOLE_QUAD_MODE(0),
405de2362d3Smrg			    BARRIER(1));
406de2362d3Smrg    /* CF INST 1 */
407de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
408de2362d3Smrg					  TYPE(SQ_EXPORT_PIXEL),
409de2362d3Smrg					  RW_GPR(0),
410de2362d3Smrg					  RW_REL(ABSOLUTE),
411de2362d3Smrg					  INDEX_GPR(0),
412de2362d3Smrg					  ELEM_SIZE(1));
413de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
414de2362d3Smrg					       SRC_SEL_Y(SQ_SEL_Y),
415de2362d3Smrg					       SRC_SEL_Z(SQ_SEL_Z),
416de2362d3Smrg					       SRC_SEL_W(SQ_SEL_W),
417de2362d3Smrg					       R6xx_ELEM_LOOP(0),
418de2362d3Smrg					       BURST_COUNT(1),
419de2362d3Smrg					       END_OF_PROGRAM(1),
420de2362d3Smrg					       VALID_PIXEL_MODE(0),
421de2362d3Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
422de2362d3Smrg					       WHOLE_QUAD_MODE(0),
423de2362d3Smrg					       BARRIER(1));
424de2362d3Smrg    /* TEX INST 0 */
425de2362d3Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
426de2362d3Smrg			     BC_FRAC_MODE(0),
427de2362d3Smrg			     FETCH_WHOLE_QUAD(0),
428de2362d3Smrg			     RESOURCE_ID(0),
429de2362d3Smrg			     SRC_GPR(0),
430de2362d3Smrg			     SRC_REL(ABSOLUTE),
431de2362d3Smrg			     R7xx_ALT_CONST(0));
432de2362d3Smrg    shader[i++] = TEX_DWORD1(DST_GPR(0),
433de2362d3Smrg			     DST_REL(ABSOLUTE),
434de2362d3Smrg			     DST_SEL_X(SQ_SEL_X), /* R */
435de2362d3Smrg			     DST_SEL_Y(SQ_SEL_Y), /* G */
436de2362d3Smrg			     DST_SEL_Z(SQ_SEL_Z), /* B */
437de2362d3Smrg			     DST_SEL_W(SQ_SEL_W), /* A */
438de2362d3Smrg			     LOD_BIAS(0),
439de2362d3Smrg			     COORD_TYPE_X(TEX_UNNORMALIZED),
440de2362d3Smrg			     COORD_TYPE_Y(TEX_UNNORMALIZED),
441de2362d3Smrg			     COORD_TYPE_Z(TEX_UNNORMALIZED),
442de2362d3Smrg			     COORD_TYPE_W(TEX_UNNORMALIZED));
443de2362d3Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
444de2362d3Smrg			     OFFSET_Y(0),
445de2362d3Smrg			     OFFSET_Z(0),
446de2362d3Smrg			     SAMPLER_ID(0),
447de2362d3Smrg			     SRC_SEL_X(SQ_SEL_X),
448de2362d3Smrg			     SRC_SEL_Y(SQ_SEL_Y),
449de2362d3Smrg			     SRC_SEL_Z(SQ_SEL_0),
450de2362d3Smrg			     SRC_SEL_W(SQ_SEL_1));
451de2362d3Smrg    shader[i++] = TEX_DWORD_PAD;
452de2362d3Smrg
453de2362d3Smrg    return i;
454de2362d3Smrg}
455de2362d3Smrg
/*
 * ; xv vertex shader
 * 00 VTX: ADDR(6) CNT(2)
 *       0  VFETCH R1.xy01, R0.x, fc0  MEGA(16) FORMAT(32_32_FLOAT)
 *          FORMAT_COMP(SIGNED)
 *       1  VFETCH R0.xy01, R0.x, fc0  MINI(8) OFFSET(8) FORMAT(32_32_FLOAT)
 *          FORMAT_COMP(SIGNED)
 * 01 ALU: ADDR(4) CNT(2)
 *       2  x: MUL R0.x, R0.x, C0.x
 *       3  y: MUL R0.y, R0.y, C0.y
 * 02 EXP_DONE: POS0, R1
 * 03 EXP_DONE: PARAM0, R0  NO_BARRIER
 * END_OF_PROGRAM
 */
467de2362d3Smrgint R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
468de2362d3Smrg{
469de2362d3Smrg    int i = 0;
470de2362d3Smrg
471de2362d3Smrg    /* 0 */
472de2362d3Smrg    shader[i++] = CF_DWORD0(ADDR(6));
473de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
474de2362d3Smrg                            CF_CONST(0),
475de2362d3Smrg                            COND(SQ_CF_COND_ACTIVE),
476de2362d3Smrg                            I_COUNT(2),
477de2362d3Smrg                            CALL_COUNT(0),
478de2362d3Smrg                            END_OF_PROGRAM(0),
479de2362d3Smrg                            VALID_PIXEL_MODE(0),
480de2362d3Smrg                            CF_INST(SQ_CF_INST_VTX),
481de2362d3Smrg                            WHOLE_QUAD_MODE(0),
482de2362d3Smrg                            BARRIER(1));
483de2362d3Smrg
484de2362d3Smrg    /* 1 - ALU */
485de2362d3Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(4),
486de2362d3Smrg				KCACHE_BANK0(0),
487de2362d3Smrg				KCACHE_BANK1(0),
488de2362d3Smrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
489de2362d3Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
490de2362d3Smrg				KCACHE_ADDR0(0),
491de2362d3Smrg				KCACHE_ADDR1(0),
492de2362d3Smrg				I_COUNT(2),
493de2362d3Smrg				USES_WATERFALL(0),
494de2362d3Smrg				CF_INST(SQ_CF_INST_ALU),
495de2362d3Smrg				WHOLE_QUAD_MODE(0),
496de2362d3Smrg				BARRIER(1));
497de2362d3Smrg
498de2362d3Smrg    /* 2 */
499de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
500de2362d3Smrg                                          TYPE(SQ_EXPORT_POS),
501de2362d3Smrg                                          RW_GPR(1),
502de2362d3Smrg                                          RW_REL(ABSOLUTE),
503de2362d3Smrg                                          INDEX_GPR(0),
504de2362d3Smrg                                          ELEM_SIZE(3));
505de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
506de2362d3Smrg                                               SRC_SEL_Y(SQ_SEL_Y),
507de2362d3Smrg                                               SRC_SEL_Z(SQ_SEL_Z),
508de2362d3Smrg                                               SRC_SEL_W(SQ_SEL_W),
509de2362d3Smrg                                               R6xx_ELEM_LOOP(0),
510de2362d3Smrg                                               BURST_COUNT(1),
511de2362d3Smrg                                               END_OF_PROGRAM(0),
512de2362d3Smrg                                               VALID_PIXEL_MODE(0),
513de2362d3Smrg                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
514de2362d3Smrg                                               WHOLE_QUAD_MODE(0),
515de2362d3Smrg                                               BARRIER(1));
516de2362d3Smrg    /* 3 */
517de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
518de2362d3Smrg                                          TYPE(SQ_EXPORT_PARAM),
519de2362d3Smrg                                          RW_GPR(0),
520de2362d3Smrg                                          RW_REL(ABSOLUTE),
521de2362d3Smrg                                          INDEX_GPR(0),
522de2362d3Smrg                                          ELEM_SIZE(3));
523de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
524de2362d3Smrg                                               SRC_SEL_Y(SQ_SEL_Y),
525de2362d3Smrg                                               SRC_SEL_Z(SQ_SEL_Z),
526de2362d3Smrg                                               SRC_SEL_W(SQ_SEL_W),
527de2362d3Smrg                                               R6xx_ELEM_LOOP(0),
528de2362d3Smrg                                               BURST_COUNT(1),
529de2362d3Smrg                                               END_OF_PROGRAM(1),
530de2362d3Smrg                                               VALID_PIXEL_MODE(0),
531de2362d3Smrg                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
532de2362d3Smrg                                               WHOLE_QUAD_MODE(0),
533de2362d3Smrg                                               BARRIER(0));
534de2362d3Smrg
535de2362d3Smrg
536de2362d3Smrg    /* 4 texX / w */
537de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
538de2362d3Smrg                             SRC0_REL(ABSOLUTE),
539de2362d3Smrg                             SRC0_ELEM(ELEM_X),
540de2362d3Smrg                             SRC0_NEG(0),
541de2362d3Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
542de2362d3Smrg                             SRC1_REL(ABSOLUTE),
543de2362d3Smrg                             SRC1_ELEM(ELEM_X),
544de2362d3Smrg                             SRC1_NEG(0),
545de2362d3Smrg                             INDEX_MODE(SQ_INDEX_AR_X),
546de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
547de2362d3Smrg                             LAST(0));
548de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
549de2362d3Smrg                                 SRC0_ABS(0),
550de2362d3Smrg                                 SRC1_ABS(0),
551de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
552de2362d3Smrg                                 UPDATE_PRED(0),
553de2362d3Smrg                                 WRITE_MASK(1),
554de2362d3Smrg                                 FOG_MERGE(0),
555de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
556de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_MUL),
557de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
558de2362d3Smrg                                 DST_GPR(0),
559de2362d3Smrg                                 DST_REL(ABSOLUTE),
560de2362d3Smrg                                 DST_ELEM(ELEM_X),
561de2362d3Smrg                                 CLAMP(0));
562de2362d3Smrg
563de2362d3Smrg    /* 5 texY / h */
564de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
565de2362d3Smrg                             SRC0_REL(ABSOLUTE),
566de2362d3Smrg                             SRC0_ELEM(ELEM_Y),
567de2362d3Smrg                             SRC0_NEG(0),
568de2362d3Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
569de2362d3Smrg                             SRC1_REL(ABSOLUTE),
570de2362d3Smrg                             SRC1_ELEM(ELEM_Y),
571de2362d3Smrg                             SRC1_NEG(0),
572de2362d3Smrg                             INDEX_MODE(SQ_INDEX_AR_X),
573de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
574de2362d3Smrg                             LAST(1));
575de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
576de2362d3Smrg                                 SRC0_ABS(0),
577de2362d3Smrg                                 SRC1_ABS(0),
578de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
579de2362d3Smrg                                 UPDATE_PRED(0),
580de2362d3Smrg                                 WRITE_MASK(1),
581de2362d3Smrg                                 FOG_MERGE(0),
582de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
583de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_MUL),
584de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
585de2362d3Smrg                                 DST_GPR(0),
586de2362d3Smrg                                 DST_REL(ABSOLUTE),
587de2362d3Smrg                                 DST_ELEM(ELEM_Y),
588de2362d3Smrg                                 CLAMP(0));
589de2362d3Smrg
590de2362d3Smrg    /* 6/7 */
591de2362d3Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
592de2362d3Smrg                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
593de2362d3Smrg                             FETCH_WHOLE_QUAD(0),
594de2362d3Smrg                             BUFFER_ID(0),
595de2362d3Smrg                             SRC_GPR(0),
596de2362d3Smrg                             SRC_REL(ABSOLUTE),
597de2362d3Smrg                             SRC_SEL_X(SQ_SEL_X),
598de2362d3Smrg                             MEGA_FETCH_COUNT(16));
599de2362d3Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
600de2362d3Smrg                                 DST_REL(ABSOLUTE),
601de2362d3Smrg                                 DST_SEL_X(SQ_SEL_X),
602de2362d3Smrg                                 DST_SEL_Y(SQ_SEL_Y),
603de2362d3Smrg                                 DST_SEL_Z(SQ_SEL_0),
604de2362d3Smrg                                 DST_SEL_W(SQ_SEL_1),
605de2362d3Smrg                                 USE_CONST_FIELDS(0),
606de2362d3Smrg                                 DATA_FORMAT(FMT_32_32_FLOAT),
607de2362d3Smrg                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
608de2362d3Smrg                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
609de2362d3Smrg                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
610de2362d3Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
611de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
612de2362d3Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
613de2362d3Smrg#else
614de2362d3Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
615de2362d3Smrg#endif
616de2362d3Smrg                             CONST_BUF_NO_STRIDE(0),
617de2362d3Smrg                             MEGA_FETCH(1));
618de2362d3Smrg    shader[i++] = VTX_DWORD_PAD;
619de2362d3Smrg    /* 8/9 */
620de2362d3Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
621de2362d3Smrg                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
622de2362d3Smrg                             FETCH_WHOLE_QUAD(0),
623de2362d3Smrg                             BUFFER_ID(0),
624de2362d3Smrg                             SRC_GPR(0),
625de2362d3Smrg                             SRC_REL(ABSOLUTE),
626de2362d3Smrg                             SRC_SEL_X(SQ_SEL_X),
627de2362d3Smrg                             MEGA_FETCH_COUNT(8));
628de2362d3Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
629de2362d3Smrg                                 DST_REL(ABSOLUTE),
630de2362d3Smrg                                 DST_SEL_X(SQ_SEL_X),
631de2362d3Smrg                                 DST_SEL_Y(SQ_SEL_Y),
632de2362d3Smrg                                 DST_SEL_Z(SQ_SEL_0),
633de2362d3Smrg                                 DST_SEL_W(SQ_SEL_1),
634de2362d3Smrg                                 USE_CONST_FIELDS(0),
635de2362d3Smrg                                 DATA_FORMAT(FMT_32_32_FLOAT),
636de2362d3Smrg                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
637de2362d3Smrg                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
638de2362d3Smrg                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
639de2362d3Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
640de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
641de2362d3Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
642de2362d3Smrg#else
643de2362d3Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
644de2362d3Smrg#endif
645de2362d3Smrg                             CONST_BUF_NO_STRIDE(0),
646de2362d3Smrg                             MEGA_FETCH(0));
647de2362d3Smrg    shader[i++] = VTX_DWORD_PAD;
648de2362d3Smrg
649de2362d3Smrg    return i;
650de2362d3Smrg}
651de2362d3Smrg
/* xv ps --------------------------------------- */
/*
 * Writes the xv pixel shader program into shader[] one 32-bit word at a
 * time and returns the number of words written (56 for the sequence below).
 *
 * ChipSet is not referenced in this function body.
 * shader must have room for every word written here; no bounds checking
 * is done.
 *
 * The numbers in the slot comments count pairs of words: CF and ALU entries
 * take one pair, TEX entries take two pairs (three words plus one pad word).
 * The ADDR() values used below refer to those same slot numbers.
 *
 * Layout as written:
 *   0-1    conditional CALLs (COND BOOL / NOT_BOOL, CF_CONST 0) to the
 *          TEX routines at slots 16 / 24
 *   2      ALU clause at slot 4 covering the twelve MULADDs in slots 4-15
 *   3      export of r2 to CF_PIXEL_MRT0 with END_OF_PROGRAM set
 *   4-15   three groups of MULADDs writing r2.xyz:
 *          c0.w * r1.x + c0.xyz, then c1.xyz * r1.y + PV, then
 *          c2.xyz * r1.z + PV (CLAMP set); the fourth op of each group
 *          writes r2.w = 0 * 0 + 1 and carries LAST(1)
 *   16-23  TEX routine: resources 0, 1, 2 sampled into r1.x, r1.z, r1.y
 *   24-27  TEX routine: resource 0 sampled into r1.xyz
 * All samples take their coordinates from r0 (SRC_SEL x, y, 0, 1).
 *
 * NOTE(review): the three-sample routine looks like a planar YUV path and
 * the one-sample routine a packed path, selected by bool constant 0 --
 * confirm against the caller that programs that constant.
 */
652de2362d3Smrgint R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
653de2362d3Smrg{
654de2362d3Smrg    int i = 0; /* next free word in shader[]; also the return value */
655de2362d3Smrg
656de2362d3Smrg    /* 0: CALL slot 16 when COND is BOOL (CF_CONST 0) */
657de2362d3Smrg    shader[i++] = CF_DWORD0(ADDR(16));
658de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
659de2362d3Smrg                            CF_CONST(0),
660de2362d3Smrg                            COND(SQ_CF_COND_BOOL),
661de2362d3Smrg                            I_COUNT(0),
662de2362d3Smrg                            CALL_COUNT(0),
663de2362d3Smrg                            END_OF_PROGRAM(0),
664de2362d3Smrg                            VALID_PIXEL_MODE(0),
665de2362d3Smrg                            CF_INST(SQ_CF_INST_CALL),
666de2362d3Smrg                            WHOLE_QUAD_MODE(0),
667de2362d3Smrg                            BARRIER(0));
668de2362d3Smrg    /* 1: CALL slot 24 when COND is NOT_BOOL (CF_CONST 0) */
669de2362d3Smrg    shader[i++] = CF_DWORD0(ADDR(24));
670de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
671de2362d3Smrg                            CF_CONST(0),
672de2362d3Smrg                            COND(SQ_CF_COND_NOT_BOOL),
673de2362d3Smrg                            I_COUNT(0),
674de2362d3Smrg                            CALL_COUNT(0),
675de2362d3Smrg                            END_OF_PROGRAM(0),
676de2362d3Smrg                            VALID_PIXEL_MODE(0),
677de2362d3Smrg                            CF_INST(SQ_CF_INST_CALL),
678de2362d3Smrg                            WHOLE_QUAD_MODE(0),
679de2362d3Smrg                            BARRIER(0));
680de2362d3Smrg    /* 2: ALU clause starting at slot 4, I_COUNT(12) */
681de2362d3Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(4),
682de2362d3Smrg                                KCACHE_BANK0(0),
683de2362d3Smrg                                KCACHE_BANK1(0),
684de2362d3Smrg                                KCACHE_MODE0(SQ_CF_KCACHE_NOP));
685de2362d3Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
686de2362d3Smrg                                KCACHE_ADDR0(0),
687de2362d3Smrg                                KCACHE_ADDR1(0),
688de2362d3Smrg                                I_COUNT(12),
689de2362d3Smrg                                USES_WATERFALL(0),
690de2362d3Smrg                                CF_INST(SQ_CF_INST_ALU),
691de2362d3Smrg                                WHOLE_QUAD_MODE(0),
692de2362d3Smrg                                BARRIER(1));
693de2362d3Smrg    /* 3: export r2.xyzw to CF_PIXEL_MRT0; EXPORT_DONE with END_OF_PROGRAM set */
694de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
695de2362d3Smrg                                          TYPE(SQ_EXPORT_PIXEL),
696de2362d3Smrg                                          RW_GPR(2),
697de2362d3Smrg                                          RW_REL(ABSOLUTE),
698de2362d3Smrg                                          INDEX_GPR(0),
699de2362d3Smrg                                          ELEM_SIZE(3));
700de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
701de2362d3Smrg                                               SRC_SEL_Y(SQ_SEL_Y),
702de2362d3Smrg                                               SRC_SEL_Z(SQ_SEL_Z),
703de2362d3Smrg                                               SRC_SEL_W(SQ_SEL_W),
704de2362d3Smrg                                               R6xx_ELEM_LOOP(0),
705de2362d3Smrg                                               BURST_COUNT(1),
706de2362d3Smrg                                               END_OF_PROGRAM(1),
707de2362d3Smrg                                               VALID_PIXEL_MODE(0),
708de2362d3Smrg                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
709de2362d3Smrg                                               WHOLE_QUAD_MODE(0),
710de2362d3Smrg                                               BARRIER(1));
711de2362d3Smrg    /* 4,5,6,7: first MULADD group, c0.w * r1.x + c0.xyz */
712de2362d3Smrg    /* r2.x = MAD(c0.w, r1.x, c0.x) */
713de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
714de2362d3Smrg                             SRC0_REL(ABSOLUTE),
715de2362d3Smrg                             SRC0_ELEM(ELEM_W),
716de2362d3Smrg                             SRC0_NEG(0),
717de2362d3Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
718de2362d3Smrg                             SRC1_REL(ABSOLUTE),
719de2362d3Smrg                             SRC1_ELEM(ELEM_X),
720de2362d3Smrg                             SRC1_NEG(0),
721de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
722de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
723de2362d3Smrg                             LAST(0));
724de2362d3Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0),
725de2362d3Smrg                                 SRC2_REL(ABSOLUTE),
726de2362d3Smrg                                 SRC2_ELEM(ELEM_X),
727de2362d3Smrg                                 SRC2_NEG(0),
728de2362d3Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
729de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
730de2362d3Smrg                                 DST_GPR(2),
731de2362d3Smrg                                 DST_REL(ABSOLUTE),
732de2362d3Smrg                                 DST_ELEM(ELEM_X),
733de2362d3Smrg                                 CLAMP(0));
734de2362d3Smrg    /* r2.y = MAD(c0.w, r1.x, c0.y) */
735de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
736de2362d3Smrg                             SRC0_REL(ABSOLUTE),
737de2362d3Smrg                             SRC0_ELEM(ELEM_W),
738de2362d3Smrg                             SRC0_NEG(0),
739de2362d3Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
740de2362d3Smrg                             SRC1_REL(ABSOLUTE),
741de2362d3Smrg                             SRC1_ELEM(ELEM_X),
742de2362d3Smrg                             SRC1_NEG(0),
743de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
744de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
745de2362d3Smrg                             LAST(0));
746de2362d3Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0),
747de2362d3Smrg                                 SRC2_REL(ABSOLUTE),
748de2362d3Smrg                                 SRC2_ELEM(ELEM_Y),
749de2362d3Smrg                                 SRC2_NEG(0),
750de2362d3Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
751de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
752de2362d3Smrg                                 DST_GPR(2),
753de2362d3Smrg                                 DST_REL(ABSOLUTE),
754de2362d3Smrg                                 DST_ELEM(ELEM_Y),
755de2362d3Smrg                                 CLAMP(0));
756de2362d3Smrg    /* r2.z = MAD(c0.w, r1.x, c0.z) */
757de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
758de2362d3Smrg                             SRC0_REL(ABSOLUTE),
759de2362d3Smrg                             SRC0_ELEM(ELEM_W),
760de2362d3Smrg                             SRC0_NEG(0),
761de2362d3Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
762de2362d3Smrg                             SRC1_REL(ABSOLUTE),
763de2362d3Smrg                             SRC1_ELEM(ELEM_X),
764de2362d3Smrg                             SRC1_NEG(0),
765de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
766de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
767de2362d3Smrg                             LAST(0));
768de2362d3Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0),
769de2362d3Smrg                                 SRC2_REL(ABSOLUTE),
770de2362d3Smrg                                 SRC2_ELEM(ELEM_Z),
771de2362d3Smrg                                 SRC2_NEG(0),
772de2362d3Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
773de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
774de2362d3Smrg                                 DST_GPR(2),
775de2362d3Smrg                                 DST_REL(ABSOLUTE),
776de2362d3Smrg                                 DST_ELEM(ELEM_Z),
777de2362d3Smrg                                 CLAMP(0));
778de2362d3Smrg    /* r2.w = MAD(0, 0, 1) */
779de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
780de2362d3Smrg                             SRC0_REL(ABSOLUTE),
781de2362d3Smrg                             SRC0_ELEM(ELEM_X),
782de2362d3Smrg                             SRC0_NEG(0),
783de2362d3Smrg                             SRC1_SEL(SQ_ALU_SRC_0),
784de2362d3Smrg                             SRC1_REL(ABSOLUTE),
785de2362d3Smrg                             SRC1_ELEM(ELEM_X),
786de2362d3Smrg                             SRC1_NEG(0),
787de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
788de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
789de2362d3Smrg                             LAST(1));
790de2362d3Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
791de2362d3Smrg                                 SRC2_REL(ABSOLUTE),
792de2362d3Smrg                                 SRC2_ELEM(ELEM_X),
793de2362d3Smrg                                 SRC2_NEG(0),
794de2362d3Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
795de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
796de2362d3Smrg                                 DST_GPR(2),
797de2362d3Smrg                                 DST_REL(ABSOLUTE),
798de2362d3Smrg                                 DST_ELEM(ELEM_W),
799de2362d3Smrg                                 CLAMP(0));
800de2362d3Smrg
801de2362d3Smrg    /* 8,9,10,11: second MULADD group, c1.xyz * r1.y + PV */
802de2362d3Smrg    /* r2.x = MAD(c1.x, r1.y, pv.x) */
803de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
804de2362d3Smrg                             SRC0_REL(ABSOLUTE),
805de2362d3Smrg                             SRC0_ELEM(ELEM_X),
806de2362d3Smrg                             SRC0_NEG(0),
807de2362d3Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
808de2362d3Smrg                             SRC1_REL(ABSOLUTE),
809de2362d3Smrg                             SRC1_ELEM(ELEM_Y),
810de2362d3Smrg                             SRC1_NEG(0),
811de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
812de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
813de2362d3Smrg                             LAST(0));
814de2362d3Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
815de2362d3Smrg                                 SRC2_REL(ABSOLUTE),
816de2362d3Smrg                                 SRC2_ELEM(ELEM_X),
817de2362d3Smrg                                 SRC2_NEG(0),
818de2362d3Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
819de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
820de2362d3Smrg                                 DST_GPR(2),
821de2362d3Smrg                                 DST_REL(ABSOLUTE),
822de2362d3Smrg                                 DST_ELEM(ELEM_X),
823de2362d3Smrg                                 CLAMP(0));
824de2362d3Smrg    /* r2.y = MAD(c1.y, r1.y, pv.y) */
825de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
826de2362d3Smrg                             SRC0_REL(ABSOLUTE),
827de2362d3Smrg                             SRC0_ELEM(ELEM_Y),
828de2362d3Smrg                             SRC0_NEG(0),
829de2362d3Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
830de2362d3Smrg                             SRC1_REL(ABSOLUTE),
831de2362d3Smrg                             SRC1_ELEM(ELEM_Y),
832de2362d3Smrg                             SRC1_NEG(0),
833de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
834de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
835de2362d3Smrg                             LAST(0));
836de2362d3Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
837de2362d3Smrg                                 SRC2_REL(ABSOLUTE),
838de2362d3Smrg                                 SRC2_ELEM(ELEM_Y),
839de2362d3Smrg                                 SRC2_NEG(0),
840de2362d3Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
841de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
842de2362d3Smrg                                 DST_GPR(2),
843de2362d3Smrg                                 DST_REL(ABSOLUTE),
844de2362d3Smrg                                 DST_ELEM(ELEM_Y),
845de2362d3Smrg                                 CLAMP(0));
846de2362d3Smrg    /* r2.z = MAD(c1.z, r1.y, pv.z) */
847de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
848de2362d3Smrg                             SRC0_REL(ABSOLUTE),
849de2362d3Smrg                             SRC0_ELEM(ELEM_Z),
850de2362d3Smrg                             SRC0_NEG(0),
851de2362d3Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
852de2362d3Smrg                             SRC1_REL(ABSOLUTE),
853de2362d3Smrg                             SRC1_ELEM(ELEM_Y),
854de2362d3Smrg                             SRC1_NEG(0),
855de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
856de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
857de2362d3Smrg                             LAST(0));
858de2362d3Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
859de2362d3Smrg                                 SRC2_REL(ABSOLUTE),
860de2362d3Smrg                                 SRC2_ELEM(ELEM_Z),
861de2362d3Smrg                                 SRC2_NEG(0),
862de2362d3Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
863de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
864de2362d3Smrg                                 DST_GPR(2),
865de2362d3Smrg                                 DST_REL(ABSOLUTE),
866de2362d3Smrg                                 DST_ELEM(ELEM_Z),
867de2362d3Smrg                                 CLAMP(0));
868de2362d3Smrg    /* r2.w = MAD(0, 0, 1) */
869de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
870de2362d3Smrg                             SRC0_REL(ABSOLUTE),
871de2362d3Smrg                             SRC0_ELEM(ELEM_X),
872de2362d3Smrg                             SRC0_NEG(0),
873de2362d3Smrg                             SRC1_SEL(SQ_ALU_SRC_0),
874de2362d3Smrg                             SRC1_REL(ABSOLUTE),
875de2362d3Smrg                             SRC1_ELEM(ELEM_X),
876de2362d3Smrg                             SRC1_NEG(0),
877de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
878de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
879de2362d3Smrg                             LAST(1));
880de2362d3Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
881de2362d3Smrg                                 SRC2_REL(ABSOLUTE),
882de2362d3Smrg                                 SRC2_ELEM(ELEM_W), /* NOTE(review): the other two r2.w MULADDs use ELEM_X here; source is SQ_ALU_SRC_1, so the element presumably does not matter -- confirm */
883de2362d3Smrg                                 SRC2_NEG(0),
884de2362d3Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
885de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
886de2362d3Smrg                                 DST_GPR(2),
887de2362d3Smrg                                 DST_REL(ABSOLUTE),
888de2362d3Smrg                                 DST_ELEM(ELEM_W),
889de2362d3Smrg                                 CLAMP(0));
890de2362d3Smrg    /* 12,13,14,15: third MULADD group, c2.xyz * r1.z + PV; CLAMP(1) on all four */
891de2362d3Smrg    /* r2.x = MAD(c2.x, r1.z, pv.x) */
892de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2),
893de2362d3Smrg                             SRC0_REL(ABSOLUTE),
894de2362d3Smrg                             SRC0_ELEM(ELEM_X),
895de2362d3Smrg                             SRC0_NEG(0),
896de2362d3Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
897de2362d3Smrg                             SRC1_REL(ABSOLUTE),
898de2362d3Smrg                             SRC1_ELEM(ELEM_Z),
899de2362d3Smrg                             SRC1_NEG(0),
900de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
901de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
902de2362d3Smrg                             LAST(0));
903de2362d3Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
904de2362d3Smrg                                 SRC2_REL(ABSOLUTE),
905de2362d3Smrg                                 SRC2_ELEM(ELEM_X),
906de2362d3Smrg                                 SRC2_NEG(0),
907de2362d3Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
908de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
909de2362d3Smrg                                 DST_GPR(2),
910de2362d3Smrg                                 DST_REL(ABSOLUTE),
911de2362d3Smrg                                 DST_ELEM(ELEM_X),
912de2362d3Smrg                                 CLAMP(1));
913de2362d3Smrg    /* r2.y = MAD(c2.y, r1.z, pv.y) */
914de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2),
915de2362d3Smrg                             SRC0_REL(ABSOLUTE),
916de2362d3Smrg                             SRC0_ELEM(ELEM_Y),
917de2362d3Smrg                             SRC0_NEG(0),
918de2362d3Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
919de2362d3Smrg                             SRC1_REL(ABSOLUTE),
920de2362d3Smrg                             SRC1_ELEM(ELEM_Z),
921de2362d3Smrg                             SRC1_NEG(0),
922de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
923de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
924de2362d3Smrg                             LAST(0));
925de2362d3Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
926de2362d3Smrg                                 SRC2_REL(ABSOLUTE),
927de2362d3Smrg                                 SRC2_ELEM(ELEM_Y),
928de2362d3Smrg                                 SRC2_NEG(0),
929de2362d3Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
930de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
931de2362d3Smrg                                 DST_GPR(2),
932de2362d3Smrg                                 DST_REL(ABSOLUTE),
933de2362d3Smrg                                 DST_ELEM(ELEM_Y),
934de2362d3Smrg                                 CLAMP(1));
935de2362d3Smrg    /* r2.z = MAD(c2.z, r1.z, pv.z) */
936de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2),
937de2362d3Smrg                             SRC0_REL(ABSOLUTE),
938de2362d3Smrg                             SRC0_ELEM(ELEM_Z),
939de2362d3Smrg                             SRC0_NEG(0),
940de2362d3Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
941de2362d3Smrg                             SRC1_REL(ABSOLUTE),
942de2362d3Smrg                             SRC1_ELEM(ELEM_Z),
943de2362d3Smrg                             SRC1_NEG(0),
944de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
945de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
946de2362d3Smrg                             LAST(0));
947de2362d3Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
948de2362d3Smrg                                 SRC2_REL(ABSOLUTE),
949de2362d3Smrg                                 SRC2_ELEM(ELEM_Z),
950de2362d3Smrg                                 SRC2_NEG(0),
951de2362d3Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
952de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
953de2362d3Smrg                                 DST_GPR(2),
954de2362d3Smrg                                 DST_REL(ABSOLUTE),
955de2362d3Smrg                                 DST_ELEM(ELEM_Z),
956de2362d3Smrg                                 CLAMP(1));
957de2362d3Smrg    /* r2.w = MAD(0, 0, 1) */
958de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
959de2362d3Smrg                             SRC0_REL(ABSOLUTE),
960de2362d3Smrg                             SRC0_ELEM(ELEM_X),
961de2362d3Smrg                             SRC0_NEG(0),
962de2362d3Smrg                             SRC1_SEL(SQ_ALU_SRC_0),
963de2362d3Smrg                             SRC1_REL(ABSOLUTE),
964de2362d3Smrg                             SRC1_ELEM(ELEM_X),
965de2362d3Smrg                             SRC1_NEG(0),
966de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
967de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
968de2362d3Smrg                             LAST(1));
969de2362d3Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
970de2362d3Smrg                                 SRC2_REL(ABSOLUTE),
971de2362d3Smrg                                 SRC2_ELEM(ELEM_X),
972de2362d3Smrg                                 SRC2_NEG(0),
973de2362d3Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
974de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
975de2362d3Smrg                                 DST_GPR(2),
976de2362d3Smrg                                 DST_REL(ABSOLUTE),
977de2362d3Smrg                                 DST_ELEM(ELEM_W),
978de2362d3Smrg                                 CLAMP(1));
979de2362d3Smrg
980de2362d3Smrg    /* 16: TEX clause at slot 18, I_COUNT(3); target of the CALL in slot 0 */
981de2362d3Smrg    shader[i++] = CF_DWORD0(ADDR(18));
982de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
983de2362d3Smrg                            CF_CONST(0),
984de2362d3Smrg                            COND(SQ_CF_COND_ACTIVE),
985de2362d3Smrg                            I_COUNT(3),
986de2362d3Smrg                            CALL_COUNT(0),
987de2362d3Smrg                            END_OF_PROGRAM(0),
988de2362d3Smrg                            VALID_PIXEL_MODE(0),
989de2362d3Smrg                            CF_INST(SQ_CF_INST_TEX),
990de2362d3Smrg                            WHOLE_QUAD_MODE(0),
991de2362d3Smrg                            BARRIER(1));
992de2362d3Smrg    /* 17: RETURN */
993de2362d3Smrg    shader[i++] = CF_DWORD0(ADDR(0));
994de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
995de2362d3Smrg			    CF_CONST(0),
996de2362d3Smrg			    COND(SQ_CF_COND_ACTIVE),
997de2362d3Smrg			    I_COUNT(0),
998de2362d3Smrg			    CALL_COUNT(0),
999de2362d3Smrg			    END_OF_PROGRAM(0),
1000de2362d3Smrg			    VALID_PIXEL_MODE(0),
1001de2362d3Smrg			    CF_INST(SQ_CF_INST_RETURN),
1002de2362d3Smrg			    WHOLE_QUAD_MODE(0),
1003de2362d3Smrg			    BARRIER(1));
1004de2362d3Smrg    /* 18/19: sample resource 0 / sampler 0; r1.x <- X, w <- SQ_SEL_1, y and z masked */
1005de2362d3Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1006de2362d3Smrg                             BC_FRAC_MODE(0),
1007de2362d3Smrg                             FETCH_WHOLE_QUAD(0),
1008de2362d3Smrg                             RESOURCE_ID(0),
1009de2362d3Smrg                             SRC_GPR(0),
1010de2362d3Smrg                             SRC_REL(ABSOLUTE),
1011de2362d3Smrg                             R7xx_ALT_CONST(0));
1012de2362d3Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1013de2362d3Smrg                             DST_REL(ABSOLUTE),
1014de2362d3Smrg                             DST_SEL_X(SQ_SEL_X),
1015de2362d3Smrg                             DST_SEL_Y(SQ_SEL_MASK),
1016de2362d3Smrg                             DST_SEL_Z(SQ_SEL_MASK),
1017de2362d3Smrg                             DST_SEL_W(SQ_SEL_1),
1018de2362d3Smrg                             LOD_BIAS(0),
1019de2362d3Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1020de2362d3Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1021de2362d3Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1022de2362d3Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1023de2362d3Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1024de2362d3Smrg                             OFFSET_Y(0),
1025de2362d3Smrg                             OFFSET_Z(0),
1026de2362d3Smrg                             SAMPLER_ID(0),
1027de2362d3Smrg                             SRC_SEL_X(SQ_SEL_X),
1028de2362d3Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1029de2362d3Smrg                             SRC_SEL_Z(SQ_SEL_0),
1030de2362d3Smrg                             SRC_SEL_W(SQ_SEL_1));
1031de2362d3Smrg    shader[i++] = TEX_DWORD_PAD;
1032de2362d3Smrg    /* 20/21: sample resource 1 / sampler 1; r1.z <- X, x, y and w masked */
1033de2362d3Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1034de2362d3Smrg                             BC_FRAC_MODE(0),
1035de2362d3Smrg                             FETCH_WHOLE_QUAD(0),
1036de2362d3Smrg                             RESOURCE_ID(1),
1037de2362d3Smrg                             SRC_GPR(0),
1038de2362d3Smrg                             SRC_REL(ABSOLUTE),
1039de2362d3Smrg                             R7xx_ALT_CONST(0));
1040de2362d3Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1041de2362d3Smrg                             DST_REL(ABSOLUTE),
1042de2362d3Smrg                             DST_SEL_X(SQ_SEL_MASK),
1043de2362d3Smrg                             DST_SEL_Y(SQ_SEL_MASK),
1044de2362d3Smrg                             DST_SEL_Z(SQ_SEL_X),
1045de2362d3Smrg                             DST_SEL_W(SQ_SEL_MASK),
1046de2362d3Smrg                             LOD_BIAS(0),
1047de2362d3Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1048de2362d3Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1049de2362d3Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1050de2362d3Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1051de2362d3Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1052de2362d3Smrg                             OFFSET_Y(0),
1053de2362d3Smrg                             OFFSET_Z(0),
1054de2362d3Smrg                             SAMPLER_ID(1),
1055de2362d3Smrg                             SRC_SEL_X(SQ_SEL_X),
1056de2362d3Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1057de2362d3Smrg                             SRC_SEL_Z(SQ_SEL_0),
1058de2362d3Smrg                             SRC_SEL_W(SQ_SEL_1));
1059de2362d3Smrg    shader[i++] = TEX_DWORD_PAD;
1060de2362d3Smrg    /* 22/23: sample resource 2 / sampler 2; r1.y <- X, x, z and w masked */
1061de2362d3Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1062de2362d3Smrg                             BC_FRAC_MODE(0),
1063de2362d3Smrg                             FETCH_WHOLE_QUAD(0),
1064de2362d3Smrg                             RESOURCE_ID(2),
1065de2362d3Smrg                             SRC_GPR(0),
1066de2362d3Smrg                             SRC_REL(ABSOLUTE),
1067de2362d3Smrg                             R7xx_ALT_CONST(0));
1068de2362d3Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1069de2362d3Smrg                             DST_REL(ABSOLUTE),
1070de2362d3Smrg                             DST_SEL_X(SQ_SEL_MASK),
1071de2362d3Smrg                             DST_SEL_Y(SQ_SEL_X),
1072de2362d3Smrg                             DST_SEL_Z(SQ_SEL_MASK),
1073de2362d3Smrg                             DST_SEL_W(SQ_SEL_MASK),
1074de2362d3Smrg                             LOD_BIAS(0),
1075de2362d3Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1076de2362d3Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1077de2362d3Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1078de2362d3Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1079de2362d3Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1080de2362d3Smrg                             OFFSET_Y(0),
1081de2362d3Smrg                             OFFSET_Z(0),
1082de2362d3Smrg                             SAMPLER_ID(2),
1083de2362d3Smrg                             SRC_SEL_X(SQ_SEL_X),
1084de2362d3Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1085de2362d3Smrg                             SRC_SEL_Z(SQ_SEL_0),
1086de2362d3Smrg                             SRC_SEL_W(SQ_SEL_1));
1087de2362d3Smrg    shader[i++] = TEX_DWORD_PAD;
1088de2362d3Smrg    /* 24: TEX clause at slot 26, I_COUNT(1); target of the CALL in slot 1 */
1089de2362d3Smrg    shader[i++] = CF_DWORD0(ADDR(26));
1090de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1091de2362d3Smrg                            CF_CONST(0),
1092de2362d3Smrg                            COND(SQ_CF_COND_ACTIVE),
1093de2362d3Smrg                            I_COUNT(1),
1094de2362d3Smrg                            CALL_COUNT(0),
1095de2362d3Smrg                            END_OF_PROGRAM(0),
1096de2362d3Smrg                            VALID_PIXEL_MODE(0),
1097de2362d3Smrg                            CF_INST(SQ_CF_INST_TEX),
1098de2362d3Smrg                            WHOLE_QUAD_MODE(0),
1099de2362d3Smrg                            BARRIER(1));
1100de2362d3Smrg    /* 25: RETURN */
1101de2362d3Smrg    shader[i++] = CF_DWORD0(ADDR(0));
1102de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1103de2362d3Smrg			    CF_CONST(0),
1104de2362d3Smrg			    COND(SQ_CF_COND_ACTIVE),
1105de2362d3Smrg			    I_COUNT(0),
1106de2362d3Smrg			    CALL_COUNT(0),
1107de2362d3Smrg			    END_OF_PROGRAM(0),
1108de2362d3Smrg			    VALID_PIXEL_MODE(0),
1109de2362d3Smrg			    CF_INST(SQ_CF_INST_RETURN),
1110de2362d3Smrg			    WHOLE_QUAD_MODE(0),
1111de2362d3Smrg			    BARRIER(1));
1112de2362d3Smrg    /* 26/27: sample resource 0 / sampler 0; r1.xyz <- X, Y, Z and w <- SQ_SEL_1 */
1113de2362d3Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1114de2362d3Smrg                             BC_FRAC_MODE(0),
1115de2362d3Smrg                             FETCH_WHOLE_QUAD(0),
1116de2362d3Smrg                             RESOURCE_ID(0),
1117de2362d3Smrg                             SRC_GPR(0),
1118de2362d3Smrg                             SRC_REL(ABSOLUTE),
1119de2362d3Smrg                             R7xx_ALT_CONST(0));
1120de2362d3Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1121de2362d3Smrg                             DST_REL(ABSOLUTE),
1122de2362d3Smrg                             DST_SEL_X(SQ_SEL_X),
1123de2362d3Smrg                             DST_SEL_Y(SQ_SEL_Y),
1124de2362d3Smrg                             DST_SEL_Z(SQ_SEL_Z),
1125de2362d3Smrg                             DST_SEL_W(SQ_SEL_1),
1126de2362d3Smrg                             LOD_BIAS(0),
1127de2362d3Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1128de2362d3Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1129de2362d3Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1130de2362d3Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1131de2362d3Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1132de2362d3Smrg                             OFFSET_Y(0),
1133de2362d3Smrg                             OFFSET_Z(0),
1134de2362d3Smrg                             SAMPLER_ID(0),
1135de2362d3Smrg                             SRC_SEL_X(SQ_SEL_X),
1136de2362d3Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1137de2362d3Smrg                             SRC_SEL_Z(SQ_SEL_0),
1138de2362d3Smrg                             SRC_SEL_W(SQ_SEL_1));
1139de2362d3Smrg    shader[i++] = TEX_DWORD_PAD;
1140de2362d3Smrg
1141de2362d3Smrg    return i; /* number of words written to shader[] */
1142de2362d3Smrg}
1143de2362d3Smrg
1144de2362d3Smrg/* comp vs --------------------------------------- */
1145de2362d3Smrgint R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
1146de2362d3Smrg{
1147de2362d3Smrg    int i = 0;
1148de2362d3Smrg
1149de2362d3Smrg    /* 0 */
1150de2362d3Smrg    shader[i++] = CF_DWORD0(ADDR(3));
1151de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1152de2362d3Smrg                            CF_CONST(0),
1153de2362d3Smrg                            COND(SQ_CF_COND_BOOL),
1154de2362d3Smrg                            I_COUNT(0),
1155de2362d3Smrg                            CALL_COUNT(0),
1156de2362d3Smrg                            END_OF_PROGRAM(0),
1157de2362d3Smrg                            VALID_PIXEL_MODE(0),
1158de2362d3Smrg                            CF_INST(SQ_CF_INST_CALL),
1159de2362d3Smrg                            WHOLE_QUAD_MODE(0),
1160de2362d3Smrg                            BARRIER(0));
1161de2362d3Smrg    /* 1 */
1162de2362d3Smrg    shader[i++] = CF_DWORD0(ADDR(9));
1163de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1164de2362d3Smrg                            CF_CONST(0),
1165de2362d3Smrg                            COND(SQ_CF_COND_NOT_BOOL),
1166de2362d3Smrg                            I_COUNT(0),
1167de2362d3Smrg                            CALL_COUNT(0),
1168de2362d3Smrg                            END_OF_PROGRAM(0),
1169de2362d3Smrg                            VALID_PIXEL_MODE(0),
1170de2362d3Smrg                            CF_INST(SQ_CF_INST_CALL),
1171de2362d3Smrg                            WHOLE_QUAD_MODE(0),
1172de2362d3Smrg                            BARRIER(0));
1173de2362d3Smrg    /* 2 */
1174de2362d3Smrg    shader[i++] = CF_DWORD0(ADDR(0));
1175de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1176de2362d3Smrg                            CF_CONST(0),
1177de2362d3Smrg                            COND(SQ_CF_COND_ACTIVE),
1178de2362d3Smrg                            I_COUNT(0),
1179de2362d3Smrg                            CALL_COUNT(0),
1180de2362d3Smrg                            END_OF_PROGRAM(1),
1181de2362d3Smrg                            VALID_PIXEL_MODE(0),
1182de2362d3Smrg                            CF_INST(SQ_CF_INST_NOP),
1183de2362d3Smrg                            WHOLE_QUAD_MODE(0),
1184de2362d3Smrg                            BARRIER(1));
1185de2362d3Smrg    /* 3 - mask sub */
1186de2362d3Smrg    shader[i++] = CF_DWORD0(ADDR(44));
1187de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1188de2362d3Smrg			    CF_CONST(0),
1189de2362d3Smrg			    COND(SQ_CF_COND_ACTIVE),
1190de2362d3Smrg			    I_COUNT(3),
1191de2362d3Smrg			    CALL_COUNT(0),
1192de2362d3Smrg			    END_OF_PROGRAM(0),
1193de2362d3Smrg			    VALID_PIXEL_MODE(0),
1194de2362d3Smrg			    CF_INST(SQ_CF_INST_VTX),
1195de2362d3Smrg			    WHOLE_QUAD_MODE(0),
1196de2362d3Smrg			    BARRIER(1));
1197de2362d3Smrg
1198de2362d3Smrg    /* 4 - ALU */
1199de2362d3Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(14),
1200de2362d3Smrg				KCACHE_BANK0(0),
1201de2362d3Smrg				KCACHE_BANK1(0),
1202de2362d3Smrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
1203de2362d3Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1204de2362d3Smrg				KCACHE_ADDR0(0),
1205de2362d3Smrg				KCACHE_ADDR1(0),
1206de2362d3Smrg				I_COUNT(20),
1207de2362d3Smrg				USES_WATERFALL(0),
1208de2362d3Smrg				CF_INST(SQ_CF_INST_ALU),
1209de2362d3Smrg				WHOLE_QUAD_MODE(0),
1210de2362d3Smrg				BARRIER(1));
1211de2362d3Smrg
1212de2362d3Smrg    /* 5 - dst */
1213de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1214de2362d3Smrg					  TYPE(SQ_EXPORT_POS),
1215de2362d3Smrg					  RW_GPR(2),
1216de2362d3Smrg					  RW_REL(ABSOLUTE),
1217de2362d3Smrg					  INDEX_GPR(0),
1218de2362d3Smrg					  ELEM_SIZE(0));
1219de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1220de2362d3Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1221de2362d3Smrg					       SRC_SEL_Z(SQ_SEL_0),
1222de2362d3Smrg					       SRC_SEL_W(SQ_SEL_1),
1223de2362d3Smrg					       R6xx_ELEM_LOOP(0),
1224de2362d3Smrg					       BURST_COUNT(1),
1225de2362d3Smrg					       END_OF_PROGRAM(0),
1226de2362d3Smrg					       VALID_PIXEL_MODE(0),
1227de2362d3Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1228de2362d3Smrg					       WHOLE_QUAD_MODE(0),
1229de2362d3Smrg					       BARRIER(1));
1230de2362d3Smrg    /* 6 - src */
1231de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1232de2362d3Smrg					  TYPE(SQ_EXPORT_PARAM),
1233de2362d3Smrg					  RW_GPR(1),
1234de2362d3Smrg					  RW_REL(ABSOLUTE),
1235de2362d3Smrg					  INDEX_GPR(0),
1236de2362d3Smrg					  ELEM_SIZE(0));
1237de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1238de2362d3Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1239de2362d3Smrg					       SRC_SEL_Z(SQ_SEL_0),
1240de2362d3Smrg					       SRC_SEL_W(SQ_SEL_1),
1241de2362d3Smrg					       R6xx_ELEM_LOOP(0),
1242de2362d3Smrg					       BURST_COUNT(1),
1243de2362d3Smrg					       END_OF_PROGRAM(0),
1244de2362d3Smrg					       VALID_PIXEL_MODE(0),
1245de2362d3Smrg					       CF_INST(SQ_CF_INST_EXPORT),
1246de2362d3Smrg					       WHOLE_QUAD_MODE(0),
1247de2362d3Smrg					       BARRIER(0));
1248de2362d3Smrg    /* 7 - mask */
1249de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
1250de2362d3Smrg					  TYPE(SQ_EXPORT_PARAM),
1251de2362d3Smrg					  RW_GPR(0),
1252de2362d3Smrg					  RW_REL(ABSOLUTE),
1253de2362d3Smrg					  INDEX_GPR(0),
1254de2362d3Smrg					  ELEM_SIZE(0));
1255de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1256de2362d3Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1257de2362d3Smrg					       SRC_SEL_Z(SQ_SEL_0),
1258de2362d3Smrg					       SRC_SEL_W(SQ_SEL_1),
1259de2362d3Smrg					       R6xx_ELEM_LOOP(0),
1260de2362d3Smrg					       BURST_COUNT(1),
1261de2362d3Smrg					       END_OF_PROGRAM(0),
1262de2362d3Smrg					       VALID_PIXEL_MODE(0),
1263de2362d3Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1264de2362d3Smrg					       WHOLE_QUAD_MODE(0),
1265de2362d3Smrg					       BARRIER(0));
1266de2362d3Smrg    /* 8 */
1267de2362d3Smrg    shader[i++] = CF_DWORD0(ADDR(0));
1268de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1269de2362d3Smrg			    CF_CONST(0),
1270de2362d3Smrg			    COND(SQ_CF_COND_ACTIVE),
1271de2362d3Smrg			    I_COUNT(0),
1272de2362d3Smrg			    CALL_COUNT(0),
1273de2362d3Smrg			    END_OF_PROGRAM(0),
1274de2362d3Smrg			    VALID_PIXEL_MODE(0),
1275de2362d3Smrg			    CF_INST(SQ_CF_INST_RETURN),
1276de2362d3Smrg			    WHOLE_QUAD_MODE(0),
1277de2362d3Smrg			    BARRIER(1));
1278de2362d3Smrg    /* 9 - non-mask sub */
1279de2362d3Smrg    shader[i++] = CF_DWORD0(ADDR(50));
1280de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1281de2362d3Smrg			    CF_CONST(0),
1282de2362d3Smrg			    COND(SQ_CF_COND_ACTIVE),
1283de2362d3Smrg			    I_COUNT(2),
1284de2362d3Smrg			    CALL_COUNT(0),
1285de2362d3Smrg			    END_OF_PROGRAM(0),
1286de2362d3Smrg			    VALID_PIXEL_MODE(0),
1287de2362d3Smrg			    CF_INST(SQ_CF_INST_VTX),
1288de2362d3Smrg			    WHOLE_QUAD_MODE(0),
1289de2362d3Smrg			    BARRIER(1));
1290de2362d3Smrg
1291de2362d3Smrg    /* 10 - ALU */
1292de2362d3Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(34),
1293de2362d3Smrg				KCACHE_BANK0(0),
1294de2362d3Smrg				KCACHE_BANK1(0),
1295de2362d3Smrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
1296de2362d3Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1297de2362d3Smrg				KCACHE_ADDR0(0),
1298de2362d3Smrg				KCACHE_ADDR1(0),
1299de2362d3Smrg				I_COUNT(10),
1300de2362d3Smrg				USES_WATERFALL(0),
1301de2362d3Smrg				CF_INST(SQ_CF_INST_ALU),
1302de2362d3Smrg				WHOLE_QUAD_MODE(0),
1303de2362d3Smrg				BARRIER(1));
1304de2362d3Smrg
1305de2362d3Smrg    /* 11 - dst */
1306de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1307de2362d3Smrg					  TYPE(SQ_EXPORT_POS),
1308de2362d3Smrg					  RW_GPR(1),
1309de2362d3Smrg					  RW_REL(ABSOLUTE),
1310de2362d3Smrg					  INDEX_GPR(0),
1311de2362d3Smrg					  ELEM_SIZE(0));
1312de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1313de2362d3Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1314de2362d3Smrg					       SRC_SEL_Z(SQ_SEL_0),
1315de2362d3Smrg					       SRC_SEL_W(SQ_SEL_1),
1316de2362d3Smrg					       R6xx_ELEM_LOOP(0),
1317de2362d3Smrg					       BURST_COUNT(0),
1318de2362d3Smrg					       END_OF_PROGRAM(0),
1319de2362d3Smrg					       VALID_PIXEL_MODE(0),
1320de2362d3Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1321de2362d3Smrg					       WHOLE_QUAD_MODE(0),
1322de2362d3Smrg					       BARRIER(1));
1323de2362d3Smrg    /* 12 - src */
1324de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1325de2362d3Smrg					  TYPE(SQ_EXPORT_PARAM),
1326de2362d3Smrg					  RW_GPR(0),
1327de2362d3Smrg					  RW_REL(ABSOLUTE),
1328de2362d3Smrg					  INDEX_GPR(0),
1329de2362d3Smrg					  ELEM_SIZE(0));
1330de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1331de2362d3Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1332de2362d3Smrg					       SRC_SEL_Z(SQ_SEL_0),
1333de2362d3Smrg					       SRC_SEL_W(SQ_SEL_1),
1334de2362d3Smrg					       R6xx_ELEM_LOOP(0),
1335de2362d3Smrg					       BURST_COUNT(0),
1336de2362d3Smrg					       END_OF_PROGRAM(0),
1337de2362d3Smrg					       VALID_PIXEL_MODE(0),
1338de2362d3Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1339de2362d3Smrg					       WHOLE_QUAD_MODE(0),
1340de2362d3Smrg					       BARRIER(0));
1341de2362d3Smrg    /* 13 */
1342de2362d3Smrg    shader[i++] = CF_DWORD0(ADDR(0));
1343de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1344de2362d3Smrg			    CF_CONST(0),
1345de2362d3Smrg			    COND(SQ_CF_COND_ACTIVE),
1346de2362d3Smrg			    I_COUNT(0),
1347de2362d3Smrg			    CALL_COUNT(0),
1348de2362d3Smrg			    END_OF_PROGRAM(0),
1349de2362d3Smrg			    VALID_PIXEL_MODE(0),
1350de2362d3Smrg			    CF_INST(SQ_CF_INST_RETURN),
1351de2362d3Smrg			    WHOLE_QUAD_MODE(0),
1352de2362d3Smrg			    BARRIER(1));
1353de2362d3Smrg
1354de2362d3Smrg
1355de2362d3Smrg    /* 14 srcX.x DOT4 - mask */
1356de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1357de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1358de2362d3Smrg                             SRC0_ELEM(ELEM_X),
1359de2362d3Smrg                             SRC0_NEG(0),
1360de2362d3Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1361de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1362de2362d3Smrg                             SRC1_ELEM(ELEM_X),
1363de2362d3Smrg                             SRC1_NEG(0),
1364de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1365de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1366de2362d3Smrg                             LAST(0));
1367de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1368de2362d3Smrg                                 SRC0_ABS(0),
1369de2362d3Smrg                                 SRC1_ABS(0),
1370de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1371de2362d3Smrg                                 UPDATE_PRED(0),
1372de2362d3Smrg                                 WRITE_MASK(1),
1373de2362d3Smrg                                 FOG_MERGE(0),
1374de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1375de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1376de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1377de2362d3Smrg                                 DST_GPR(3),
1378de2362d3Smrg                                 DST_REL(ABSOLUTE),
1379de2362d3Smrg                                 DST_ELEM(ELEM_X),
1380de2362d3Smrg                                 CLAMP(0));
1381de2362d3Smrg
1382de2362d3Smrg    /* 15 srcX.y DOT4 - mask */
1383de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1384de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1385de2362d3Smrg                             SRC0_ELEM(ELEM_Y),
1386de2362d3Smrg                             SRC0_NEG(0),
1387de2362d3Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1388de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1389de2362d3Smrg                             SRC1_ELEM(ELEM_Y),
1390de2362d3Smrg                             SRC1_NEG(0),
1391de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1392de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1393de2362d3Smrg                             LAST(0));
1394de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1395de2362d3Smrg                                 SRC0_ABS(0),
1396de2362d3Smrg                                 SRC1_ABS(0),
1397de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1398de2362d3Smrg                                 UPDATE_PRED(0),
1399de2362d3Smrg                                 WRITE_MASK(0),
1400de2362d3Smrg                                 FOG_MERGE(0),
1401de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1402de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1403de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1404de2362d3Smrg                                 DST_GPR(3),
1405de2362d3Smrg                                 DST_REL(ABSOLUTE),
1406de2362d3Smrg                                 DST_ELEM(ELEM_Y),
1407de2362d3Smrg                                 CLAMP(0));
1408de2362d3Smrg
1409de2362d3Smrg    /* 16 srcX.z DOT4 - mask */
1410de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1411de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1412de2362d3Smrg                             SRC0_ELEM(ELEM_Z),
1413de2362d3Smrg                             SRC0_NEG(0),
1414de2362d3Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1415de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1416de2362d3Smrg                             SRC1_ELEM(ELEM_Z),
1417de2362d3Smrg                             SRC1_NEG(0),
1418de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1419de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1420de2362d3Smrg                             LAST(0));
1421de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1422de2362d3Smrg                                 SRC0_ABS(0),
1423de2362d3Smrg                                 SRC1_ABS(0),
1424de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1425de2362d3Smrg                                 UPDATE_PRED(0),
1426de2362d3Smrg                                 WRITE_MASK(0),
1427de2362d3Smrg                                 FOG_MERGE(0),
1428de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1429de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1430de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1431de2362d3Smrg                                 DST_GPR(3),
1432de2362d3Smrg                                 DST_REL(ABSOLUTE),
1433de2362d3Smrg                                 DST_ELEM(ELEM_Z),
1434de2362d3Smrg                                 CLAMP(0));
1435de2362d3Smrg
1436de2362d3Smrg    /* 17 srcX.w DOT4 - mask */
1437de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1438de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1439de2362d3Smrg                             SRC0_ELEM(ELEM_W),
1440de2362d3Smrg                             SRC0_NEG(0),
1441de2362d3Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1442de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1443de2362d3Smrg                             SRC1_ELEM(ELEM_W),
1444de2362d3Smrg                             SRC1_NEG(0),
1445de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1446de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1447de2362d3Smrg                             LAST(1));
1448de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1449de2362d3Smrg                                 SRC0_ABS(0),
1450de2362d3Smrg                                 SRC1_ABS(0),
1451de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1452de2362d3Smrg                                 UPDATE_PRED(0),
1453de2362d3Smrg                                 WRITE_MASK(0),
1454de2362d3Smrg                                 FOG_MERGE(0),
1455de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1456de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1457de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1458de2362d3Smrg                                 DST_GPR(3),
1459de2362d3Smrg                                 DST_REL(ABSOLUTE),
1460de2362d3Smrg                                 DST_ELEM(ELEM_W),
1461de2362d3Smrg                                 CLAMP(0));
1462de2362d3Smrg
1463de2362d3Smrg    /* 18 srcY.x DOT4 - mask */
1464de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1465de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1466de2362d3Smrg                             SRC0_ELEM(ELEM_X),
1467de2362d3Smrg                             SRC0_NEG(0),
1468de2362d3Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1469de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1470de2362d3Smrg                             SRC1_ELEM(ELEM_X),
1471de2362d3Smrg                             SRC1_NEG(0),
1472de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1473de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1474de2362d3Smrg                             LAST(0));
1475de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1476de2362d3Smrg                                 SRC0_ABS(0),
1477de2362d3Smrg                                 SRC1_ABS(0),
1478de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1479de2362d3Smrg                                 UPDATE_PRED(0),
1480de2362d3Smrg                                 WRITE_MASK(0),
1481de2362d3Smrg                                 FOG_MERGE(0),
1482de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1483de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1484de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1485de2362d3Smrg                                 DST_GPR(3),
1486de2362d3Smrg                                 DST_REL(ABSOLUTE),
1487de2362d3Smrg                                 DST_ELEM(ELEM_X),
1488de2362d3Smrg                                 CLAMP(0));
1489de2362d3Smrg
1490de2362d3Smrg    /* 19 srcY.y DOT4 - mask */
1491de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1492de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1493de2362d3Smrg                             SRC0_ELEM(ELEM_Y),
1494de2362d3Smrg                             SRC0_NEG(0),
1495de2362d3Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1496de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1497de2362d3Smrg                             SRC1_ELEM(ELEM_Y),
1498de2362d3Smrg                             SRC1_NEG(0),
1499de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1500de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1501de2362d3Smrg                             LAST(0));
1502de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1503de2362d3Smrg                                 SRC0_ABS(0),
1504de2362d3Smrg                                 SRC1_ABS(0),
1505de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1506de2362d3Smrg                                 UPDATE_PRED(0),
1507de2362d3Smrg                                 WRITE_MASK(1),
1508de2362d3Smrg                                 FOG_MERGE(0),
1509de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1510de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1511de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1512de2362d3Smrg                                 DST_GPR(3),
1513de2362d3Smrg                                 DST_REL(ABSOLUTE),
1514de2362d3Smrg                                 DST_ELEM(ELEM_Y),
1515de2362d3Smrg                                 CLAMP(0));
1516de2362d3Smrg
1517de2362d3Smrg    /* 20 srcY.z DOT4 - mask */
1518de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1519de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1520de2362d3Smrg                             SRC0_ELEM(ELEM_Z),
1521de2362d3Smrg                             SRC0_NEG(0),
1522de2362d3Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1523de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1524de2362d3Smrg                             SRC1_ELEM(ELEM_Z),
1525de2362d3Smrg                             SRC1_NEG(0),
1526de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1527de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1528de2362d3Smrg                             LAST(0));
1529de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1530de2362d3Smrg                                 SRC0_ABS(0),
1531de2362d3Smrg                                 SRC1_ABS(0),
1532de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1533de2362d3Smrg                                 UPDATE_PRED(0),
1534de2362d3Smrg                                 WRITE_MASK(0),
1535de2362d3Smrg                                 FOG_MERGE(0),
1536de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1537de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1538de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1539de2362d3Smrg                                 DST_GPR(3),
1540de2362d3Smrg                                 DST_REL(ABSOLUTE),
1541de2362d3Smrg                                 DST_ELEM(ELEM_Z),
1542de2362d3Smrg                                 CLAMP(0));
1543de2362d3Smrg
1544de2362d3Smrg    /* 21 srcY.w DOT4 - mask */
1545de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1546de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1547de2362d3Smrg                             SRC0_ELEM(ELEM_W),
1548de2362d3Smrg                             SRC0_NEG(0),
1549de2362d3Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1550de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1551de2362d3Smrg                             SRC1_ELEM(ELEM_W),
1552de2362d3Smrg                             SRC1_NEG(0),
1553de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1554de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1555de2362d3Smrg                             LAST(1));
1556de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1557de2362d3Smrg                                 SRC0_ABS(0),
1558de2362d3Smrg                                 SRC1_ABS(0),
1559de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1560de2362d3Smrg                                 UPDATE_PRED(0),
1561de2362d3Smrg                                 WRITE_MASK(0),
1562de2362d3Smrg                                 FOG_MERGE(0),
1563de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1564de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1565de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1566de2362d3Smrg                                 DST_GPR(3),
1567de2362d3Smrg                                 DST_REL(ABSOLUTE),
1568de2362d3Smrg                                 DST_ELEM(ELEM_W),
1569de2362d3Smrg                                 CLAMP(0));
1570de2362d3Smrg
1571de2362d3Smrg    /* 22 maskX.x DOT4 - mask */
1572de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1573de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1574de2362d3Smrg                             SRC0_ELEM(ELEM_X),
1575de2362d3Smrg                             SRC0_NEG(0),
1576de2362d3Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1577de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1578de2362d3Smrg                             SRC1_ELEM(ELEM_X),
1579de2362d3Smrg                             SRC1_NEG(0),
1580de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1581de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1582de2362d3Smrg                             LAST(0));
1583de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1584de2362d3Smrg                                 SRC0_ABS(0),
1585de2362d3Smrg                                 SRC1_ABS(0),
1586de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1587de2362d3Smrg                                 UPDATE_PRED(0),
1588de2362d3Smrg                                 WRITE_MASK(1),
1589de2362d3Smrg                                 FOG_MERGE(0),
1590de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1591de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1592de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1593de2362d3Smrg                                 DST_GPR(4),
1594de2362d3Smrg                                 DST_REL(ABSOLUTE),
1595de2362d3Smrg                                 DST_ELEM(ELEM_X),
1596de2362d3Smrg                                 CLAMP(0));
1597de2362d3Smrg
1598de2362d3Smrg    /* 23 maskX.y DOT4 - mask */
1599de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1600de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1601de2362d3Smrg                             SRC0_ELEM(ELEM_Y),
1602de2362d3Smrg                             SRC0_NEG(0),
1603de2362d3Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1604de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1605de2362d3Smrg                             SRC1_ELEM(ELEM_Y),
1606de2362d3Smrg                             SRC1_NEG(0),
1607de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1608de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1609de2362d3Smrg                             LAST(0));
1610de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1611de2362d3Smrg                                 SRC0_ABS(0),
1612de2362d3Smrg                                 SRC1_ABS(0),
1613de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1614de2362d3Smrg                                 UPDATE_PRED(0),
1615de2362d3Smrg                                 WRITE_MASK(0),
1616de2362d3Smrg                                 FOG_MERGE(0),
1617de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1618de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1619de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1620de2362d3Smrg                                 DST_GPR(4),
1621de2362d3Smrg                                 DST_REL(ABSOLUTE),
1622de2362d3Smrg                                 DST_ELEM(ELEM_Y),
1623de2362d3Smrg                                 CLAMP(0));
1624de2362d3Smrg
1625de2362d3Smrg    /* 24 maskX.z DOT4 - mask */
1626de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1627de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1628de2362d3Smrg                             SRC0_ELEM(ELEM_Z),
1629de2362d3Smrg                             SRC0_NEG(0),
1630de2362d3Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1631de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1632de2362d3Smrg                             SRC1_ELEM(ELEM_Z),
1633de2362d3Smrg                             SRC1_NEG(0),
1634de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1635de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1636de2362d3Smrg                             LAST(0));
1637de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1638de2362d3Smrg                                 SRC0_ABS(0),
1639de2362d3Smrg                                 SRC1_ABS(0),
1640de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1641de2362d3Smrg                                 UPDATE_PRED(0),
1642de2362d3Smrg                                 WRITE_MASK(0),
1643de2362d3Smrg                                 FOG_MERGE(0),
1644de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1645de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1646de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1647de2362d3Smrg                                 DST_GPR(4),
1648de2362d3Smrg                                 DST_REL(ABSOLUTE),
1649de2362d3Smrg                                 DST_ELEM(ELEM_Z),
1650de2362d3Smrg                                 CLAMP(0));
1651de2362d3Smrg
1652de2362d3Smrg    /* 25 maskX.w DOT4 - mask */
1653de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1654de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1655de2362d3Smrg                             SRC0_ELEM(ELEM_W),
1656de2362d3Smrg                             SRC0_NEG(0),
1657de2362d3Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1658de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1659de2362d3Smrg                             SRC1_ELEM(ELEM_W),
1660de2362d3Smrg                             SRC1_NEG(0),
1661de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1662de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1663de2362d3Smrg                             LAST(1));
1664de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1665de2362d3Smrg                                 SRC0_ABS(0),
1666de2362d3Smrg                                 SRC1_ABS(0),
1667de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1668de2362d3Smrg                                 UPDATE_PRED(0),
1669de2362d3Smrg                                 WRITE_MASK(0),
1670de2362d3Smrg                                 FOG_MERGE(0),
1671de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1672de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1673de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1674de2362d3Smrg                                 DST_GPR(4),
1675de2362d3Smrg                                 DST_REL(ABSOLUTE),
1676de2362d3Smrg                                 DST_ELEM(ELEM_W),
1677de2362d3Smrg                                 CLAMP(0));
1678de2362d3Smrg
1679de2362d3Smrg    /* 26 maskY.x DOT4 - mask */
1680de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1681de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1682de2362d3Smrg                             SRC0_ELEM(ELEM_X),
1683de2362d3Smrg                             SRC0_NEG(0),
1684de2362d3Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1685de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1686de2362d3Smrg                             SRC1_ELEM(ELEM_X),
1687de2362d3Smrg                             SRC1_NEG(0),
1688de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1689de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1690de2362d3Smrg                             LAST(0));
1691de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1692de2362d3Smrg                                 SRC0_ABS(0),
1693de2362d3Smrg                                 SRC1_ABS(0),
1694de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1695de2362d3Smrg                                 UPDATE_PRED(0),
1696de2362d3Smrg                                 WRITE_MASK(0),
1697de2362d3Smrg                                 FOG_MERGE(0),
1698de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1699de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1700de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1701de2362d3Smrg                                 DST_GPR(4),
1702de2362d3Smrg                                 DST_REL(ABSOLUTE),
1703de2362d3Smrg                                 DST_ELEM(ELEM_X),
1704de2362d3Smrg                                 CLAMP(0));
1705de2362d3Smrg
1706de2362d3Smrg    /* 27 maskY.y DOT4 - mask */
1707de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1708de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1709de2362d3Smrg                             SRC0_ELEM(ELEM_Y),
1710de2362d3Smrg                             SRC0_NEG(0),
1711de2362d3Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1712de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1713de2362d3Smrg                             SRC1_ELEM(ELEM_Y),
1714de2362d3Smrg                             SRC1_NEG(0),
1715de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1716de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1717de2362d3Smrg                             LAST(0));
1718de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1719de2362d3Smrg                                 SRC0_ABS(0),
1720de2362d3Smrg                                 SRC1_ABS(0),
1721de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1722de2362d3Smrg                                 UPDATE_PRED(0),
1723de2362d3Smrg                                 WRITE_MASK(1),
1724de2362d3Smrg                                 FOG_MERGE(0),
1725de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1726de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1727de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1728de2362d3Smrg                                 DST_GPR(4),
1729de2362d3Smrg                                 DST_REL(ABSOLUTE),
1730de2362d3Smrg                                 DST_ELEM(ELEM_Y),
1731de2362d3Smrg                                 CLAMP(0));
1732de2362d3Smrg
1733de2362d3Smrg    /* 28 maskY.z DOT4 - mask */
1734de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1735de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1736de2362d3Smrg                             SRC0_ELEM(ELEM_Z),
1737de2362d3Smrg                             SRC0_NEG(0),
1738de2362d3Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1739de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1740de2362d3Smrg                             SRC1_ELEM(ELEM_Z),
1741de2362d3Smrg                             SRC1_NEG(0),
1742de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1743de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1744de2362d3Smrg                             LAST(0));
1745de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1746de2362d3Smrg                                 SRC0_ABS(0),
1747de2362d3Smrg                                 SRC1_ABS(0),
1748de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1749de2362d3Smrg                                 UPDATE_PRED(0),
1750de2362d3Smrg                                 WRITE_MASK(0),
1751de2362d3Smrg                                 FOG_MERGE(0),
1752de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1753de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1754de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1755de2362d3Smrg                                 DST_GPR(4),
1756de2362d3Smrg                                 DST_REL(ABSOLUTE),
1757de2362d3Smrg                                 DST_ELEM(ELEM_Z),
1758de2362d3Smrg                                 CLAMP(0));
1759de2362d3Smrg
1760de2362d3Smrg    /* 29 maskY.w DOT4 - mask */
1761de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1762de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1763de2362d3Smrg                             SRC0_ELEM(ELEM_W),
1764de2362d3Smrg                             SRC0_NEG(0),
1765de2362d3Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1766de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1767de2362d3Smrg                             SRC1_ELEM(ELEM_W),
1768de2362d3Smrg                             SRC1_NEG(0),
1769de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1770de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1771de2362d3Smrg                             LAST(1));
1772de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1773de2362d3Smrg                                 SRC0_ABS(0),
1774de2362d3Smrg                                 SRC1_ABS(0),
1775de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1776de2362d3Smrg                                 UPDATE_PRED(0),
1777de2362d3Smrg                                 WRITE_MASK(0),
1778de2362d3Smrg                                 FOG_MERGE(0),
1779de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1780de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1781de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1782de2362d3Smrg                                 DST_GPR(4),
1783de2362d3Smrg                                 DST_REL(ABSOLUTE),
1784de2362d3Smrg                                 DST_ELEM(ELEM_W),
1785de2362d3Smrg                                 CLAMP(0));
1786de2362d3Smrg
1787de2362d3Smrg    /* 30 srcX / w */
1788de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
1789de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1790de2362d3Smrg                             SRC0_ELEM(ELEM_X),
1791de2362d3Smrg                             SRC0_NEG(0),
1792de2362d3Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1793de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1794de2362d3Smrg                             SRC1_ELEM(ELEM_W),
1795de2362d3Smrg                             SRC1_NEG(0),
1796de2362d3Smrg                             INDEX_MODE(SQ_INDEX_AR_X),
1797de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1798de2362d3Smrg                             LAST(1));
1799de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1800de2362d3Smrg                                 SRC0_ABS(0),
1801de2362d3Smrg                                 SRC1_ABS(0),
1802de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1803de2362d3Smrg                                 UPDATE_PRED(0),
1804de2362d3Smrg                                 WRITE_MASK(1),
1805de2362d3Smrg                                 FOG_MERGE(0),
1806de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1807de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_MUL),
1808de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1809de2362d3Smrg                                 DST_GPR(1),
1810de2362d3Smrg                                 DST_REL(ABSOLUTE),
1811de2362d3Smrg                                 DST_ELEM(ELEM_X),
1812de2362d3Smrg                                 CLAMP(0));
1813de2362d3Smrg
1814de2362d3Smrg    /* 31 srcY / h */
1815de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
1816de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1817de2362d3Smrg                             SRC0_ELEM(ELEM_Y),
1818de2362d3Smrg                             SRC0_NEG(0),
1819de2362d3Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
1820de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1821de2362d3Smrg                             SRC1_ELEM(ELEM_W),
1822de2362d3Smrg                             SRC1_NEG(0),
1823de2362d3Smrg                             INDEX_MODE(SQ_INDEX_AR_X),
1824de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1825de2362d3Smrg                             LAST(1));
1826de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1827de2362d3Smrg                                 SRC0_ABS(0),
1828de2362d3Smrg                                 SRC1_ABS(0),
1829de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1830de2362d3Smrg                                 UPDATE_PRED(0),
1831de2362d3Smrg                                 WRITE_MASK(1),
1832de2362d3Smrg                                 FOG_MERGE(0),
1833de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1834de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_MUL),
1835de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1836de2362d3Smrg                                 DST_GPR(1),
1837de2362d3Smrg                                 DST_REL(ABSOLUTE),
1838de2362d3Smrg                                 DST_ELEM(ELEM_Y),
1839de2362d3Smrg                                 CLAMP(0));
1840de2362d3Smrg
1841de2362d3Smrg    /* 32 maskX / w */
1842de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
1843de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1844de2362d3Smrg                             SRC0_ELEM(ELEM_X),
1845de2362d3Smrg                             SRC0_NEG(0),
1846de2362d3Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
1847de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1848de2362d3Smrg                             SRC1_ELEM(ELEM_W),
1849de2362d3Smrg                             SRC1_NEG(0),
1850de2362d3Smrg                             INDEX_MODE(SQ_INDEX_AR_X),
1851de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1852de2362d3Smrg                             LAST(1));
1853de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1854de2362d3Smrg                                 SRC0_ABS(0),
1855de2362d3Smrg                                 SRC1_ABS(0),
1856de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1857de2362d3Smrg                                 UPDATE_PRED(0),
1858de2362d3Smrg                                 WRITE_MASK(1),
1859de2362d3Smrg                                 FOG_MERGE(0),
1860de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1861de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_MUL),
1862de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1863de2362d3Smrg                                 DST_GPR(0),
1864de2362d3Smrg                                 DST_REL(ABSOLUTE),
1865de2362d3Smrg                                 DST_ELEM(ELEM_X),
1866de2362d3Smrg                                 CLAMP(0));
1867de2362d3Smrg
1868de2362d3Smrg    /* 33 maskY / h */
1869de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
1870de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1871de2362d3Smrg                             SRC0_ELEM(ELEM_Y),
1872de2362d3Smrg                             SRC0_NEG(0),
1873de2362d3Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
1874de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1875de2362d3Smrg                             SRC1_ELEM(ELEM_W),
1876de2362d3Smrg                             SRC1_NEG(0),
1877de2362d3Smrg                             INDEX_MODE(SQ_INDEX_AR_X),
1878de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1879de2362d3Smrg                             LAST(1));
1880de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1881de2362d3Smrg                                 SRC0_ABS(0),
1882de2362d3Smrg                                 SRC1_ABS(0),
1883de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1884de2362d3Smrg                                 UPDATE_PRED(0),
1885de2362d3Smrg                                 WRITE_MASK(1),
1886de2362d3Smrg                                 FOG_MERGE(0),
1887de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1888de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_MUL),
1889de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1890de2362d3Smrg                                 DST_GPR(0),
1891de2362d3Smrg                                 DST_REL(ABSOLUTE),
1892de2362d3Smrg                                 DST_ELEM(ELEM_Y),
1893de2362d3Smrg                                 CLAMP(0));
1894de2362d3Smrg
1895de2362d3Smrg    /* 34 srcX.x DOT4 - non-mask */
1896de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1897de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1898de2362d3Smrg                             SRC0_ELEM(ELEM_X),
1899de2362d3Smrg                             SRC0_NEG(0),
1900de2362d3Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1901de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1902de2362d3Smrg                             SRC1_ELEM(ELEM_X),
1903de2362d3Smrg                             SRC1_NEG(0),
1904de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1905de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1906de2362d3Smrg                             LAST(0));
1907de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1908de2362d3Smrg                                 SRC0_ABS(0),
1909de2362d3Smrg                                 SRC1_ABS(0),
1910de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1911de2362d3Smrg                                 UPDATE_PRED(0),
1912de2362d3Smrg                                 WRITE_MASK(1),
1913de2362d3Smrg                                 FOG_MERGE(0),
1914de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1915de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1916de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1917de2362d3Smrg                                 DST_GPR(2),
1918de2362d3Smrg                                 DST_REL(ABSOLUTE),
1919de2362d3Smrg                                 DST_ELEM(ELEM_X),
1920de2362d3Smrg                                 CLAMP(0));
1921de2362d3Smrg
1922de2362d3Smrg    /* 35 srcX.y DOT4 - non-mask */
1923de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1924de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1925de2362d3Smrg                             SRC0_ELEM(ELEM_Y),
1926de2362d3Smrg                             SRC0_NEG(0),
1927de2362d3Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1928de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1929de2362d3Smrg                             SRC1_ELEM(ELEM_Y),
1930de2362d3Smrg                             SRC1_NEG(0),
1931de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1932de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1933de2362d3Smrg                             LAST(0));
1934de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1935de2362d3Smrg                                 SRC0_ABS(0),
1936de2362d3Smrg                                 SRC1_ABS(0),
1937de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1938de2362d3Smrg                                 UPDATE_PRED(0),
1939de2362d3Smrg                                 WRITE_MASK(0),
1940de2362d3Smrg                                 FOG_MERGE(0),
1941de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1942de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1943de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1944de2362d3Smrg                                 DST_GPR(2),
1945de2362d3Smrg                                 DST_REL(ABSOLUTE),
1946de2362d3Smrg                                 DST_ELEM(ELEM_Y),
1947de2362d3Smrg                                 CLAMP(0));
1948de2362d3Smrg
1949de2362d3Smrg    /* 36 srcX.z DOT4 - non-mask */
1950de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1951de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1952de2362d3Smrg                             SRC0_ELEM(ELEM_Z),
1953de2362d3Smrg                             SRC0_NEG(0),
1954de2362d3Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1955de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1956de2362d3Smrg                             SRC1_ELEM(ELEM_Z),
1957de2362d3Smrg                             SRC1_NEG(0),
1958de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1959de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1960de2362d3Smrg                             LAST(0));
1961de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1962de2362d3Smrg                                 SRC0_ABS(0),
1963de2362d3Smrg                                 SRC1_ABS(0),
1964de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1965de2362d3Smrg                                 UPDATE_PRED(0),
1966de2362d3Smrg                                 WRITE_MASK(0),
1967de2362d3Smrg                                 FOG_MERGE(0),
1968de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1969de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1970de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1971de2362d3Smrg                                 DST_GPR(2),
1972de2362d3Smrg                                 DST_REL(ABSOLUTE),
1973de2362d3Smrg                                 DST_ELEM(ELEM_Z),
1974de2362d3Smrg                                 CLAMP(0));
1975de2362d3Smrg
1976de2362d3Smrg    /* 37 srcX.w DOT4 - non-mask */
1977de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1978de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1979de2362d3Smrg                             SRC0_ELEM(ELEM_W),
1980de2362d3Smrg                             SRC0_NEG(0),
1981de2362d3Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
1982de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1983de2362d3Smrg                             SRC1_ELEM(ELEM_W),
1984de2362d3Smrg                             SRC1_NEG(0),
1985de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1986de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1987de2362d3Smrg                             LAST(1));
1988de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1989de2362d3Smrg                                 SRC0_ABS(0),
1990de2362d3Smrg                                 SRC1_ABS(0),
1991de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1992de2362d3Smrg                                 UPDATE_PRED(0),
1993de2362d3Smrg                                 WRITE_MASK(0),
1994de2362d3Smrg                                 FOG_MERGE(0),
1995de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1996de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1997de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1998de2362d3Smrg                                 DST_GPR(2),
1999de2362d3Smrg                                 DST_REL(ABSOLUTE),
2000de2362d3Smrg                                 DST_ELEM(ELEM_W),
2001de2362d3Smrg                                 CLAMP(0));
2002de2362d3Smrg
2003de2362d3Smrg    /* 38 srcY.x DOT4 - non-mask */
2004de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2005de2362d3Smrg                             SRC0_REL(ABSOLUTE),
2006de2362d3Smrg                             SRC0_ELEM(ELEM_X),
2007de2362d3Smrg                             SRC0_NEG(0),
2008de2362d3Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2009de2362d3Smrg                             SRC1_REL(ABSOLUTE),
2010de2362d3Smrg                             SRC1_ELEM(ELEM_X),
2011de2362d3Smrg                             SRC1_NEG(0),
2012de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
2013de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2014de2362d3Smrg                             LAST(0));
2015de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2016de2362d3Smrg                                 SRC0_ABS(0),
2017de2362d3Smrg                                 SRC1_ABS(0),
2018de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
2019de2362d3Smrg                                 UPDATE_PRED(0),
2020de2362d3Smrg                                 WRITE_MASK(0),
2021de2362d3Smrg                                 FOG_MERGE(0),
2022de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2023de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2024de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2025de2362d3Smrg                                 DST_GPR(2),
2026de2362d3Smrg                                 DST_REL(ABSOLUTE),
2027de2362d3Smrg                                 DST_ELEM(ELEM_X),
2028de2362d3Smrg                                 CLAMP(0));
2029de2362d3Smrg
2030de2362d3Smrg    /* 39 srcY.y DOT4 - non-mask */
2031de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2032de2362d3Smrg                             SRC0_REL(ABSOLUTE),
2033de2362d3Smrg                             SRC0_ELEM(ELEM_Y),
2034de2362d3Smrg                             SRC0_NEG(0),
2035de2362d3Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2036de2362d3Smrg                             SRC1_REL(ABSOLUTE),
2037de2362d3Smrg                             SRC1_ELEM(ELEM_Y),
2038de2362d3Smrg                             SRC1_NEG(0),
2039de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
2040de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2041de2362d3Smrg                             LAST(0));
2042de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2043de2362d3Smrg                                 SRC0_ABS(0),
2044de2362d3Smrg                                 SRC1_ABS(0),
2045de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
2046de2362d3Smrg                                 UPDATE_PRED(0),
2047de2362d3Smrg                                 WRITE_MASK(1),
2048de2362d3Smrg                                 FOG_MERGE(0),
2049de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2050de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2051de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2052de2362d3Smrg                                 DST_GPR(2),
2053de2362d3Smrg                                 DST_REL(ABSOLUTE),
2054de2362d3Smrg                                 DST_ELEM(ELEM_Y),
2055de2362d3Smrg                                 CLAMP(0));
2056de2362d3Smrg
2057de2362d3Smrg    /* 40 srcY.z DOT4 - non-mask */
2058de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2059de2362d3Smrg                             SRC0_REL(ABSOLUTE),
2060de2362d3Smrg                             SRC0_ELEM(ELEM_Z),
2061de2362d3Smrg                             SRC0_NEG(0),
2062de2362d3Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2063de2362d3Smrg                             SRC1_REL(ABSOLUTE),
2064de2362d3Smrg                             SRC1_ELEM(ELEM_Z),
2065de2362d3Smrg                             SRC1_NEG(0),
2066de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
2067de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2068de2362d3Smrg                             LAST(0));
2069de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2070de2362d3Smrg                                 SRC0_ABS(0),
2071de2362d3Smrg                                 SRC1_ABS(0),
2072de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
2073de2362d3Smrg                                 UPDATE_PRED(0),
2074de2362d3Smrg                                 WRITE_MASK(0),
2075de2362d3Smrg                                 FOG_MERGE(0),
2076de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2077de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2078de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2079de2362d3Smrg                                 DST_GPR(2),
2080de2362d3Smrg                                 DST_REL(ABSOLUTE),
2081de2362d3Smrg                                 DST_ELEM(ELEM_Z),
2082de2362d3Smrg                                 CLAMP(0));
2083de2362d3Smrg
2084de2362d3Smrg    /* 41 srcY.w DOT4 - non-mask */
2085de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2086de2362d3Smrg                             SRC0_REL(ABSOLUTE),
2087de2362d3Smrg                             SRC0_ELEM(ELEM_W),
2088de2362d3Smrg                             SRC0_NEG(0),
2089de2362d3Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2090de2362d3Smrg                             SRC1_REL(ABSOLUTE),
2091de2362d3Smrg                             SRC1_ELEM(ELEM_W),
2092de2362d3Smrg                             SRC1_NEG(0),
2093de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
2094de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2095de2362d3Smrg                             LAST(1));
2096de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2097de2362d3Smrg                                 SRC0_ABS(0),
2098de2362d3Smrg                                 SRC1_ABS(0),
2099de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
2100de2362d3Smrg                                 UPDATE_PRED(0),
2101de2362d3Smrg                                 WRITE_MASK(0),
2102de2362d3Smrg                                 FOG_MERGE(0),
2103de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2104de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2105de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2106de2362d3Smrg                                 DST_GPR(2),
2107de2362d3Smrg                                 DST_REL(ABSOLUTE),
2108de2362d3Smrg                                 DST_ELEM(ELEM_W),
2109de2362d3Smrg                                 CLAMP(0));
2110de2362d3Smrg
2111de2362d3Smrg    /* 42 srcX / w */
2112de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
2113de2362d3Smrg                             SRC0_REL(ABSOLUTE),
2114de2362d3Smrg                             SRC0_ELEM(ELEM_X),
2115de2362d3Smrg                             SRC0_NEG(0),
2116de2362d3Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
2117de2362d3Smrg                             SRC1_REL(ABSOLUTE),
2118de2362d3Smrg                             SRC1_ELEM(ELEM_W),
2119de2362d3Smrg                             SRC1_NEG(0),
2120de2362d3Smrg                             INDEX_MODE(SQ_INDEX_AR_X),
2121de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2122de2362d3Smrg                             LAST(1));
2123de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2124de2362d3Smrg                                 SRC0_ABS(0),
2125de2362d3Smrg                                 SRC1_ABS(0),
2126de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
2127de2362d3Smrg                                 UPDATE_PRED(0),
2128de2362d3Smrg                                 WRITE_MASK(1),
2129de2362d3Smrg                                 FOG_MERGE(0),
2130de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2131de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_MUL),
2132de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2133de2362d3Smrg                                 DST_GPR(0),
2134de2362d3Smrg                                 DST_REL(ABSOLUTE),
2135de2362d3Smrg                                 DST_ELEM(ELEM_X),
2136de2362d3Smrg                                 CLAMP(0));
2137de2362d3Smrg
2138de2362d3Smrg    /* 43 srcY / h */
2139de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
2140de2362d3Smrg                             SRC0_REL(ABSOLUTE),
2141de2362d3Smrg                             SRC0_ELEM(ELEM_Y),
2142de2362d3Smrg                             SRC0_NEG(0),
2143de2362d3Smrg                             SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
2144de2362d3Smrg                             SRC1_REL(ABSOLUTE),
2145de2362d3Smrg                             SRC1_ELEM(ELEM_W),
2146de2362d3Smrg                             SRC1_NEG(0),
2147de2362d3Smrg                             INDEX_MODE(SQ_INDEX_AR_X),
2148de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2149de2362d3Smrg                             LAST(1));
2150de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2151de2362d3Smrg                                 SRC0_ABS(0),
2152de2362d3Smrg                                 SRC1_ABS(0),
2153de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
2154de2362d3Smrg                                 UPDATE_PRED(0),
2155de2362d3Smrg                                 WRITE_MASK(1),
2156de2362d3Smrg                                 FOG_MERGE(0),
2157de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2158de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_MUL),
2159de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2160de2362d3Smrg                                 DST_GPR(0),
2161de2362d3Smrg                                 DST_REL(ABSOLUTE),
2162de2362d3Smrg                                 DST_ELEM(ELEM_Y),
2163de2362d3Smrg                                 CLAMP(0));
2164de2362d3Smrg
2165de2362d3Smrg    /* 44/45 - dst - mask */
2166de2362d3Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2167de2362d3Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2168de2362d3Smrg			     FETCH_WHOLE_QUAD(0),
2169de2362d3Smrg			     BUFFER_ID(0),
2170de2362d3Smrg			     SRC_GPR(0),
2171de2362d3Smrg			     SRC_REL(ABSOLUTE),
2172de2362d3Smrg			     SRC_SEL_X(SQ_SEL_X),
2173de2362d3Smrg			     MEGA_FETCH_COUNT(24));
2174de2362d3Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(2),
2175de2362d3Smrg				 DST_REL(0),
2176de2362d3Smrg				 DST_SEL_X(SQ_SEL_X),
2177de2362d3Smrg				 DST_SEL_Y(SQ_SEL_Y),
2178de2362d3Smrg				 DST_SEL_Z(SQ_SEL_0),
2179de2362d3Smrg				 DST_SEL_W(SQ_SEL_1),
2180de2362d3Smrg				 USE_CONST_FIELDS(0),
2181de2362d3Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
2182de2362d3Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2183de2362d3Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2184de2362d3Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2185de2362d3Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
2186de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
2187de2362d3Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2188de2362d3Smrg#else
2189de2362d3Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2190de2362d3Smrg#endif
2191de2362d3Smrg			     CONST_BUF_NO_STRIDE(0),
2192de2362d3Smrg			     MEGA_FETCH(1));
2193de2362d3Smrg    shader[i++] = VTX_DWORD_PAD;
2194de2362d3Smrg    /* 46/47 - src */
2195de2362d3Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2196de2362d3Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2197de2362d3Smrg			     FETCH_WHOLE_QUAD(0),
2198de2362d3Smrg			     BUFFER_ID(0),
2199de2362d3Smrg			     SRC_GPR(0),
2200de2362d3Smrg			     SRC_REL(ABSOLUTE),
2201de2362d3Smrg			     SRC_SEL_X(SQ_SEL_X),
2202de2362d3Smrg			     MEGA_FETCH_COUNT(8));
2203de2362d3Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
2204de2362d3Smrg				 DST_REL(0),
2205de2362d3Smrg				 DST_SEL_X(SQ_SEL_X),
2206de2362d3Smrg				 DST_SEL_Y(SQ_SEL_Y),
2207de2362d3Smrg				 DST_SEL_Z(SQ_SEL_1),
2208de2362d3Smrg				 DST_SEL_W(SQ_SEL_0),
2209de2362d3Smrg				 USE_CONST_FIELDS(0),
2210de2362d3Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
2211de2362d3Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2212de2362d3Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2213de2362d3Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2214de2362d3Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
2215de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
2216de2362d3Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2217de2362d3Smrg#else
2218de2362d3Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2219de2362d3Smrg#endif
2220de2362d3Smrg			     CONST_BUF_NO_STRIDE(0),
2221de2362d3Smrg			     MEGA_FETCH(0));
2222de2362d3Smrg    shader[i++] = VTX_DWORD_PAD;
2223de2362d3Smrg    /* 48/49 - mask */
2224de2362d3Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2225de2362d3Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2226de2362d3Smrg			     FETCH_WHOLE_QUAD(0),
2227de2362d3Smrg			     BUFFER_ID(0),
2228de2362d3Smrg			     SRC_GPR(0),
2229de2362d3Smrg			     SRC_REL(ABSOLUTE),
2230de2362d3Smrg			     SRC_SEL_X(SQ_SEL_X),
2231de2362d3Smrg			     MEGA_FETCH_COUNT(8));
2232de2362d3Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
2233de2362d3Smrg				 DST_REL(0),
2234de2362d3Smrg				 DST_SEL_X(SQ_SEL_X),
2235de2362d3Smrg				 DST_SEL_Y(SQ_SEL_Y),
2236de2362d3Smrg				 DST_SEL_Z(SQ_SEL_1),
2237de2362d3Smrg				 DST_SEL_W(SQ_SEL_0),
2238de2362d3Smrg				 USE_CONST_FIELDS(0),
2239de2362d3Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
2240de2362d3Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2241de2362d3Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2242de2362d3Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2243de2362d3Smrg    shader[i++] = VTX_DWORD2(OFFSET(16),
2244de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
2245de2362d3Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2246de2362d3Smrg#else
2247de2362d3Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2248de2362d3Smrg#endif
2249de2362d3Smrg			     CONST_BUF_NO_STRIDE(0),
2250de2362d3Smrg			     MEGA_FETCH(0));
2251de2362d3Smrg    shader[i++] = VTX_DWORD_PAD;
2252de2362d3Smrg
2253de2362d3Smrg    /* 50/51 - dst - non-mask */
2254de2362d3Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2255de2362d3Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2256de2362d3Smrg			     FETCH_WHOLE_QUAD(0),
2257de2362d3Smrg			     BUFFER_ID(0),
2258de2362d3Smrg			     SRC_GPR(0),
2259de2362d3Smrg			     SRC_REL(ABSOLUTE),
2260de2362d3Smrg			     SRC_SEL_X(SQ_SEL_X),
2261de2362d3Smrg			     MEGA_FETCH_COUNT(16));
2262de2362d3Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
2263de2362d3Smrg				 DST_REL(0),
2264de2362d3Smrg				 DST_SEL_X(SQ_SEL_X),
2265de2362d3Smrg				 DST_SEL_Y(SQ_SEL_Y),
2266de2362d3Smrg				 DST_SEL_Z(SQ_SEL_0),
2267de2362d3Smrg				 DST_SEL_W(SQ_SEL_1),
2268de2362d3Smrg				 USE_CONST_FIELDS(0),
2269de2362d3Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
2270de2362d3Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2271de2362d3Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2272de2362d3Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2273de2362d3Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
2274de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
2275de2362d3Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2276de2362d3Smrg#else
2277de2362d3Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2278de2362d3Smrg#endif
2279de2362d3Smrg			     CONST_BUF_NO_STRIDE(0),
2280de2362d3Smrg			     MEGA_FETCH(1));
2281de2362d3Smrg    shader[i++] = VTX_DWORD_PAD;
2282de2362d3Smrg    /* 52/53 - src */
2283de2362d3Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2284de2362d3Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2285de2362d3Smrg			     FETCH_WHOLE_QUAD(0),
2286de2362d3Smrg			     BUFFER_ID(0),
2287de2362d3Smrg			     SRC_GPR(0),
2288de2362d3Smrg			     SRC_REL(ABSOLUTE),
2289de2362d3Smrg			     SRC_SEL_X(SQ_SEL_X),
2290de2362d3Smrg			     MEGA_FETCH_COUNT(8));
2291de2362d3Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
2292de2362d3Smrg				 DST_REL(0),
2293de2362d3Smrg				 DST_SEL_X(SQ_SEL_X),
2294de2362d3Smrg				 DST_SEL_Y(SQ_SEL_Y),
2295de2362d3Smrg				 DST_SEL_Z(SQ_SEL_1),
2296de2362d3Smrg				 DST_SEL_W(SQ_SEL_0),
2297de2362d3Smrg				 USE_CONST_FIELDS(0),
2298de2362d3Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
2299de2362d3Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2300de2362d3Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2301de2362d3Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2302de2362d3Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
2303de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
2304de2362d3Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2305de2362d3Smrg#else
2306de2362d3Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2307de2362d3Smrg#endif
2308de2362d3Smrg			     CONST_BUF_NO_STRIDE(0),
2309de2362d3Smrg			     MEGA_FETCH(0));
2310de2362d3Smrg    shader[i++] = VTX_DWORD_PAD;
2311de2362d3Smrg
2312de2362d3Smrg    return i;
2313de2362d3Smrg}
2314de2362d3Smrg
2315de2362d3Smrg/* comp ps --------------------------------------- */
/*
 * R600_comp_ps - emit the composite pixel shader.
 *
 * Writes the shader into shader[] one 32-bit word at a time, starting at
 * index 0, and returns the number of dwords written.  The program below is
 * 60 dwords long: 14 CF and 12 ALU instructions of two dwords each, plus two
 * TEX fetches of four dwords each (three words and a pad).  No bounds
 * checking is done here, so the caller must supply a buffer with room for
 * all of them.
 *
 * ChipSet is only forwarded to ALU_DWORD1_OP2().
 *
 * The numbers in the comments are instruction slots (two dwords per slot);
 * they are the values used in the ADDR() fields of the CF instructions.
 *
 *   0-3    conditional CALLs that load mask (GPR1) and src (GPR0).  Each is
 *          either fetched from a texture or copied from the constant file,
 *          selected by boolean constant 1 (mask) and 0 (src).
 *   4      ALU clause at slot 14: GPR0 = GPR1 * GPR0 per channel, clamped.
 *   5      export GPR0 to pixel MRT0, END_OF_PROGRAM set.
 *   6-13   the four subroutines, each one clause followed by a RETURN.
 *   14-25  ALU instructions referenced by the ALU clauses.
 *   26-29  TEX instructions referenced by the TEX clauses.
 *
 * NOTE(review): the MOV instructions in slots 18-25 also fill in the SRC1
 * fields (gpr[0]); presumably these are ignored for a one-operand op -
 * confirm against the ISA documentation.
 */
2316de2362d3Smrgint R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
2317de2362d3Smrg{
2318de2362d3Smrg    int i = 0;
2319de2362d3Smrg
2320de2362d3Smrg    /* 0 */
232118781e08Smrg    /* call fetch-mask (slot 10) if boolean1 == true */
232218781e08Smrg    shader[i++] = CF_DWORD0(ADDR(10));
2323de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
232418781e08Smrg                            CF_CONST(1),
2325de2362d3Smrg                            COND(SQ_CF_COND_BOOL),
2326de2362d3Smrg                            I_COUNT(0),
2327de2362d3Smrg                            CALL_COUNT(0),
2328de2362d3Smrg                            END_OF_PROGRAM(0),
2329de2362d3Smrg                            VALID_PIXEL_MODE(0),
2330de2362d3Smrg                            CF_INST(SQ_CF_INST_CALL),
2331de2362d3Smrg                            WHOLE_QUAD_MODE(0),
2332de2362d3Smrg                            BARRIER(0));
2333de2362d3Smrg    /* 1 */
233418781e08Smrg    /* call read-constant-mask (slot 12) if boolean1 == false */
233518781e08Smrg    shader[i++] = CF_DWORD0(ADDR(12));
2336de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
233718781e08Smrg                            CF_CONST(1),
2338de2362d3Smrg                            COND(SQ_CF_COND_NOT_BOOL),
2339de2362d3Smrg                            I_COUNT(0),
2340de2362d3Smrg                            CALL_COUNT(0),
2341de2362d3Smrg                            END_OF_PROGRAM(0),
2342de2362d3Smrg                            VALID_PIXEL_MODE(0),
2343de2362d3Smrg                            CF_INST(SQ_CF_INST_CALL),
2344de2362d3Smrg                            WHOLE_QUAD_MODE(0),
2345de2362d3Smrg                            BARRIER(0));
2346de2362d3Smrg    /* 2 */
234718781e08Smrg    /* call fetch-src (slot 6) if boolean0 == true */
234818781e08Smrg    shader[i++] = CF_DWORD0(ADDR(6));
2349de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
2350de2362d3Smrg                            CF_CONST(0),
235118781e08Smrg                            COND(SQ_CF_COND_BOOL),
2352de2362d3Smrg                            I_COUNT(0),
2353de2362d3Smrg                            CALL_COUNT(0),
235418781e08Smrg                            END_OF_PROGRAM(0),
2355de2362d3Smrg                            VALID_PIXEL_MODE(0),
235618781e08Smrg                            CF_INST(SQ_CF_INST_CALL),
2357de2362d3Smrg                            WHOLE_QUAD_MODE(0),
235818781e08Smrg                            BARRIER(0));
2359de2362d3Smrg
236018781e08Smrg    /* 3 */
236118781e08Smrg    /* call read-constant-src (slot 8) if boolean0 == false */
236218781e08Smrg    shader[i++] = CF_DWORD0(ADDR(8));
2363de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
2364de2362d3Smrg			    CF_CONST(0),
236518781e08Smrg			    COND(SQ_CF_COND_NOT_BOOL),
236618781e08Smrg			    I_COUNT(0),
2367de2362d3Smrg			    CALL_COUNT(0),
2368de2362d3Smrg			    END_OF_PROGRAM(0),
2369de2362d3Smrg			    VALID_PIXEL_MODE(0),
237018781e08Smrg			    CF_INST(SQ_CF_INST_CALL),
2371de2362d3Smrg			    WHOLE_QUAD_MODE(0),
237218781e08Smrg			    BARRIER(0));
2373de2362d3Smrg
2374de2362d3Smrg    /* 4 */
237518781e08Smrg    /* src IN mask (GPR0 := GPR1 .* GPR0), ALU clause starting at slot 14 */
237618781e08Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(14),
2377de2362d3Smrg				KCACHE_BANK0(0),
2378de2362d3Smrg				KCACHE_BANK1(0),
2379de2362d3Smrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
2380de2362d3Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2381de2362d3Smrg				KCACHE_ADDR0(0),
2382de2362d3Smrg				KCACHE_ADDR1(0),
2383de2362d3Smrg				I_COUNT(4),
2384de2362d3Smrg				USES_WATERFALL(0),
2385de2362d3Smrg				CF_INST(SQ_CF_INST_ALU),
2386de2362d3Smrg				WHOLE_QUAD_MODE(0),
2387de2362d3Smrg				BARRIER(1));
2388de2362d3Smrg
2389de2362d3Smrg    /* 5 */
239018781e08Smrg    /* export pixel data: GPR0 to pixel MRT0, END_OF_PROGRAM set */
2391de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
2392de2362d3Smrg					  TYPE(SQ_EXPORT_PIXEL),
239318781e08Smrg					  RW_GPR(0),
2394de2362d3Smrg					  RW_REL(ABSOLUTE),
2395de2362d3Smrg					  INDEX_GPR(0),
2396de2362d3Smrg					  ELEM_SIZE(1));
2397de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
2398de2362d3Smrg					       SRC_SEL_Y(SQ_SEL_Y),
2399de2362d3Smrg					       SRC_SEL_Z(SQ_SEL_Z),
2400de2362d3Smrg					       SRC_SEL_W(SQ_SEL_W),
2401de2362d3Smrg					       R6xx_ELEM_LOOP(0),
2402de2362d3Smrg					       BURST_COUNT(1),
240318781e08Smrg					       END_OF_PROGRAM(1),
2404de2362d3Smrg					       VALID_PIXEL_MODE(0),
2405de2362d3Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
2406de2362d3Smrg					       WHOLE_QUAD_MODE(0),
2407de2362d3Smrg					       BARRIER(1));
240818781e08Smrg    /* subroutine fetch-src */
2409de2362d3Smrg    /* 6 */
241018781e08Smrg    /* fetch src into GPR0: TEX clause at slot 26 */
241118781e08Smrg    shader[i++] = CF_DWORD0(ADDR(26));
2412de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
2413de2362d3Smrg			    CF_CONST(0),
2414de2362d3Smrg			    COND(SQ_CF_COND_ACTIVE),
241518781e08Smrg			    I_COUNT(1),
2416de2362d3Smrg			    CALL_COUNT(0),
2417de2362d3Smrg			    END_OF_PROGRAM(0),
2418de2362d3Smrg			    VALID_PIXEL_MODE(0),
241918781e08Smrg			    CF_INST(SQ_CF_INST_TEX),
2420de2362d3Smrg			    WHOLE_QUAD_MODE(0),
2421de2362d3Smrg			    BARRIER(1));
2422de2362d3Smrg
242318781e08Smrg    /* 7 */
242418781e08Smrg    /* return */
242518781e08Smrg    shader[i++] = CF_DWORD0(ADDR(0));
2426de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
2427de2362d3Smrg			    CF_CONST(0),
2428de2362d3Smrg			    COND(SQ_CF_COND_ACTIVE),
242918781e08Smrg			    I_COUNT(0),
2430de2362d3Smrg			    CALL_COUNT(0),
2431de2362d3Smrg			    END_OF_PROGRAM(0),
2432de2362d3Smrg			    VALID_PIXEL_MODE(0),
243318781e08Smrg			    CF_INST(SQ_CF_INST_RETURN),
2434de2362d3Smrg			    WHOLE_QUAD_MODE(0),
2435de2362d3Smrg			    BARRIER(1));
243618781e08Smrg
243718781e08Smrg    /* subroutine read-constant-src */
2438de2362d3Smrg    /* 8 */
243918781e08Smrg    /* read constants into GPR0: ALU clause at slot 18 */
244018781e08Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(18),
244118781e08Smrg				KCACHE_BANK0(0),
244218781e08Smrg				KCACHE_BANK1(0),
244318781e08Smrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
244418781e08Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
244518781e08Smrg				KCACHE_ADDR0(0),
244618781e08Smrg				KCACHE_ADDR1(0),
244718781e08Smrg				I_COUNT(4),
244818781e08Smrg				USES_WATERFALL(0),
244918781e08Smrg				CF_INST(SQ_CF_INST_ALU),
245018781e08Smrg				WHOLE_QUAD_MODE(0),
245118781e08Smrg				BARRIER(1));
2452de2362d3Smrg    /* 9 */
245318781e08Smrg    /* return */
2454de2362d3Smrg    shader[i++] = CF_DWORD0(ADDR(0));
2455de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
2456de2362d3Smrg			    CF_CONST(0),
2457de2362d3Smrg			    COND(SQ_CF_COND_ACTIVE),
2458de2362d3Smrg			    I_COUNT(0),
2459de2362d3Smrg			    CALL_COUNT(0),
2460de2362d3Smrg			    END_OF_PROGRAM(0),
2461de2362d3Smrg			    VALID_PIXEL_MODE(0),
2462de2362d3Smrg			    CF_INST(SQ_CF_INST_RETURN),
2463de2362d3Smrg			    WHOLE_QUAD_MODE(0),
2464de2362d3Smrg			    BARRIER(1));
2465de2362d3Smrg
246618781e08Smrg    /* subroutine fetch-mask */
246718781e08Smrg    /* 10 */
246818781e08Smrg    /* fetch mask into GPR1: TEX clause at slot 28 */
246918781e08Smrg    shader[i++] = CF_DWORD0(ADDR(28));
247018781e08Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
247118781e08Smrg                            CF_CONST(0),
247218781e08Smrg                            COND(SQ_CF_COND_ACTIVE),
247318781e08Smrg                            I_COUNT(1),
247418781e08Smrg                            CALL_COUNT(0),
247518781e08Smrg                            END_OF_PROGRAM(0),
247618781e08Smrg                            VALID_PIXEL_MODE(0),
247718781e08Smrg                            CF_INST(SQ_CF_INST_TEX),
247818781e08Smrg                            WHOLE_QUAD_MODE(0),
247918781e08Smrg                            BARRIER(1));
248018781e08Smrg
248118781e08Smrg    /* 11 */
248218781e08Smrg    /* return */
248318781e08Smrg    shader[i++] = CF_DWORD0(ADDR(0));
248418781e08Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
248518781e08Smrg                            CF_CONST(0),
248618781e08Smrg                            COND(SQ_CF_COND_ACTIVE),
248718781e08Smrg                            I_COUNT(0),
248818781e08Smrg                            CALL_COUNT(0),
248918781e08Smrg                            END_OF_PROGRAM(0),
249018781e08Smrg                            VALID_PIXEL_MODE(0),
249118781e08Smrg                            CF_INST(SQ_CF_INST_RETURN),
249218781e08Smrg                            WHOLE_QUAD_MODE(0),
249318781e08Smrg                            BARRIER(1));
249418781e08Smrg
249518781e08Smrg    /* subroutine read-constant-mask */
249618781e08Smrg    /* 12 */
249718781e08Smrg    /* read constants into GPR1: ALU clause at slot 22 */
249818781e08Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(22),
249918781e08Smrg                                KCACHE_BANK0(0),
250018781e08Smrg                                KCACHE_BANK1(0),
250118781e08Smrg                                KCACHE_MODE0(SQ_CF_KCACHE_NOP));
250218781e08Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
250318781e08Smrg                                KCACHE_ADDR0(0),
250418781e08Smrg                                KCACHE_ADDR1(0),
250518781e08Smrg                                I_COUNT(4),
250618781e08Smrg                                USES_WATERFALL(0),
250718781e08Smrg                                CF_INST(SQ_CF_INST_ALU),
250818781e08Smrg                                WHOLE_QUAD_MODE(0),
250918781e08Smrg                                BARRIER(1));
251018781e08Smrg    /* 13 */
251118781e08Smrg    /* return */
251218781e08Smrg    shader[i++] = CF_DWORD0(ADDR(0));
251318781e08Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
251418781e08Smrg                            CF_CONST(0),
251518781e08Smrg                            COND(SQ_CF_COND_ACTIVE),
251618781e08Smrg                            I_COUNT(0),
251718781e08Smrg                            CALL_COUNT(0),
251818781e08Smrg                            END_OF_PROGRAM(0),
251918781e08Smrg                            VALID_PIXEL_MODE(0),
252018781e08Smrg                            CF_INST(SQ_CF_INST_RETURN),
252118781e08Smrg                            WHOLE_QUAD_MODE(0),
252218781e08Smrg                            BARRIER(1));
252318781e08Smrg    /* ALU clauses: bodies addressed by the CF_ALU ADDR() fields above (14, 18, 22) */
252418781e08Smrg
252518781e08Smrg    /* 14 - alu 0 */
252618781e08Smrg    /* MUL gpr[0].x gpr[1].x gpr[0].x */
2527de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
2528de2362d3Smrg			     SRC0_REL(ABSOLUTE),
2529de2362d3Smrg			     SRC0_ELEM(ELEM_X),
2530de2362d3Smrg			     SRC0_NEG(0),
2531de2362d3Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2532de2362d3Smrg			     SRC1_REL(ABSOLUTE),
2533de2362d3Smrg			     SRC1_ELEM(ELEM_X),
2534de2362d3Smrg			     SRC1_NEG(0),
2535de2362d3Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
2536de2362d3Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2537de2362d3Smrg			     LAST(0));
2538de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2539de2362d3Smrg				 SRC0_ABS(0),
2540de2362d3Smrg				 SRC1_ABS(0),
2541de2362d3Smrg				 UPDATE_EXECUTE_MASK(0),
2542de2362d3Smrg				 UPDATE_PRED(0),
2543de2362d3Smrg				 WRITE_MASK(1),
2544de2362d3Smrg				 FOG_MERGE(0),
2545de2362d3Smrg				 OMOD(SQ_ALU_OMOD_OFF),
2546de2362d3Smrg				 ALU_INST(SQ_OP2_INST_MUL),
2547de2362d3Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
254818781e08Smrg				 DST_GPR(0),
2549de2362d3Smrg				 DST_REL(ABSOLUTE),
2550de2362d3Smrg				 DST_ELEM(ELEM_X),
2551de2362d3Smrg				 CLAMP(1));
255218781e08Smrg    /* 15 - alu 1 */
255318781e08Smrg    /* MUL gpr[0].y gpr[1].y gpr[0].y */
2554de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
2555de2362d3Smrg			     SRC0_REL(ABSOLUTE),
2556de2362d3Smrg			     SRC0_ELEM(ELEM_Y),
2557de2362d3Smrg			     SRC0_NEG(0),
2558de2362d3Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2559de2362d3Smrg			     SRC1_REL(ABSOLUTE),
2560de2362d3Smrg			     SRC1_ELEM(ELEM_Y),
2561de2362d3Smrg			     SRC1_NEG(0),
2562de2362d3Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
2563de2362d3Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2564de2362d3Smrg			     LAST(0));
2565de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2566de2362d3Smrg				 SRC0_ABS(0),
2567de2362d3Smrg				 SRC1_ABS(0),
2568de2362d3Smrg				 UPDATE_EXECUTE_MASK(0),
2569de2362d3Smrg				 UPDATE_PRED(0),
2570de2362d3Smrg				 WRITE_MASK(1),
2571de2362d3Smrg				 FOG_MERGE(0),
2572de2362d3Smrg				 OMOD(SQ_ALU_OMOD_OFF),
2573de2362d3Smrg				 ALU_INST(SQ_OP2_INST_MUL),
2574de2362d3Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
257518781e08Smrg				 DST_GPR(0),
2576de2362d3Smrg				 DST_REL(ABSOLUTE),
2577de2362d3Smrg				 DST_ELEM(ELEM_Y),
2578de2362d3Smrg				 CLAMP(1));
257918781e08Smrg    /* 16 - alu 2 */
258018781e08Smrg    /* MUL gpr[0].z gpr[1].z gpr[0].z */
2581de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
2582de2362d3Smrg			     SRC0_REL(ABSOLUTE),
2583de2362d3Smrg			     SRC0_ELEM(ELEM_Z),
2584de2362d3Smrg			     SRC0_NEG(0),
2585de2362d3Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2586de2362d3Smrg			     SRC1_REL(ABSOLUTE),
2587de2362d3Smrg			     SRC1_ELEM(ELEM_Z),
2588de2362d3Smrg			     SRC1_NEG(0),
2589de2362d3Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
2590de2362d3Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2591de2362d3Smrg			     LAST(0));
2592de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2593de2362d3Smrg				 SRC0_ABS(0),
2594de2362d3Smrg				 SRC1_ABS(0),
2595de2362d3Smrg				 UPDATE_EXECUTE_MASK(0),
2596de2362d3Smrg				 UPDATE_PRED(0),
2597de2362d3Smrg				 WRITE_MASK(1),
2598de2362d3Smrg				 FOG_MERGE(0),
2599de2362d3Smrg				 OMOD(SQ_ALU_OMOD_OFF),
2600de2362d3Smrg				 ALU_INST(SQ_OP2_INST_MUL),
2601de2362d3Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
260218781e08Smrg				 DST_GPR(0),
2603de2362d3Smrg				 DST_REL(ABSOLUTE),
2604de2362d3Smrg				 DST_ELEM(ELEM_Z),
2605de2362d3Smrg				 CLAMP(1));
260618781e08Smrg    /* 17 - alu 3 */
260718781e08Smrg    /* MUL gpr[0].w gpr[1].w gpr[0].w */
2608de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
2609de2362d3Smrg			     SRC0_REL(ABSOLUTE),
2610de2362d3Smrg			     SRC0_ELEM(ELEM_W),
2611de2362d3Smrg			     SRC0_NEG(0),
2612de2362d3Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2613de2362d3Smrg			     SRC1_REL(ABSOLUTE),
2614de2362d3Smrg			     SRC1_ELEM(ELEM_W),
2615de2362d3Smrg			     SRC1_NEG(0),
2616de2362d3Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
2617de2362d3Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2618de2362d3Smrg			     LAST(1));
2619de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2620de2362d3Smrg				 SRC0_ABS(0),
2621de2362d3Smrg				 SRC1_ABS(0),
2622de2362d3Smrg				 UPDATE_EXECUTE_MASK(0),
2623de2362d3Smrg				 UPDATE_PRED(0),
2624de2362d3Smrg				 WRITE_MASK(1),
2625de2362d3Smrg				 FOG_MERGE(0),
2626de2362d3Smrg				 OMOD(SQ_ALU_OMOD_OFF),
2627de2362d3Smrg				 ALU_INST(SQ_OP2_INST_MUL),
2628de2362d3Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
262918781e08Smrg				 DST_GPR(0),
263018781e08Smrg				 DST_REL(ABSOLUTE),
263118781e08Smrg				 DST_ELEM(ELEM_W),
263218781e08Smrg				 CLAMP(1));
263318781e08Smrg
263418781e08Smrg    /* 18 - read-constant-src: MOV gpr[0].x cfile[0].x */
263518781e08Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
263618781e08Smrg			     SRC0_REL(ABSOLUTE),
263718781e08Smrg			     SRC0_ELEM(ELEM_X),
263818781e08Smrg			     SRC0_NEG(0),
263918781e08Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
264018781e08Smrg			     SRC1_REL(ABSOLUTE),
264118781e08Smrg			     SRC1_ELEM(ELEM_X),
264218781e08Smrg			     SRC1_NEG(0),
264318781e08Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
264418781e08Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
264518781e08Smrg			     LAST(0));
264618781e08Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
264718781e08Smrg				 SRC0_ABS(0),
264818781e08Smrg				 SRC1_ABS(0),
264918781e08Smrg				 UPDATE_EXECUTE_MASK(0),
265018781e08Smrg				 UPDATE_PRED(0),
265118781e08Smrg				 WRITE_MASK(1),
265218781e08Smrg				 FOG_MERGE(0),
265318781e08Smrg				 OMOD(SQ_ALU_OMOD_OFF),
265418781e08Smrg				 ALU_INST(SQ_OP2_INST_MOV),
265518781e08Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
265618781e08Smrg				 DST_GPR(0),
265718781e08Smrg				 DST_REL(ABSOLUTE),
265818781e08Smrg				 DST_ELEM(ELEM_X),
265918781e08Smrg				 CLAMP(1));
266018781e08Smrg    /* 19 - MOV gpr[0].y cfile[0].y */
266118781e08Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
266218781e08Smrg			     SRC0_REL(ABSOLUTE),
266318781e08Smrg			     SRC0_ELEM(ELEM_Y),
266418781e08Smrg			     SRC0_NEG(0),
266518781e08Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
266618781e08Smrg			     SRC1_REL(ABSOLUTE),
266718781e08Smrg			     SRC1_ELEM(ELEM_Y),
266818781e08Smrg			     SRC1_NEG(0),
266918781e08Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
267018781e08Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
267118781e08Smrg			     LAST(0));
267218781e08Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
267318781e08Smrg				 SRC0_ABS(0),
267418781e08Smrg				 SRC1_ABS(0),
267518781e08Smrg				 UPDATE_EXECUTE_MASK(0),
267618781e08Smrg				 UPDATE_PRED(0),
267718781e08Smrg				 WRITE_MASK(1),
267818781e08Smrg				 FOG_MERGE(0),
267918781e08Smrg				 OMOD(SQ_ALU_OMOD_OFF),
268018781e08Smrg				 ALU_INST(SQ_OP2_INST_MOV),
268118781e08Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
268218781e08Smrg				 DST_GPR(0),
268318781e08Smrg				 DST_REL(ABSOLUTE),
268418781e08Smrg				 DST_ELEM(ELEM_Y),
268518781e08Smrg				 CLAMP(1));
268618781e08Smrg    /* 20 - MOV gpr[0].z cfile[0].z */
268718781e08Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
268818781e08Smrg			     SRC0_REL(ABSOLUTE),
268918781e08Smrg			     SRC0_ELEM(ELEM_Z),
269018781e08Smrg			     SRC0_NEG(0),
269118781e08Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
269218781e08Smrg			     SRC1_REL(ABSOLUTE),
269318781e08Smrg			     SRC1_ELEM(ELEM_Z),
269418781e08Smrg			     SRC1_NEG(0),
269518781e08Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
269618781e08Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
269718781e08Smrg			     LAST(0));
269818781e08Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
269918781e08Smrg				 SRC0_ABS(0),
270018781e08Smrg				 SRC1_ABS(0),
270118781e08Smrg				 UPDATE_EXECUTE_MASK(0),
270218781e08Smrg				 UPDATE_PRED(0),
270318781e08Smrg				 WRITE_MASK(1),
270418781e08Smrg				 FOG_MERGE(0),
270518781e08Smrg				 OMOD(SQ_ALU_OMOD_OFF),
270618781e08Smrg				 ALU_INST(SQ_OP2_INST_MOV),
270718781e08Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
270818781e08Smrg				 DST_GPR(0),
270918781e08Smrg				 DST_REL(ABSOLUTE),
271018781e08Smrg				 DST_ELEM(ELEM_Z),
271118781e08Smrg				 CLAMP(1));
271218781e08Smrg    /* 21 - MOV gpr[0].w cfile[0].w (LAST set) */
271318781e08Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
271418781e08Smrg			     SRC0_REL(ABSOLUTE),
271518781e08Smrg			     SRC0_ELEM(ELEM_W),
271618781e08Smrg			     SRC0_NEG(0),
271718781e08Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
271818781e08Smrg			     SRC1_REL(ABSOLUTE),
271918781e08Smrg			     SRC1_ELEM(ELEM_W),
272018781e08Smrg			     SRC1_NEG(0),
272118781e08Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
272218781e08Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
272318781e08Smrg			     LAST(1));
272418781e08Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
272518781e08Smrg				 SRC0_ABS(0),
272618781e08Smrg				 SRC1_ABS(0),
272718781e08Smrg				 UPDATE_EXECUTE_MASK(0),
272818781e08Smrg				 UPDATE_PRED(0),
272918781e08Smrg				 WRITE_MASK(1),
273018781e08Smrg				 FOG_MERGE(0),
273118781e08Smrg				 OMOD(SQ_ALU_OMOD_OFF),
273218781e08Smrg				 ALU_INST(SQ_OP2_INST_MOV),
273318781e08Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
273418781e08Smrg				 DST_GPR(0),
2735de2362d3Smrg				 DST_REL(ABSOLUTE),
2736de2362d3Smrg				 DST_ELEM(ELEM_W),
2737de2362d3Smrg				 CLAMP(1));
2738de2362d3Smrg
273918781e08Smrg    /* 22 - read-constant-mask: MOV gpr[1].x cfile[1].x */
274018781e08Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
274118781e08Smrg			     SRC0_REL(ABSOLUTE),
274218781e08Smrg			     SRC0_ELEM(ELEM_X),
274318781e08Smrg			     SRC0_NEG(0),
274418781e08Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
274518781e08Smrg			     SRC1_REL(ABSOLUTE),
274618781e08Smrg			     SRC1_ELEM(ELEM_X),
274718781e08Smrg			     SRC1_NEG(0),
274818781e08Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
274918781e08Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
275018781e08Smrg			     LAST(0));
275118781e08Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
275218781e08Smrg				 SRC0_ABS(0),
275318781e08Smrg				 SRC1_ABS(0),
275418781e08Smrg				 UPDATE_EXECUTE_MASK(0),
275518781e08Smrg				 UPDATE_PRED(0),
275618781e08Smrg				 WRITE_MASK(1),
275718781e08Smrg				 FOG_MERGE(0),
275818781e08Smrg				 OMOD(SQ_ALU_OMOD_OFF),
275918781e08Smrg				 ALU_INST(SQ_OP2_INST_MOV),
276018781e08Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
276118781e08Smrg				 DST_GPR(1),
276218781e08Smrg				 DST_REL(ABSOLUTE),
276318781e08Smrg				 DST_ELEM(ELEM_X),
276418781e08Smrg				 CLAMP(1));
276518781e08Smrg    /* 23 - MOV gpr[1].y cfile[1].y */
276618781e08Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
276718781e08Smrg			     SRC0_REL(ABSOLUTE),
276818781e08Smrg			     SRC0_ELEM(ELEM_Y),
276918781e08Smrg			     SRC0_NEG(0),
277018781e08Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
277118781e08Smrg			     SRC1_REL(ABSOLUTE),
277218781e08Smrg			     SRC1_ELEM(ELEM_Y),
277318781e08Smrg			     SRC1_NEG(0),
277418781e08Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
277518781e08Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
277618781e08Smrg			     LAST(0));
277718781e08Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
277818781e08Smrg				 SRC0_ABS(0),
277918781e08Smrg				 SRC1_ABS(0),
278018781e08Smrg				 UPDATE_EXECUTE_MASK(0),
278118781e08Smrg				 UPDATE_PRED(0),
278218781e08Smrg				 WRITE_MASK(1),
278318781e08Smrg				 FOG_MERGE(0),
278418781e08Smrg				 OMOD(SQ_ALU_OMOD_OFF),
278518781e08Smrg				 ALU_INST(SQ_OP2_INST_MOV),
278618781e08Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
278718781e08Smrg				 DST_GPR(1),
278818781e08Smrg				 DST_REL(ABSOLUTE),
278918781e08Smrg				 DST_ELEM(ELEM_Y),
279018781e08Smrg				 CLAMP(1));
279118781e08Smrg    /* 24 - MOV gpr[1].z cfile[1].z */
279218781e08Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
279318781e08Smrg			     SRC0_REL(ABSOLUTE),
279418781e08Smrg			     SRC0_ELEM(ELEM_Z),
279518781e08Smrg			     SRC0_NEG(0),
279618781e08Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
279718781e08Smrg			     SRC1_REL(ABSOLUTE),
279818781e08Smrg			     SRC1_ELEM(ELEM_Z),
279918781e08Smrg			     SRC1_NEG(0),
280018781e08Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
280118781e08Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
280218781e08Smrg			     LAST(0));
280318781e08Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
280418781e08Smrg				 SRC0_ABS(0),
280518781e08Smrg				 SRC1_ABS(0),
280618781e08Smrg				 UPDATE_EXECUTE_MASK(0),
280718781e08Smrg				 UPDATE_PRED(0),
280818781e08Smrg				 WRITE_MASK(1),
280918781e08Smrg				 FOG_MERGE(0),
281018781e08Smrg				 OMOD(SQ_ALU_OMOD_OFF),
281118781e08Smrg				 ALU_INST(SQ_OP2_INST_MOV),
281218781e08Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
281318781e08Smrg				 DST_GPR(1),
281418781e08Smrg				 DST_REL(ABSOLUTE),
281518781e08Smrg				 DST_ELEM(ELEM_Z),
281618781e08Smrg				 CLAMP(1));
281718781e08Smrg    /* 25 - MOV gpr[1].w cfile[1].w (LAST set) */
281818781e08Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
281918781e08Smrg			     SRC0_REL(ABSOLUTE),
282018781e08Smrg			     SRC0_ELEM(ELEM_W),
282118781e08Smrg			     SRC0_NEG(0),
282218781e08Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
282318781e08Smrg			     SRC1_REL(ABSOLUTE),
282418781e08Smrg			     SRC1_ELEM(ELEM_W),
282518781e08Smrg			     SRC1_NEG(0),
282618781e08Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
282718781e08Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
282818781e08Smrg			     LAST(1));
282918781e08Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
283018781e08Smrg				 SRC0_ABS(0),
283118781e08Smrg				 SRC1_ABS(0),
283218781e08Smrg				 UPDATE_EXECUTE_MASK(0),
283318781e08Smrg				 UPDATE_PRED(0),
283418781e08Smrg				 WRITE_MASK(1),
283518781e08Smrg				 FOG_MERGE(0),
283618781e08Smrg				 OMOD(SQ_ALU_OMOD_OFF),
283718781e08Smrg				 ALU_INST(SQ_OP2_INST_MOV),
283818781e08Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
283918781e08Smrg				 DST_GPR(1),
284018781e08Smrg				 DST_REL(ABSOLUTE),
284118781e08Smrg				 DST_ELEM(ELEM_W),
284218781e08Smrg				 CLAMP(1));
284318781e08Smrg
284418781e08Smrg    /* 26/27 - src: sample resource 0 / sampler 0 at gpr[0].xy into gpr[0].xyzw */
2845de2362d3Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
2846de2362d3Smrg			     BC_FRAC_MODE(0),
2847de2362d3Smrg			     FETCH_WHOLE_QUAD(0),
2848de2362d3Smrg			     RESOURCE_ID(0),
2849de2362d3Smrg			     SRC_GPR(0),
2850de2362d3Smrg			     SRC_REL(ABSOLUTE),
2851de2362d3Smrg			     R7xx_ALT_CONST(0));
2852de2362d3Smrg    shader[i++] = TEX_DWORD1(DST_GPR(0),
2853de2362d3Smrg			     DST_REL(ABSOLUTE),
2854de2362d3Smrg			     DST_SEL_X(SQ_SEL_X),
2855de2362d3Smrg			     DST_SEL_Y(SQ_SEL_Y),
2856de2362d3Smrg			     DST_SEL_Z(SQ_SEL_Z),
2857de2362d3Smrg			     DST_SEL_W(SQ_SEL_W),
2858de2362d3Smrg			     LOD_BIAS(0),
2859de2362d3Smrg			     COORD_TYPE_X(TEX_NORMALIZED),
2860de2362d3Smrg			     COORD_TYPE_Y(TEX_NORMALIZED),
2861de2362d3Smrg			     COORD_TYPE_Z(TEX_NORMALIZED),
2862de2362d3Smrg			     COORD_TYPE_W(TEX_NORMALIZED));
    /* coordinate swizzle: x and y from the source GPR, z forced to 0, w forced to 1 */
2863de2362d3Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
2864de2362d3Smrg			     OFFSET_Y(0),
2865de2362d3Smrg			     OFFSET_Z(0),
2866de2362d3Smrg			     SAMPLER_ID(0),
2867de2362d3Smrg			     SRC_SEL_X(SQ_SEL_X),
2868de2362d3Smrg			     SRC_SEL_Y(SQ_SEL_Y),
2869de2362d3Smrg			     SRC_SEL_Z(SQ_SEL_0),
2870de2362d3Smrg			     SRC_SEL_W(SQ_SEL_1));
2871de2362d3Smrg    shader[i++] = TEX_DWORD_PAD;
287218781e08Smrg    /* 28/29 - mask: sample resource 1 / sampler 1 at gpr[1].xy into gpr[1].xyzw */
2873de2362d3Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
2874de2362d3Smrg			     BC_FRAC_MODE(0),
2875de2362d3Smrg			     FETCH_WHOLE_QUAD(0),
2876de2362d3Smrg			     RESOURCE_ID(1),
2877de2362d3Smrg			     SRC_GPR(1),
2878de2362d3Smrg			     SRC_REL(ABSOLUTE),
2879de2362d3Smrg			     R7xx_ALT_CONST(0));
2880de2362d3Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
2881de2362d3Smrg			     DST_REL(ABSOLUTE),
2882de2362d3Smrg			     DST_SEL_X(SQ_SEL_X),
2883de2362d3Smrg			     DST_SEL_Y(SQ_SEL_Y),
2884de2362d3Smrg			     DST_SEL_Z(SQ_SEL_Z),
2885de2362d3Smrg			     DST_SEL_W(SQ_SEL_W),
2886de2362d3Smrg			     LOD_BIAS(0),
2887de2362d3Smrg			     COORD_TYPE_X(TEX_NORMALIZED),
2888de2362d3Smrg			     COORD_TYPE_Y(TEX_NORMALIZED),
2889de2362d3Smrg			     COORD_TYPE_Z(TEX_NORMALIZED),
2890de2362d3Smrg			     COORD_TYPE_W(TEX_NORMALIZED));
    /* coordinate swizzle: x and y from the source GPR, z forced to 0, w forced to 1 */
2891de2362d3Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
2892de2362d3Smrg			     OFFSET_Y(0),
2893de2362d3Smrg			     OFFSET_Z(0),
2894de2362d3Smrg			     SAMPLER_ID(1),
2895de2362d3Smrg			     SRC_SEL_X(SQ_SEL_X),
2896de2362d3Smrg			     SRC_SEL_Y(SQ_SEL_Y),
2897de2362d3Smrg			     SRC_SEL_Z(SQ_SEL_0),
2898de2362d3Smrg			     SRC_SEL_W(SQ_SEL_1));
2899de2362d3Smrg    shader[i++] = TEX_DWORD_PAD;
2900de2362d3Smrg
    /* number of dwords written to shader[] */
2901de2362d3Smrg    return i;
2902de2362d3Smrg}
2903