r600_shader.c revision 2f39173d
1b7e1c893Smrg/*
2b7e1c893Smrg * Copyright 2008 Advanced Micro Devices, Inc.
3b7e1c893Smrg *
4b7e1c893Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b7e1c893Smrg * copy of this software and associated documentation files (the "Software"),
6b7e1c893Smrg * to deal in the Software without restriction, including without limitation
7b7e1c893Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b7e1c893Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b7e1c893Smrg * Software is furnished to do so, subject to the following conditions:
10b7e1c893Smrg *
11b7e1c893Smrg * The above copyright notice and this permission notice (including the next
12b7e1c893Smrg * paragraph) shall be included in all copies or substantial portions of the
13b7e1c893Smrg * Software.
14b7e1c893Smrg *
15b7e1c893Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b7e1c893Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b7e1c893Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b7e1c893Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b7e1c893Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20b7e1c893Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21b7e1c893Smrg * SOFTWARE.
22b7e1c893Smrg *
23b7e1c893Smrg * Author: Alex Deucher <alexander.deucher@amd.com>
24b7e1c893Smrg *
25b7e1c893Smrg */
26b7e1c893Smrg
27b7e1c893Smrg#ifdef HAVE_CONFIG_H
28b7e1c893Smrg#include "config.h"
29b7e1c893Smrg#endif
30b7e1c893Smrg
31b7e1c893Smrg#include "xf86.h"
32b7e1c893Smrg
33b7e1c893Smrg#include "radeon.h"
34b7e1c893Smrg#include "r600_shader.h"
35b7e1c893Smrg#include "r600_reg.h"
36b7e1c893Smrg
37b7e1c893Smrg/* solid vs --------------------------------------- */
38b7e1c893Smrgint R600_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader)
39b7e1c893Smrg{
40b7e1c893Smrg    int i = 0;
41b7e1c893Smrg
42b7e1c893Smrg    /* 0 */
43b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(4));
44b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
45b7e1c893Smrg			    CF_CONST(0),
46b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
47b7e1c893Smrg			    I_COUNT(1),
48b7e1c893Smrg			    CALL_COUNT(0),
49b7e1c893Smrg			    END_OF_PROGRAM(0),
50b7e1c893Smrg			    VALID_PIXEL_MODE(0),
51b7e1c893Smrg			    CF_INST(SQ_CF_INST_VTX),
52b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
53b7e1c893Smrg			    BARRIER(1));
54b7e1c893Smrg    /* 1 */
55b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
56b7e1c893Smrg					  TYPE(SQ_EXPORT_POS),
57b7e1c893Smrg					  RW_GPR(1),
58b7e1c893Smrg					  RW_REL(ABSOLUTE),
59b7e1c893Smrg					  INDEX_GPR(0),
60b7e1c893Smrg					  ELEM_SIZE(0));
61b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
62b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
63b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
64b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
65b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
66b7e1c893Smrg					       BURST_COUNT(1),
67b7e1c893Smrg					       END_OF_PROGRAM(0),
68b7e1c893Smrg					       VALID_PIXEL_MODE(0),
69b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
70b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
71b7e1c893Smrg					       BARRIER(1));
72b7e1c893Smrg    /* 2 - always export a param whether it's used or not */
73b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
74b7e1c893Smrg					  TYPE(SQ_EXPORT_PARAM),
75b7e1c893Smrg					  RW_GPR(0),
76b7e1c893Smrg					  RW_REL(ABSOLUTE),
77b7e1c893Smrg					  INDEX_GPR(0),
78b7e1c893Smrg					  ELEM_SIZE(0));
79b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
80b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
81b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
82b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
83b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
84b7e1c893Smrg					       BURST_COUNT(0),
85b7e1c893Smrg					       END_OF_PROGRAM(1),
86b7e1c893Smrg					       VALID_PIXEL_MODE(0),
87b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
88b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
89b7e1c893Smrg					       BARRIER(0));
90b7e1c893Smrg    /* 3 - padding */
91b7e1c893Smrg    shader[i++] = 0x00000000;
92b7e1c893Smrg    shader[i++] = 0x00000000;
93b7e1c893Smrg    /* 4/5 */
94b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
95b7e1c893Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
96b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
97b7e1c893Smrg			     BUFFER_ID(0),
98b7e1c893Smrg			     SRC_GPR(0),
99b7e1c893Smrg			     SRC_REL(ABSOLUTE),
100b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
101b7e1c893Smrg			     MEGA_FETCH_COUNT(8));
102b7e1c893Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
103b7e1c893Smrg				 DST_REL(0),
104b7e1c893Smrg				 DST_SEL_X(SQ_SEL_X),
105b7e1c893Smrg				 DST_SEL_Y(SQ_SEL_Y),
106b7e1c893Smrg				 DST_SEL_Z(SQ_SEL_0),
107b7e1c893Smrg				 DST_SEL_W(SQ_SEL_1),
108b7e1c893Smrg				 USE_CONST_FIELDS(0),
109ad43ddacSmrg				 DATA_FORMAT(FMT_32_32_FLOAT),
110ad43ddacSmrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
111ad43ddacSmrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
112b7e1c893Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
113b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
114b7e1c893Smrg			     ENDIAN_SWAP(ENDIAN_NONE),
115b7e1c893Smrg			     CONST_BUF_NO_STRIDE(0),
116b7e1c893Smrg			     MEGA_FETCH(1));
117b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
118b7e1c893Smrg
119b7e1c893Smrg    return i;
120b7e1c893Smrg}
121b7e1c893Smrg
122b7e1c893Smrg/* solid ps --------------------------------------- */
123b7e1c893Smrgint R600_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader)
124b7e1c893Smrg{
125b7e1c893Smrg    int i = 0;
126b7e1c893Smrg
127b7e1c893Smrg    /* 0 */
128b7e1c893Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(2),
129b7e1c893Smrg				KCACHE_BANK0(0),
130b7e1c893Smrg				KCACHE_BANK1(0),
131b7e1c893Smrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
132b7e1c893Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
133b7e1c893Smrg				KCACHE_ADDR0(0),
134b7e1c893Smrg				KCACHE_ADDR1(0),
135b7e1c893Smrg				I_COUNT(4),
136b7e1c893Smrg				USES_WATERFALL(0),
137b7e1c893Smrg				CF_INST(SQ_CF_INST_ALU),
138b7e1c893Smrg				WHOLE_QUAD_MODE(0),
139b7e1c893Smrg				BARRIER(1));
140b7e1c893Smrg    /* 1 */
141b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
142b7e1c893Smrg					  TYPE(SQ_EXPORT_PIXEL),
143b7e1c893Smrg					  RW_GPR(0),
144b7e1c893Smrg					  RW_REL(ABSOLUTE),
145b7e1c893Smrg					  INDEX_GPR(0),
146b7e1c893Smrg					  ELEM_SIZE(1));
147b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
148b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
149b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
150b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
151b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
152b7e1c893Smrg					       BURST_COUNT(1),
153b7e1c893Smrg					       END_OF_PROGRAM(1),
154b7e1c893Smrg					       VALID_PIXEL_MODE(0),
155b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
156b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
157b7e1c893Smrg					       BARRIER(1));
158b7e1c893Smrg
159b7e1c893Smrg    /* 2 */
160b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
161b7e1c893Smrg			     SRC0_REL(ABSOLUTE),
162b7e1c893Smrg			     SRC0_ELEM(ELEM_X),
163b7e1c893Smrg			     SRC0_NEG(0),
164b7e1c893Smrg			     SRC1_SEL(0),
165b7e1c893Smrg			     SRC1_REL(ABSOLUTE),
166b7e1c893Smrg			     SRC1_ELEM(ELEM_X),
167b7e1c893Smrg			     SRC1_NEG(0),
168b7e1c893Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
169b7e1c893Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
170b7e1c893Smrg			     LAST(0));
171b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
172b7e1c893Smrg				 SRC0_ABS(0),
173b7e1c893Smrg				 SRC1_ABS(0),
174b7e1c893Smrg				 UPDATE_EXECUTE_MASK(0),
175b7e1c893Smrg				 UPDATE_PRED(0),
176b7e1c893Smrg				 WRITE_MASK(1),
177b7e1c893Smrg				 FOG_MERGE(0),
178b7e1c893Smrg				 OMOD(SQ_ALU_OMOD_OFF),
179b7e1c893Smrg				 ALU_INST(SQ_OP2_INST_MOV),
180b7e1c893Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
181b7e1c893Smrg				 DST_GPR(0),
182b7e1c893Smrg				 DST_REL(ABSOLUTE),
183b7e1c893Smrg				 DST_ELEM(ELEM_X),
184b7e1c893Smrg				 CLAMP(1));
185b7e1c893Smrg    /* 3 */
186b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
187b7e1c893Smrg			     SRC0_REL(ABSOLUTE),
188b7e1c893Smrg			     SRC0_ELEM(ELEM_Y),
189b7e1c893Smrg			     SRC0_NEG(0),
190b7e1c893Smrg			     SRC1_SEL(0),
191b7e1c893Smrg			     SRC1_REL(ABSOLUTE),
192b7e1c893Smrg			     SRC1_ELEM(ELEM_Y),
193b7e1c893Smrg			     SRC1_NEG(0),
194b7e1c893Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
195b7e1c893Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
196b7e1c893Smrg			     LAST(0));
197b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
198b7e1c893Smrg				 SRC0_ABS(0),
199b7e1c893Smrg				 SRC1_ABS(0),
200b7e1c893Smrg				 UPDATE_EXECUTE_MASK(0),
201b7e1c893Smrg				 UPDATE_PRED(0),
202b7e1c893Smrg				 WRITE_MASK(1),
203b7e1c893Smrg				 FOG_MERGE(0),
204b7e1c893Smrg				 OMOD(SQ_ALU_OMOD_OFF),
205b7e1c893Smrg				 ALU_INST(SQ_OP2_INST_MOV),
206b7e1c893Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
207b7e1c893Smrg				 DST_GPR(0),
208b7e1c893Smrg				 DST_REL(ABSOLUTE),
209b7e1c893Smrg				 DST_ELEM(ELEM_Y),
210b7e1c893Smrg				 CLAMP(1));
211b7e1c893Smrg    /* 4 */
212b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
213b7e1c893Smrg			     SRC0_REL(ABSOLUTE),
214b7e1c893Smrg			     SRC0_ELEM(ELEM_Z),
215b7e1c893Smrg			     SRC0_NEG(0),
216b7e1c893Smrg			     SRC1_SEL(0),
217b7e1c893Smrg			     SRC1_REL(ABSOLUTE),
218b7e1c893Smrg			     SRC1_ELEM(ELEM_Z),
219b7e1c893Smrg			     SRC1_NEG(0),
220b7e1c893Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
221b7e1c893Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
222b7e1c893Smrg			     LAST(0));
223b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
224b7e1c893Smrg				 SRC0_ABS(0),
225b7e1c893Smrg				 SRC1_ABS(0),
226b7e1c893Smrg				 UPDATE_EXECUTE_MASK(0),
227b7e1c893Smrg				 UPDATE_PRED(0),
228b7e1c893Smrg				 WRITE_MASK(1),
229b7e1c893Smrg				 FOG_MERGE(0),
230b7e1c893Smrg				 OMOD(SQ_ALU_OMOD_OFF),
231b7e1c893Smrg				 ALU_INST(SQ_OP2_INST_MOV),
232b7e1c893Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
233b7e1c893Smrg				 DST_GPR(0),
234b7e1c893Smrg				 DST_REL(ABSOLUTE),
235b7e1c893Smrg				 DST_ELEM(ELEM_Z),
236b7e1c893Smrg				 CLAMP(1));
237b7e1c893Smrg    /* 5 */
238b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
239b7e1c893Smrg			     SRC0_REL(ABSOLUTE),
240b7e1c893Smrg			     SRC0_ELEM(ELEM_W),
241b7e1c893Smrg			     SRC0_NEG(0),
242b7e1c893Smrg			     SRC1_SEL(0),
243b7e1c893Smrg			     SRC1_REL(ABSOLUTE),
244b7e1c893Smrg			     SRC1_ELEM(ELEM_W),
245b7e1c893Smrg			     SRC1_NEG(0),
246b7e1c893Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
247b7e1c893Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
248b7e1c893Smrg			     LAST(1));
249b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
250b7e1c893Smrg				 SRC0_ABS(0),
251b7e1c893Smrg				 SRC1_ABS(0),
252b7e1c893Smrg				 UPDATE_EXECUTE_MASK(0),
253b7e1c893Smrg				 UPDATE_PRED(0),
254b7e1c893Smrg				 WRITE_MASK(1),
255b7e1c893Smrg				 FOG_MERGE(0),
256b7e1c893Smrg				 OMOD(SQ_ALU_OMOD_OFF),
257b7e1c893Smrg				 ALU_INST(SQ_OP2_INST_MOV),
258b7e1c893Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
259b7e1c893Smrg				 DST_GPR(0),
260b7e1c893Smrg				 DST_REL(ABSOLUTE),
261b7e1c893Smrg				 DST_ELEM(ELEM_W),
262b7e1c893Smrg				 CLAMP(1));
263b7e1c893Smrg
264b7e1c893Smrg    return i;
265b7e1c893Smrg}
266b7e1c893Smrg
267b7e1c893Smrg/* copy vs --------------------------------------- */
268b7e1c893Smrgint R600_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader)
269b7e1c893Smrg{
270b7e1c893Smrg    int i = 0;
271b7e1c893Smrg
272b7e1c893Smrg    /* 0 */
273b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(4));
274b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
275b7e1c893Smrg			    CF_CONST(0),
276b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
277b7e1c893Smrg			    I_COUNT(2),
278b7e1c893Smrg			    CALL_COUNT(0),
279b7e1c893Smrg			    END_OF_PROGRAM(0),
280b7e1c893Smrg			    VALID_PIXEL_MODE(0),
281b7e1c893Smrg			    CF_INST(SQ_CF_INST_VTX),
282b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
283b7e1c893Smrg			    BARRIER(1));
284b7e1c893Smrg    /* 1 */
285b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
286b7e1c893Smrg					  TYPE(SQ_EXPORT_POS),
287b7e1c893Smrg					  RW_GPR(1),
288b7e1c893Smrg					  RW_REL(ABSOLUTE),
289b7e1c893Smrg					  INDEX_GPR(0),
290b7e1c893Smrg					  ELEM_SIZE(0));
291b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
292b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
293b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
294b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
295b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
296b7e1c893Smrg					       BURST_COUNT(0),
297b7e1c893Smrg					       END_OF_PROGRAM(0),
298b7e1c893Smrg					       VALID_PIXEL_MODE(0),
299b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
300b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
301b7e1c893Smrg					       BARRIER(1));
302b7e1c893Smrg    /* 2 */
303b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
304b7e1c893Smrg					  TYPE(SQ_EXPORT_PARAM),
305b7e1c893Smrg					  RW_GPR(0),
306b7e1c893Smrg					  RW_REL(ABSOLUTE),
307b7e1c893Smrg					  INDEX_GPR(0),
308b7e1c893Smrg					  ELEM_SIZE(0));
309b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
310b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
311b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
312b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
313b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
314b7e1c893Smrg					       BURST_COUNT(0),
315b7e1c893Smrg					       END_OF_PROGRAM(1),
316b7e1c893Smrg					       VALID_PIXEL_MODE(0),
317b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
318b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
319b7e1c893Smrg					       BARRIER(0));
320b7e1c893Smrg    /* 3 */
321b7e1c893Smrg    shader[i++] = 0x00000000;
322b7e1c893Smrg    shader[i++] = 0x00000000;
323b7e1c893Smrg    /* 4/5 */
324b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
325b7e1c893Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
326b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
327b7e1c893Smrg			     BUFFER_ID(0),
328b7e1c893Smrg			     SRC_GPR(0),
329b7e1c893Smrg			     SRC_REL(ABSOLUTE),
330b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
331b7e1c893Smrg			     MEGA_FETCH_COUNT(16));
332b7e1c893Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
333b7e1c893Smrg				 DST_REL(0),
334b7e1c893Smrg				 DST_SEL_X(SQ_SEL_X),
335b7e1c893Smrg				 DST_SEL_Y(SQ_SEL_Y),
336b7e1c893Smrg				 DST_SEL_Z(SQ_SEL_0),
337b7e1c893Smrg				 DST_SEL_W(SQ_SEL_1),
338b7e1c893Smrg				 USE_CONST_FIELDS(0),
339ad43ddacSmrg				 DATA_FORMAT(FMT_32_32_FLOAT),
340ad43ddacSmrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
341ad43ddacSmrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
342b7e1c893Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
343b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
344b7e1c893Smrg			     ENDIAN_SWAP(ENDIAN_NONE),
345b7e1c893Smrg			     CONST_BUF_NO_STRIDE(0),
346b7e1c893Smrg			     MEGA_FETCH(1));
347b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
348b7e1c893Smrg    /* 6/7 */
349b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
350b7e1c893Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
351b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
352b7e1c893Smrg			     BUFFER_ID(0),
353b7e1c893Smrg			     SRC_GPR(0),
354b7e1c893Smrg			     SRC_REL(ABSOLUTE),
355b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
356b7e1c893Smrg			     MEGA_FETCH_COUNT(8));
357b7e1c893Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
358b7e1c893Smrg				 DST_REL(0),
359b7e1c893Smrg				 DST_SEL_X(SQ_SEL_X),
360b7e1c893Smrg				 DST_SEL_Y(SQ_SEL_Y),
361b7e1c893Smrg				 DST_SEL_Z(SQ_SEL_0),
362b7e1c893Smrg				 DST_SEL_W(SQ_SEL_1),
363b7e1c893Smrg				 USE_CONST_FIELDS(0),
364ad43ddacSmrg				 DATA_FORMAT(FMT_32_32_FLOAT),
365ad43ddacSmrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
366ad43ddacSmrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
367b7e1c893Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
368b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
369b7e1c893Smrg			     ENDIAN_SWAP(ENDIAN_NONE),
370b7e1c893Smrg			     CONST_BUF_NO_STRIDE(0),
371b7e1c893Smrg			     MEGA_FETCH(0));
372b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
373b7e1c893Smrg
374b7e1c893Smrg    return i;
375b7e1c893Smrg}
376b7e1c893Smrg
377b7e1c893Smrg/* copy ps --------------------------------------- */
378b7e1c893Smrgint R600_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader)
379b7e1c893Smrg{
380b7e1c893Smrg    int i=0;
381b7e1c893Smrg
382b7e1c893Smrg    /* CF INST 0 */
383b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(2));
384b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
385b7e1c893Smrg			    CF_CONST(0),
386b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
387b7e1c893Smrg			    I_COUNT(1),
388b7e1c893Smrg			    CALL_COUNT(0),
389b7e1c893Smrg			    END_OF_PROGRAM(0),
390b7e1c893Smrg			    VALID_PIXEL_MODE(0),
391b7e1c893Smrg			    CF_INST(SQ_CF_INST_TEX),
392b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
393b7e1c893Smrg			    BARRIER(1));
394b7e1c893Smrg    /* CF INST 1 */
395b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
396b7e1c893Smrg					  TYPE(SQ_EXPORT_PIXEL),
397b7e1c893Smrg					  RW_GPR(0),
398b7e1c893Smrg					  RW_REL(ABSOLUTE),
399b7e1c893Smrg					  INDEX_GPR(0),
400b7e1c893Smrg					  ELEM_SIZE(1));
401b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
402b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
403b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
404b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
405b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
406b7e1c893Smrg					       BURST_COUNT(1),
407b7e1c893Smrg					       END_OF_PROGRAM(1),
408b7e1c893Smrg					       VALID_PIXEL_MODE(0),
409b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
410b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
411b7e1c893Smrg					       BARRIER(1));
412b7e1c893Smrg    /* TEX INST 0 */
413b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
414b7e1c893Smrg			     BC_FRAC_MODE(0),
415b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
416b7e1c893Smrg			     RESOURCE_ID(0),
417b7e1c893Smrg			     SRC_GPR(0),
418b7e1c893Smrg			     SRC_REL(ABSOLUTE),
419b7e1c893Smrg			     R7xx_ALT_CONST(0));
420b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(0),
421b7e1c893Smrg			     DST_REL(ABSOLUTE),
422b7e1c893Smrg			     DST_SEL_X(SQ_SEL_X), /* R */
423b7e1c893Smrg			     DST_SEL_Y(SQ_SEL_Y), /* G */
424b7e1c893Smrg			     DST_SEL_Z(SQ_SEL_Z), /* B */
425b7e1c893Smrg			     DST_SEL_W(SQ_SEL_W), /* A */
426b7e1c893Smrg			     LOD_BIAS(0),
427b7e1c893Smrg			     COORD_TYPE_X(TEX_UNNORMALIZED),
428b7e1c893Smrg			     COORD_TYPE_Y(TEX_UNNORMALIZED),
429b7e1c893Smrg			     COORD_TYPE_Z(TEX_UNNORMALIZED),
430b7e1c893Smrg			     COORD_TYPE_W(TEX_UNNORMALIZED));
431b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
432b7e1c893Smrg			     OFFSET_Y(0),
433b7e1c893Smrg			     OFFSET_Z(0),
434b7e1c893Smrg			     SAMPLER_ID(0),
435b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
436b7e1c893Smrg			     SRC_SEL_Y(SQ_SEL_Y),
437b7e1c893Smrg			     SRC_SEL_Z(SQ_SEL_0),
438b7e1c893Smrg			     SRC_SEL_W(SQ_SEL_1));
439b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
440b7e1c893Smrg
441b7e1c893Smrg    return i;
442b7e1c893Smrg}
443b7e1c893Smrg
444b7e1c893Smrg/*
445b7e1c893Smrg * ; xv vertex shader
446b7e1c893Smrg * 00 VTX: ADDR(4) CNT(2)
447b7e1c893Smrg *       0  VFETCH R1.xy01, R0.x, fc0  MEGA(16) FORMAT(32_32_FLOAT)
448b7e1c893Smrg *          FORMAT_COMP(SIGNED)
449b7e1c893Smrg *       1  VFETCH R0.xy01, R0.x, fc0  MINI(8) OFFSET(8) FORMAT(32_32_FLOAT)
450b7e1c893Smrg *          FORMAT_COMP(SIGNED)
451b7e1c893Smrg * 01 EXP_DONE: POS0, R1
452b7e1c893Smrg * 02 EXP_DONE: PARAM0, R0  NO_BARRIER
453b7e1c893Smrg * END_OF_PROGRAM
454b7e1c893Smrg */
455b7e1c893Smrgint R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
456b7e1c893Smrg{
457b7e1c893Smrg    int i = 0;
458b7e1c893Smrg
459b7e1c893Smrg    /* 0 */
460ad43ddacSmrg    shader[i++] = CF_DWORD0(ADDR(6));
461b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
462b7e1c893Smrg                            CF_CONST(0),
463b7e1c893Smrg                            COND(SQ_CF_COND_ACTIVE),
464b7e1c893Smrg                            I_COUNT(2),
465b7e1c893Smrg                            CALL_COUNT(0),
466b7e1c893Smrg                            END_OF_PROGRAM(0),
467b7e1c893Smrg                            VALID_PIXEL_MODE(0),
468b7e1c893Smrg                            CF_INST(SQ_CF_INST_VTX),
469b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
470b7e1c893Smrg                            BARRIER(1));
471ad43ddacSmrg
472ad43ddacSmrg    /* 1 - ALU */
473ad43ddacSmrg    shader[i++] = CF_ALU_DWORD0(ADDR(4),
474ad43ddacSmrg				KCACHE_BANK0(0),
475ad43ddacSmrg				KCACHE_BANK1(0),
476ad43ddacSmrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
477ad43ddacSmrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
478ad43ddacSmrg				KCACHE_ADDR0(0),
479ad43ddacSmrg				KCACHE_ADDR1(0),
480ad43ddacSmrg				I_COUNT(2),
481ad43ddacSmrg				USES_WATERFALL(0),
482ad43ddacSmrg				CF_INST(SQ_CF_INST_ALU),
483ad43ddacSmrg				WHOLE_QUAD_MODE(0),
484ad43ddacSmrg				BARRIER(1));
485ad43ddacSmrg
486ad43ddacSmrg    /* 2 */
487b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
488b7e1c893Smrg                                          TYPE(SQ_EXPORT_POS),
489b7e1c893Smrg                                          RW_GPR(1),
490b7e1c893Smrg                                          RW_REL(ABSOLUTE),
491b7e1c893Smrg                                          INDEX_GPR(0),
492b7e1c893Smrg                                          ELEM_SIZE(3));
493b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
494b7e1c893Smrg                                               SRC_SEL_Y(SQ_SEL_Y),
495b7e1c893Smrg                                               SRC_SEL_Z(SQ_SEL_Z),
496b7e1c893Smrg                                               SRC_SEL_W(SQ_SEL_W),
497b7e1c893Smrg                                               R6xx_ELEM_LOOP(0),
498b7e1c893Smrg                                               BURST_COUNT(1),
499b7e1c893Smrg                                               END_OF_PROGRAM(0),
500b7e1c893Smrg                                               VALID_PIXEL_MODE(0),
501b7e1c893Smrg                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
502b7e1c893Smrg                                               WHOLE_QUAD_MODE(0),
503b7e1c893Smrg                                               BARRIER(1));
504ad43ddacSmrg    /* 3 */
505b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
506b7e1c893Smrg                                          TYPE(SQ_EXPORT_PARAM),
507b7e1c893Smrg                                          RW_GPR(0),
508b7e1c893Smrg                                          RW_REL(ABSOLUTE),
509b7e1c893Smrg                                          INDEX_GPR(0),
510b7e1c893Smrg                                          ELEM_SIZE(3));
511b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
512b7e1c893Smrg                                               SRC_SEL_Y(SQ_SEL_Y),
513b7e1c893Smrg                                               SRC_SEL_Z(SQ_SEL_Z),
514b7e1c893Smrg                                               SRC_SEL_W(SQ_SEL_W),
515b7e1c893Smrg                                               R6xx_ELEM_LOOP(0),
516b7e1c893Smrg                                               BURST_COUNT(1),
517b7e1c893Smrg                                               END_OF_PROGRAM(1),
518b7e1c893Smrg                                               VALID_PIXEL_MODE(0),
519b7e1c893Smrg                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
520b7e1c893Smrg                                               WHOLE_QUAD_MODE(0),
521b7e1c893Smrg                                               BARRIER(0));
522ad43ddacSmrg
523ad43ddacSmrg
524ad43ddacSmrg    /* 4 texX / w */
525ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(0),
526ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
527ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
528ad43ddacSmrg                             SRC0_NEG(0),
529ad43ddacSmrg                             SRC1_SEL(256),
530ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
531ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
532ad43ddacSmrg                             SRC1_NEG(0),
533ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
534ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
535ad43ddacSmrg                             LAST(0));
536ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
537ad43ddacSmrg                                 SRC0_ABS(0),
538ad43ddacSmrg                                 SRC1_ABS(0),
539ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
540ad43ddacSmrg                                 UPDATE_PRED(0),
541ad43ddacSmrg                                 WRITE_MASK(1),
542ad43ddacSmrg                                 FOG_MERGE(0),
543ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
544ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
545ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
546ad43ddacSmrg                                 DST_GPR(0),
547ad43ddacSmrg                                 DST_REL(ABSOLUTE),
548ad43ddacSmrg                                 DST_ELEM(ELEM_X),
549ad43ddacSmrg                                 CLAMP(0));
550ad43ddacSmrg
551ad43ddacSmrg    /* 5 texY / h */
552ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(0),
553ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
554ad43ddacSmrg                             SRC0_ELEM(ELEM_Y),
555ad43ddacSmrg                             SRC0_NEG(0),
556ad43ddacSmrg                             SRC1_SEL(256),
557ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
558ad43ddacSmrg                             SRC1_ELEM(ELEM_Y),
559ad43ddacSmrg                             SRC1_NEG(0),
560ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
561ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
562ad43ddacSmrg                             LAST(1));
563ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
564ad43ddacSmrg                                 SRC0_ABS(0),
565ad43ddacSmrg                                 SRC1_ABS(0),
566ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
567ad43ddacSmrg                                 UPDATE_PRED(0),
568ad43ddacSmrg                                 WRITE_MASK(1),
569ad43ddacSmrg                                 FOG_MERGE(0),
570ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
571ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
572ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
573ad43ddacSmrg                                 DST_GPR(0),
574ad43ddacSmrg                                 DST_REL(ABSOLUTE),
575ad43ddacSmrg                                 DST_ELEM(ELEM_Y),
576ad43ddacSmrg                                 CLAMP(0));
577ad43ddacSmrg
578ad43ddacSmrg    /* 6/7 */
579b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
580b7e1c893Smrg                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
581b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
582b7e1c893Smrg                             BUFFER_ID(0),
583b7e1c893Smrg                             SRC_GPR(0),
584b7e1c893Smrg                             SRC_REL(ABSOLUTE),
585b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
586b7e1c893Smrg                             MEGA_FETCH_COUNT(16));
587b7e1c893Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
588b7e1c893Smrg                                 DST_REL(ABSOLUTE),
589b7e1c893Smrg                                 DST_SEL_X(SQ_SEL_X),
590b7e1c893Smrg                                 DST_SEL_Y(SQ_SEL_Y),
591b7e1c893Smrg                                 DST_SEL_Z(SQ_SEL_0),
592b7e1c893Smrg                                 DST_SEL_W(SQ_SEL_1),
593b7e1c893Smrg                                 USE_CONST_FIELDS(0),
594b7e1c893Smrg                                 DATA_FORMAT(FMT_32_32_FLOAT),
595ad43ddacSmrg                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
596b7e1c893Smrg                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
597b7e1c893Smrg                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
598b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
599b7e1c893Smrg                             ENDIAN_SWAP(ENDIAN_NONE),
600b7e1c893Smrg                             CONST_BUF_NO_STRIDE(0),
601b7e1c893Smrg                             MEGA_FETCH(1));
602b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
603ad43ddacSmrg    /* 8/9 */
604b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
605b7e1c893Smrg                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
606b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
607b7e1c893Smrg                             BUFFER_ID(0),
608b7e1c893Smrg                             SRC_GPR(0),
609b7e1c893Smrg                             SRC_REL(ABSOLUTE),
610b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
611b7e1c893Smrg                             MEGA_FETCH_COUNT(8));
612b7e1c893Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
613b7e1c893Smrg                                 DST_REL(ABSOLUTE),
614b7e1c893Smrg                                 DST_SEL_X(SQ_SEL_X),
615b7e1c893Smrg                                 DST_SEL_Y(SQ_SEL_Y),
616b7e1c893Smrg                                 DST_SEL_Z(SQ_SEL_0),
617b7e1c893Smrg                                 DST_SEL_W(SQ_SEL_1),
618b7e1c893Smrg                                 USE_CONST_FIELDS(0),
619b7e1c893Smrg                                 DATA_FORMAT(FMT_32_32_FLOAT),
620ad43ddacSmrg                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
621b7e1c893Smrg                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
622b7e1c893Smrg                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
623b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
624b7e1c893Smrg                             ENDIAN_SWAP(ENDIAN_NONE),
625b7e1c893Smrg                             CONST_BUF_NO_STRIDE(0),
626b7e1c893Smrg                             MEGA_FETCH(0));
627b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
628b7e1c893Smrg
629b7e1c893Smrg    return i;
630b7e1c893Smrg}
631b7e1c893Smrg
/* xv ps --------------------------------------- */
/*
 * Emit the "xv" pixel shader program (presumably the Xv video path, going
 * by the name -- confirm against the caller) into shader[].
 *
 * The instruction dwords below are stored to shader[0], shader[1], ... in
 * order; the return value is the number of dwords written (60 for this
 * sequence).  ChipSet is not referenced anywhere in this function.
 *
 * The numbers in the inline comments count 64-bit slots (two dwords each),
 * and the ADDR() values of the CF instructions use the same numbering:
 *   0-3    control flow: two conditional CALLs (to 16 and to 24), one ALU
 *          clause (12 instructions at slot 4), then the pixel export of r2
 *   4-15   ALU clause: three groups of four MULADDs accumulating into r2
 *   16-23  subroutine: TEX clause with three samples into r1, then RETURN
 *   24-29  subroutine: TEX clause with two samples into r1, then RETURN
 *
 * Because the CF ADDR() operands are absolute slot numbers, inserting,
 * removing or reordering any instruction requires updating them.
 *
 * NOTE(review): the two CALLs test the same CF_CONST(0) with opposite
 * conditions, so exactly one fetch subroutine should run; this looks like
 * a three-texture vs. two-texture source format switch -- confirm.
 */
632b7e1c893Smrgint R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
633b7e1c893Smrg{
634b7e1c893Smrg    int i = 0;
635b7e1c893Smrg
636b7e1c893Smrg    /* 0: CALL slot 16 when the condition is SQ_CF_COND_BOOL */
637ad43ddacSmrg    shader[i++] = CF_DWORD0(ADDR(16));
638b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
639b7e1c893Smrg                            CF_CONST(0),
640b7e1c893Smrg                            COND(SQ_CF_COND_BOOL),
641b7e1c893Smrg                            I_COUNT(0),
642b7e1c893Smrg                            CALL_COUNT(0),
643b7e1c893Smrg                            END_OF_PROGRAM(0),
644b7e1c893Smrg                            VALID_PIXEL_MODE(0),
645b7e1c893Smrg                            CF_INST(SQ_CF_INST_CALL),
646b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
647b7e1c893Smrg                            BARRIER(0));
648b7e1c893Smrg    /* 1: CALL slot 24 when the condition is SQ_CF_COND_NOT_BOOL */
649ad43ddacSmrg    shader[i++] = CF_DWORD0(ADDR(24));
650b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
651b7e1c893Smrg                            CF_CONST(0),
652b7e1c893Smrg                            COND(SQ_CF_COND_NOT_BOOL),
653b7e1c893Smrg                            I_COUNT(0),
654b7e1c893Smrg                            CALL_COUNT(0),
655b7e1c893Smrg                            END_OF_PROGRAM(0),
656b7e1c893Smrg                            VALID_PIXEL_MODE(0),
657b7e1c893Smrg                            CF_INST(SQ_CF_INST_CALL),
658b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
659b7e1c893Smrg                            BARRIER(0));
660b7e1c893Smrg    /* 2: ALU clause, 12 instructions starting at slot 4 */
661b7e1c893Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(4),
662b7e1c893Smrg                                KCACHE_BANK0(0),
663b7e1c893Smrg                                KCACHE_BANK1(0),
664b7e1c893Smrg                                KCACHE_MODE0(SQ_CF_KCACHE_NOP));
665b7e1c893Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
666b7e1c893Smrg                                KCACHE_ADDR0(0),
667b7e1c893Smrg                                KCACHE_ADDR1(0),
668ad43ddacSmrg                                I_COUNT(12),
669b7e1c893Smrg                                USES_WATERFALL(0),
670b7e1c893Smrg                                CF_INST(SQ_CF_INST_ALU),
671b7e1c893Smrg                                WHOLE_QUAD_MODE(0),
672b7e1c893Smrg                                BARRIER(1));
673b7e1c893Smrg    /* 3: export r2 to CF_PIXEL_MRT0; last CF instruction (END_OF_PROGRAM) */
674b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
675b7e1c893Smrg                                          TYPE(SQ_EXPORT_PIXEL),
676b7e1c893Smrg                                          RW_GPR(2),
677b7e1c893Smrg                                          RW_REL(ABSOLUTE),
678b7e1c893Smrg                                          INDEX_GPR(0),
679b7e1c893Smrg                                          ELEM_SIZE(3));
680b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
681b7e1c893Smrg                                               SRC_SEL_Y(SQ_SEL_Y),
682b7e1c893Smrg                                               SRC_SEL_Z(SQ_SEL_Z),
683b7e1c893Smrg                                               SRC_SEL_W(SQ_SEL_W),
684b7e1c893Smrg                                               R6xx_ELEM_LOOP(0),
685b7e1c893Smrg                                               BURST_COUNT(1),
686b7e1c893Smrg                                               END_OF_PROGRAM(1),
687b7e1c893Smrg                                               VALID_PIXEL_MODE(0),
688b7e1c893Smrg                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
689b7e1c893Smrg                                               WHOLE_QUAD_MODE(0),
690b7e1c893Smrg                                               BARRIER(1));
691ad43ddacSmrg    /* 4,5,6,7: first MULADD group, r2.xyz = c0.w * r1.x + c0.xyz, r2.w = 1 */
692ad43ddacSmrg    /* r2.x = MAD(c0.w, r1.x, c0.x) */
693ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
694b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
695ad43ddacSmrg                             SRC0_ELEM(ELEM_W),
696b7e1c893Smrg                             SRC0_NEG(0),
697ad43ddacSmrg                             SRC1_SEL(1),
698b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
699b7e1c893Smrg                             SRC1_ELEM(ELEM_X),
700b7e1c893Smrg                             SRC1_NEG(0),
701b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
702b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
703b7e1c893Smrg                             LAST(0));
704ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256),
705b7e1c893Smrg                                 SRC2_REL(ABSOLUTE),
706ad43ddacSmrg                                 SRC2_ELEM(ELEM_X),
707b7e1c893Smrg                                 SRC2_NEG(0),
708b7e1c893Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
709b7e1c893Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
710ad43ddacSmrg                                 DST_GPR(2),
711b7e1c893Smrg                                 DST_REL(ABSOLUTE),
712b7e1c893Smrg                                 DST_ELEM(ELEM_X),
713ad43ddacSmrg                                 CLAMP(0));
714ad43ddacSmrg    /* r2.y = MAD(c0.w, r1.x, c0.y) */
715ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
716b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
717ad43ddacSmrg                             SRC0_ELEM(ELEM_W),
718b7e1c893Smrg                             SRC0_NEG(0),
719ad43ddacSmrg                             SRC1_SEL(1),
720b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
721ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
722b7e1c893Smrg                             SRC1_NEG(0),
723b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
724b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
725b7e1c893Smrg                             LAST(0));
726ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256),
727b7e1c893Smrg                                 SRC2_REL(ABSOLUTE),
728ad43ddacSmrg                                 SRC2_ELEM(ELEM_Y),
729b7e1c893Smrg                                 SRC2_NEG(0),
730b7e1c893Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
731b7e1c893Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
732ad43ddacSmrg                                 DST_GPR(2),
733b7e1c893Smrg                                 DST_REL(ABSOLUTE),
734b7e1c893Smrg                                 DST_ELEM(ELEM_Y),
735b7e1c893Smrg                                 CLAMP(0));
736ad43ddacSmrg    /* r2.z = MAD(c0.w, r1.x, c0.z) */
737ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
738b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
739ad43ddacSmrg                             SRC0_ELEM(ELEM_W),
740b7e1c893Smrg                             SRC0_NEG(0),
741ad43ddacSmrg                             SRC1_SEL(1),
742b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
743ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
744b7e1c893Smrg                             SRC1_NEG(0),
745b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
746b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
747b7e1c893Smrg                             LAST(0));
748ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256),
749b7e1c893Smrg                                 SRC2_REL(ABSOLUTE),
750ad43ddacSmrg                                 SRC2_ELEM(ELEM_Z),
751b7e1c893Smrg                                 SRC2_NEG(0),
752b7e1c893Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
753b7e1c893Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
754ad43ddacSmrg                                 DST_GPR(2),
755b7e1c893Smrg                                 DST_REL(ABSOLUTE),
756b7e1c893Smrg                                 DST_ELEM(ELEM_Z),
757b7e1c893Smrg                                 CLAMP(0));
758ad43ddacSmrg    /* r2.w = MAD(0, 0, 1); LAST(1) closes this group of four */
759b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
760b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
761b7e1c893Smrg                             SRC0_ELEM(ELEM_X),
762b7e1c893Smrg                             SRC0_NEG(0),
763b7e1c893Smrg                             SRC1_SEL(SQ_ALU_SRC_0),
764b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
765b7e1c893Smrg                             SRC1_ELEM(ELEM_X),
766b7e1c893Smrg                             SRC1_NEG(0),
767b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
768b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
769b7e1c893Smrg                             LAST(1));
770ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
771ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
772ad43ddacSmrg                                 SRC2_ELEM(ELEM_X),
773ad43ddacSmrg                                 SRC2_NEG(0),
774ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
775b7e1c893Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
776ad43ddacSmrg                                 DST_GPR(2),
777b7e1c893Smrg                                 DST_REL(ABSOLUTE),
778b7e1c893Smrg                                 DST_ELEM(ELEM_W),
779b7e1c893Smrg                                 CLAMP(0));
780ad43ddacSmrg
781ad43ddacSmrg    /* 8,9,10,11: second MULADD group, r2.xyz = c1.xyz * r1.y + previous result (pv) */
782ad43ddacSmrg    /* r2.x = MAD(c1.x, r1.y, pv.x) */
783ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(257),
784b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
785b7e1c893Smrg                             SRC0_ELEM(ELEM_X),
786b7e1c893Smrg                             SRC0_NEG(0),
787ad43ddacSmrg                             SRC1_SEL(1),
788b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
789ad43ddacSmrg                             SRC1_ELEM(ELEM_Y),
790b7e1c893Smrg                             SRC1_NEG(0),
791b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
792b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
793b7e1c893Smrg                             LAST(0));
794ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
795ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
796ad43ddacSmrg                                 SRC2_ELEM(ELEM_X),
797ad43ddacSmrg                                 SRC2_NEG(0),
798ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
799ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
800b7e1c893Smrg                                 DST_GPR(2),
801b7e1c893Smrg                                 DST_REL(ABSOLUTE),
802b7e1c893Smrg                                 DST_ELEM(ELEM_X),
803ad43ddacSmrg                                 CLAMP(0));
804ad43ddacSmrg    /* r2.y = MAD(c1.y, r1.y, pv.y) */
805ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(257),
806b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
807b7e1c893Smrg                             SRC0_ELEM(ELEM_Y),
808b7e1c893Smrg                             SRC0_NEG(0),
809ad43ddacSmrg                             SRC1_SEL(1),
810b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
811b7e1c893Smrg                             SRC1_ELEM(ELEM_Y),
812b7e1c893Smrg                             SRC1_NEG(0),
813b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
814b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
815b7e1c893Smrg                             LAST(0));
816ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
817ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
818ad43ddacSmrg                                 SRC2_ELEM(ELEM_Y),
819ad43ddacSmrg                                 SRC2_NEG(0),
820ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
821ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
822ad43ddacSmrg                                 DST_GPR(2),
823b7e1c893Smrg                                 DST_REL(ABSOLUTE),
824b7e1c893Smrg                                 DST_ELEM(ELEM_Y),
825ad43ddacSmrg                                 CLAMP(0));
826ad43ddacSmrg    /* r2.z = MAD(c1.z, r1.y, pv.z) */
827ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(257),
828b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
829b7e1c893Smrg                             SRC0_ELEM(ELEM_Z),
830b7e1c893Smrg                             SRC0_NEG(0),
831ad43ddacSmrg                             SRC1_SEL(1),
832b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
833ad43ddacSmrg                             SRC1_ELEM(ELEM_Y),
834b7e1c893Smrg                             SRC1_NEG(0),
835b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
836b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
837b7e1c893Smrg                             LAST(0));
838ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
839ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
840ad43ddacSmrg                                 SRC2_ELEM(ELEM_Z),
841ad43ddacSmrg                                 SRC2_NEG(0),
842ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
843ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
844ad43ddacSmrg                                 DST_GPR(2),
845b7e1c893Smrg                                 DST_REL(ABSOLUTE),
846b7e1c893Smrg                                 DST_ELEM(ELEM_Z),
847ad43ddacSmrg                                 CLAMP(0));
848ad43ddacSmrg    /* r2.w = MAD(0, 0, 1) */
    /* NOTE(review): SRC2_ELEM is ELEM_W here but ELEM_X in the same op of the
     * other two groups; presumably irrelevant for the SQ_ALU_SRC_1 source --
     * confirm before normalizing. */
849ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
850b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
851ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
852b7e1c893Smrg                             SRC0_NEG(0),
853ad43ddacSmrg                             SRC1_SEL(SQ_ALU_SRC_0),
854b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
855ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
856b7e1c893Smrg                             SRC1_NEG(0),
857b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
858b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
859b7e1c893Smrg                             LAST(1));
860ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
861ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
862ad43ddacSmrg                                 SRC2_ELEM(ELEM_W),
863ad43ddacSmrg                                 SRC2_NEG(0),
864ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
865ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
866ad43ddacSmrg                                 DST_GPR(2),
867b7e1c893Smrg                                 DST_REL(ABSOLUTE),
868b7e1c893Smrg                                 DST_ELEM(ELEM_W),
869ad43ddacSmrg                                 CLAMP(0));
870ad43ddacSmrg    /* 12,13,14,15: third MULADD group, r2.xyz = c2.xyz * r1.z + pv; all four use CLAMP(1) */
871ad43ddacSmrg    /* r2.x = MAD(c2.x, r1.z, pv.x) */
872ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(258),
873b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
874b7e1c893Smrg                             SRC0_ELEM(ELEM_X),
875b7e1c893Smrg                             SRC0_NEG(0),
876ad43ddacSmrg                             SRC1_SEL(1),
877b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
878ad43ddacSmrg                             SRC1_ELEM(ELEM_Z),
879b7e1c893Smrg                             SRC1_NEG(0),
880b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
881b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
882b7e1c893Smrg                             LAST(0));
883ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
884ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
885ad43ddacSmrg                                 SRC2_ELEM(ELEM_X),
886ad43ddacSmrg                                 SRC2_NEG(0),
887ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
888ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
889ad43ddacSmrg                                 DST_GPR(2),
890b7e1c893Smrg                                 DST_REL(ABSOLUTE),
891b7e1c893Smrg                                 DST_ELEM(ELEM_X),
892b7e1c893Smrg                                 CLAMP(1));
893ad43ddacSmrg    /* r2.y = MAD(c2.y, r1.z, pv.y) */
894ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(258),
895b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
896b7e1c893Smrg                             SRC0_ELEM(ELEM_Y),
897b7e1c893Smrg                             SRC0_NEG(0),
898ad43ddacSmrg                             SRC1_SEL(1),
899b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
900ad43ddacSmrg                             SRC1_ELEM(ELEM_Z),
901b7e1c893Smrg                             SRC1_NEG(0),
902b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
903b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
904b7e1c893Smrg                             LAST(0));
905ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
906ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
907ad43ddacSmrg                                 SRC2_ELEM(ELEM_Y),
908ad43ddacSmrg                                 SRC2_NEG(0),
909ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
910ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
911b7e1c893Smrg                                 DST_GPR(2),
912b7e1c893Smrg                                 DST_REL(ABSOLUTE),
913b7e1c893Smrg                                 DST_ELEM(ELEM_Y),
914b7e1c893Smrg                                 CLAMP(1));
915ad43ddacSmrg    /* r2.z = MAD(c2.z, r1.z, pv.z) */
916ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(258),
917b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
918b7e1c893Smrg                             SRC0_ELEM(ELEM_Z),
919b7e1c893Smrg                             SRC0_NEG(0),
920ad43ddacSmrg                             SRC1_SEL(1),
921b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
922b7e1c893Smrg                             SRC1_ELEM(ELEM_Z),
923b7e1c893Smrg                             SRC1_NEG(0),
924b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
925b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
926b7e1c893Smrg                             LAST(0));
927ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
928ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
929ad43ddacSmrg                                 SRC2_ELEM(ELEM_Z),
930ad43ddacSmrg                                 SRC2_NEG(0),
931ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
932ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
933ad43ddacSmrg                                 DST_GPR(2),
934b7e1c893Smrg                                 DST_REL(ABSOLUTE),
935b7e1c893Smrg                                 DST_ELEM(ELEM_Z),
936b7e1c893Smrg                                 CLAMP(1));
937ad43ddacSmrg    /* r2.w = MAD(0, 0, 1); last instruction of the ALU clause */
938ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
939b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
940b7e1c893Smrg                             SRC0_ELEM(ELEM_X),
941b7e1c893Smrg                             SRC0_NEG(0),
942ad43ddacSmrg                             SRC1_SEL(SQ_ALU_SRC_0),
943b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
944b7e1c893Smrg                             SRC1_ELEM(ELEM_X),
945b7e1c893Smrg                             SRC1_NEG(0),
946b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
947b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
948b7e1c893Smrg                             LAST(1));
949ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
950ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
951ad43ddacSmrg                                 SRC2_ELEM(ELEM_X),
952ad43ddacSmrg                                 SRC2_NEG(0),
953ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
954ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
955ad43ddacSmrg                                 DST_GPR(2),
956b7e1c893Smrg                                 DST_REL(ABSOLUTE),
957b7e1c893Smrg                                 DST_ELEM(ELEM_W),
958b7e1c893Smrg                                 CLAMP(1));
959ad43ddacSmrg
960ad43ddacSmrg    /* 16: target of the CALL in slot 0; TEX clause, 3 fetches starting at slot 18 */
961ad43ddacSmrg    shader[i++] = CF_DWORD0(ADDR(18));
962b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
963b7e1c893Smrg                            CF_CONST(0),
964b7e1c893Smrg                            COND(SQ_CF_COND_ACTIVE),
965b7e1c893Smrg                            I_COUNT(3),
966b7e1c893Smrg                            CALL_COUNT(0),
967b7e1c893Smrg                            END_OF_PROGRAM(0),
968b7e1c893Smrg                            VALID_PIXEL_MODE(0),
969b7e1c893Smrg                            CF_INST(SQ_CF_INST_TEX),
970b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
971b7e1c893Smrg                            BARRIER(1));
972ad43ddacSmrg    /* 17: RETURN from the subroutine entered at slot 16 */
973b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(0));
974b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
975b7e1c893Smrg			    CF_CONST(0),
976b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
977b7e1c893Smrg			    I_COUNT(0),
978b7e1c893Smrg			    CALL_COUNT(0),
979b7e1c893Smrg			    END_OF_PROGRAM(0),
980b7e1c893Smrg			    VALID_PIXEL_MODE(0),
981b7e1c893Smrg			    CF_INST(SQ_CF_INST_RETURN),
982b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
983b7e1c893Smrg			    BARRIER(1));
984ad43ddacSmrg    /* 18/19: sample resource 0 / sampler 0 at r0.xy -> r1.x; r1.w takes SQ_SEL_1 */
985b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
986b7e1c893Smrg                             BC_FRAC_MODE(0),
987b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
988b7e1c893Smrg                             RESOURCE_ID(0),
989b7e1c893Smrg                             SRC_GPR(0),
990b7e1c893Smrg                             SRC_REL(ABSOLUTE),
991b7e1c893Smrg                             R7xx_ALT_CONST(0));
992b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
993b7e1c893Smrg                             DST_REL(ABSOLUTE),
994b7e1c893Smrg                             DST_SEL_X(SQ_SEL_X),
995b7e1c893Smrg                             DST_SEL_Y(SQ_SEL_MASK),
996b7e1c893Smrg                             DST_SEL_Z(SQ_SEL_MASK),
997b7e1c893Smrg                             DST_SEL_W(SQ_SEL_1),
998b7e1c893Smrg                             LOD_BIAS(0),
999b7e1c893Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1000b7e1c893Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1001b7e1c893Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1002b7e1c893Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1003b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1004b7e1c893Smrg                             OFFSET_Y(0),
1005b7e1c893Smrg                             OFFSET_Z(0),
1006b7e1c893Smrg                             SAMPLER_ID(0),
1007b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
1008b7e1c893Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1009b7e1c893Smrg                             SRC_SEL_Z(SQ_SEL_0),
1010b7e1c893Smrg                             SRC_SEL_W(SQ_SEL_1));
1011b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
1012ad43ddacSmrg    /* 20/21: sample resource 1 / sampler 1 at r0.xy -> r1.z (other channels masked) */
1013b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1014b7e1c893Smrg                             BC_FRAC_MODE(0),
1015b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
1016b7e1c893Smrg                             RESOURCE_ID(1),
1017b7e1c893Smrg                             SRC_GPR(0),
1018b7e1c893Smrg                             SRC_REL(ABSOLUTE),
1019b7e1c893Smrg                             R7xx_ALT_CONST(0));
1020b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1021b7e1c893Smrg                             DST_REL(ABSOLUTE),
1022b7e1c893Smrg                             DST_SEL_X(SQ_SEL_MASK),
1023b7e1c893Smrg                             DST_SEL_Y(SQ_SEL_MASK),
1024b7e1c893Smrg                             DST_SEL_Z(SQ_SEL_X),
1025b7e1c893Smrg                             DST_SEL_W(SQ_SEL_MASK),
1026b7e1c893Smrg                             LOD_BIAS(0),
1027b7e1c893Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1028b7e1c893Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1029b7e1c893Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1030b7e1c893Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1031b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1032b7e1c893Smrg                             OFFSET_Y(0),
1033b7e1c893Smrg                             OFFSET_Z(0),
1034b7e1c893Smrg                             SAMPLER_ID(1),
1035b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
1036b7e1c893Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1037b7e1c893Smrg                             SRC_SEL_Z(SQ_SEL_0),
1038b7e1c893Smrg                             SRC_SEL_W(SQ_SEL_1));
1039b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
1040ad43ddacSmrg    /* 22/23: sample resource 2 / sampler 2 at r0.xy -> r1.y (other channels masked) */
1041b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1042b7e1c893Smrg                             BC_FRAC_MODE(0),
1043b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
1044b7e1c893Smrg                             RESOURCE_ID(2),
1045b7e1c893Smrg                             SRC_GPR(0),
1046b7e1c893Smrg                             SRC_REL(ABSOLUTE),
1047b7e1c893Smrg                             R7xx_ALT_CONST(0));
1048b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1049b7e1c893Smrg                             DST_REL(ABSOLUTE),
1050b7e1c893Smrg                             DST_SEL_X(SQ_SEL_MASK),
1051b7e1c893Smrg                             DST_SEL_Y(SQ_SEL_X),
1052b7e1c893Smrg                             DST_SEL_Z(SQ_SEL_MASK),
1053b7e1c893Smrg                             DST_SEL_W(SQ_SEL_MASK),
1054b7e1c893Smrg                             LOD_BIAS(0),
1055b7e1c893Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1056b7e1c893Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1057b7e1c893Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1058b7e1c893Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1059b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1060b7e1c893Smrg                             OFFSET_Y(0),
1061b7e1c893Smrg                             OFFSET_Z(0),
1062b7e1c893Smrg                             SAMPLER_ID(2),
1063b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
1064b7e1c893Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1065b7e1c893Smrg                             SRC_SEL_Z(SQ_SEL_0),
1066b7e1c893Smrg                             SRC_SEL_W(SQ_SEL_1));
1067b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
1068ad43ddacSmrg    /* 24: target of the CALL in slot 1; TEX clause, 2 fetches starting at slot 26 */
1069ad43ddacSmrg    shader[i++] = CF_DWORD0(ADDR(26));
1070b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1071b7e1c893Smrg                            CF_CONST(0),
1072b7e1c893Smrg                            COND(SQ_CF_COND_ACTIVE),
1073b7e1c893Smrg                            I_COUNT(2),
1074b7e1c893Smrg                            CALL_COUNT(0),
1075b7e1c893Smrg                            END_OF_PROGRAM(0),
1076b7e1c893Smrg                            VALID_PIXEL_MODE(0),
1077b7e1c893Smrg                            CF_INST(SQ_CF_INST_TEX),
1078b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
1079b7e1c893Smrg                            BARRIER(1));
1080ad43ddacSmrg    /* 25: RETURN from the subroutine entered at slot 24 */
1081b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(0));
1082b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1083b7e1c893Smrg			    CF_CONST(0),
1084b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
1085b7e1c893Smrg			    I_COUNT(0),
1086b7e1c893Smrg			    CALL_COUNT(0),
1087b7e1c893Smrg			    END_OF_PROGRAM(0),
1088b7e1c893Smrg			    VALID_PIXEL_MODE(0),
1089b7e1c893Smrg			    CF_INST(SQ_CF_INST_RETURN),
1090b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
1091b7e1c893Smrg			    BARRIER(1));
1092ad43ddacSmrg    /* 26/27: sample resource 0 / sampler 0 at r0.xy -> r1.x; r1.w takes SQ_SEL_1 */
1093b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1094b7e1c893Smrg                             BC_FRAC_MODE(0),
1095b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
1096b7e1c893Smrg                             RESOURCE_ID(0),
1097b7e1c893Smrg                             SRC_GPR(0),
1098b7e1c893Smrg                             SRC_REL(ABSOLUTE),
1099b7e1c893Smrg                             R7xx_ALT_CONST(0));
1100b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1101b7e1c893Smrg                             DST_REL(ABSOLUTE),
1102b7e1c893Smrg                             DST_SEL_X(SQ_SEL_X),
1103b7e1c893Smrg                             DST_SEL_Y(SQ_SEL_MASK),
1104b7e1c893Smrg                             DST_SEL_Z(SQ_SEL_MASK),
1105b7e1c893Smrg                             DST_SEL_W(SQ_SEL_1),
1106b7e1c893Smrg                             LOD_BIAS(0),
1107b7e1c893Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1108b7e1c893Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1109b7e1c893Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1110b7e1c893Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1111b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1112b7e1c893Smrg                             OFFSET_Y(0),
1113b7e1c893Smrg                             OFFSET_Z(0),
1114b7e1c893Smrg                             SAMPLER_ID(0),
1115b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
1116b7e1c893Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1117b7e1c893Smrg                             SRC_SEL_Z(SQ_SEL_0),
1118b7e1c893Smrg                             SRC_SEL_W(SQ_SEL_1));
1119b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
1120ad43ddacSmrg    /* 28/29: sample resource 1 / sampler 1 at r0.xy; its x -> r1.y, its y -> r1.z */
1121b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1122b7e1c893Smrg                             BC_FRAC_MODE(0),
1123b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
1124b7e1c893Smrg                             RESOURCE_ID(1),
1125b7e1c893Smrg                             SRC_GPR(0),
1126b7e1c893Smrg                             SRC_REL(ABSOLUTE),
1127b7e1c893Smrg                             R7xx_ALT_CONST(0));
1128b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1129b7e1c893Smrg                             DST_REL(ABSOLUTE),
1130b7e1c893Smrg                             DST_SEL_X(SQ_SEL_MASK),
1131b7e1c893Smrg                             DST_SEL_Y(SQ_SEL_X),
1132b7e1c893Smrg                             DST_SEL_Z(SQ_SEL_Y),
1133b7e1c893Smrg                             DST_SEL_W(SQ_SEL_MASK),
1134b7e1c893Smrg                             LOD_BIAS(0),
1135b7e1c893Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1136b7e1c893Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1137b7e1c893Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1138b7e1c893Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1139b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1140b7e1c893Smrg                             OFFSET_Y(0),
1141b7e1c893Smrg                             OFFSET_Z(0),
1142b7e1c893Smrg                             SAMPLER_ID(1),
1143b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
1144b7e1c893Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1145b7e1c893Smrg                             SRC_SEL_Z(SQ_SEL_0),
1146b7e1c893Smrg                             SRC_SEL_W(SQ_SEL_1));
1147b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
1148b7e1c893Smrg
1149b7e1c893Smrg    return i;
1150b7e1c893Smrg}
1151b7e1c893Smrg
1152b7e1c893Smrg/* comp mask ps --------------------------------------- */
/*
 * R600_comp_mask_ps - emit the "comp mask" pixel shader into a word buffer.
 *
 * Writes the program as consecutive 32-bit words into shader[], starting at
 * index 0, and returns the number of words written (24 for the sequence
 * below).  Nothing here checks the size of the buffer, so the caller has to
 * provide room for at least that many words.
 *
 * ChipSet is only consumed by ALU_DWORD1_OP2(); every other word is built
 * from constants.
 *
 * The numbered comments in the body are instruction slots of two words each;
 * the ADDR() arguments (3 and 8) match the slots at which the ALU and TEX
 * instructions are emitted.  Layout:
 *   0      CF:  TEX clause at slot 8, I_COUNT(2)
 *   1      CF:  ALU clause at slot 3, I_COUNT(4)
 *   2      CF:  export of gpr[2] to CF_PIXEL_MRT0 with END_OF_PROGRAM(1)
 *   3-6    ALU: gpr[2].c = gpr[1].c * gpr[0].c for c = x, y, z, w, CLAMP(1)
 *   7      two zero words
 *   8-11   TEX: resource 0 / sampler 0, coords in gpr[0], result in gpr[0]
 *               resource 1 / sampler 1, coords in gpr[1], result in gpr[1]
 */
1153b7e1c893Smrgint R600_comp_mask_ps(RADEONChipFamily ChipSet, uint32_t* shader)
1154b7e1c893Smrg{
1155b7e1c893Smrg    int i = 0;
1156b7e1c893Smrg
1157b7e1c893Smrg    /* 0 */
    /* control-flow entry for the two texture fetches emitted at slots 8-11 */
1158b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(8));
1159b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1160b7e1c893Smrg			    CF_CONST(0),
1161b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
1162b7e1c893Smrg			    I_COUNT(2),
1163b7e1c893Smrg			    CALL_COUNT(0),
1164b7e1c893Smrg			    END_OF_PROGRAM(0),
1165b7e1c893Smrg			    VALID_PIXEL_MODE(0),
1166b7e1c893Smrg			    CF_INST(SQ_CF_INST_TEX),
1167b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
1168b7e1c893Smrg			    BARRIER(1));
1169b7e1c893Smrg
1170b7e1c893Smrg    /* 1 */
    /* control-flow entry for the four MUL instructions emitted at slots 3-6 */
1171b7e1c893Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(3),
1172b7e1c893Smrg				KCACHE_BANK0(0),
1173b7e1c893Smrg				KCACHE_BANK1(0),
1174b7e1c893Smrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
1175b7e1c893Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1176b7e1c893Smrg				KCACHE_ADDR0(0),
1177b7e1c893Smrg				KCACHE_ADDR1(0),
1178b7e1c893Smrg				I_COUNT(4),
1179b7e1c893Smrg				USES_WATERFALL(0),
1180b7e1c893Smrg				CF_INST(SQ_CF_INST_ALU),
1181b7e1c893Smrg				WHOLE_QUAD_MODE(0),
1182b7e1c893Smrg				BARRIER(1));
1183b7e1c893Smrg
1184b7e1c893Smrg    /* 2 */
    /* export gpr[2] (the MUL results) unswizzled to CF_PIXEL_MRT0; this is
     * the only entry with END_OF_PROGRAM(1) */
1185b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
1186b7e1c893Smrg					  TYPE(SQ_EXPORT_PIXEL),
1187b7e1c893Smrg					  RW_GPR(2),
1188b7e1c893Smrg					  RW_REL(ABSOLUTE),
1189b7e1c893Smrg					  INDEX_GPR(0),
1190b7e1c893Smrg					  ELEM_SIZE(1));
1191b7e1c893Smrg
1192b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1193b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1194b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
1195b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
1196b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
1197b7e1c893Smrg					       BURST_COUNT(1),
1198b7e1c893Smrg					       END_OF_PROGRAM(1),
1199b7e1c893Smrg					       VALID_PIXEL_MODE(0),
1200b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1201b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
1202b7e1c893Smrg					       BARRIER(1));
1203b7e1c893Smrg
1204b7e1c893Smrg    /* 3 - alu 0 */
1205b7e1c893Smrg    /* MUL gpr[2].x gpr[1].x gpr[0].x */
1206b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
1207b7e1c893Smrg			     SRC0_REL(ABSOLUTE),
1208b7e1c893Smrg			     SRC0_ELEM(ELEM_X),
1209b7e1c893Smrg			     SRC0_NEG(0),
1210b7e1c893Smrg			     SRC1_SEL(0),
1211b7e1c893Smrg			     SRC1_REL(ABSOLUTE),
1212b7e1c893Smrg			     SRC1_ELEM(ELEM_X),
1213b7e1c893Smrg			     SRC1_NEG(0),
1214b7e1c893Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
1215b7e1c893Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
1216b7e1c893Smrg			     LAST(0));
1217b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1218b7e1c893Smrg				 SRC0_ABS(0),
1219b7e1c893Smrg				 SRC1_ABS(0),
1220b7e1c893Smrg				 UPDATE_EXECUTE_MASK(0),
1221b7e1c893Smrg				 UPDATE_PRED(0),
1222b7e1c893Smrg				 WRITE_MASK(1),
1223b7e1c893Smrg				 FOG_MERGE(0),
1224b7e1c893Smrg				 OMOD(SQ_ALU_OMOD_OFF),
1225b7e1c893Smrg				 ALU_INST(SQ_OP2_INST_MUL),
1226b7e1c893Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
1227b7e1c893Smrg				 DST_GPR(2),
1228b7e1c893Smrg				 DST_REL(ABSOLUTE),
1229b7e1c893Smrg				 DST_ELEM(ELEM_X),
1230b7e1c893Smrg				 CLAMP(1));
1231b7e1c893Smrg    /* 4 - alu 1 */
1232b7e1c893Smrg    /* MUL gpr[2].y gpr[1].y gpr[0].y */
1233b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
1234b7e1c893Smrg			     SRC0_REL(ABSOLUTE),
1235b7e1c893Smrg			     SRC0_ELEM(ELEM_Y),
1236b7e1c893Smrg			     SRC0_NEG(0),
1237b7e1c893Smrg			     SRC1_SEL(0),
1238b7e1c893Smrg			     SRC1_REL(ABSOLUTE),
1239b7e1c893Smrg			     SRC1_ELEM(ELEM_Y),
1240b7e1c893Smrg			     SRC1_NEG(0),
1241b7e1c893Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
1242b7e1c893Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
1243b7e1c893Smrg			     LAST(0));
1244b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1245b7e1c893Smrg				 SRC0_ABS(0),
1246b7e1c893Smrg				 SRC1_ABS(0),
1247b7e1c893Smrg				 UPDATE_EXECUTE_MASK(0),
1248b7e1c893Smrg				 UPDATE_PRED(0),
1249b7e1c893Smrg				 WRITE_MASK(1),
1250b7e1c893Smrg				 FOG_MERGE(0),
1251b7e1c893Smrg				 OMOD(SQ_ALU_OMOD_OFF),
1252b7e1c893Smrg				 ALU_INST(SQ_OP2_INST_MUL),
1253b7e1c893Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
1254b7e1c893Smrg				 DST_GPR(2),
1255b7e1c893Smrg				 DST_REL(ABSOLUTE),
1256b7e1c893Smrg				 DST_ELEM(ELEM_Y),
1257b7e1c893Smrg				 CLAMP(1));
1258b7e1c893Smrg    /* 5 - alu 2 */
1259b7e1c893Smrg    /* MUL gpr[2].z gpr[1].z gpr[0].z */
1260b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
1261b7e1c893Smrg			     SRC0_REL(ABSOLUTE),
1262b7e1c893Smrg			     SRC0_ELEM(ELEM_Z),
1263b7e1c893Smrg			     SRC0_NEG(0),
1264b7e1c893Smrg			     SRC1_SEL(0),
1265b7e1c893Smrg			     SRC1_REL(ABSOLUTE),
1266b7e1c893Smrg			     SRC1_ELEM(ELEM_Z),
1267b7e1c893Smrg			     SRC1_NEG(0),
1268b7e1c893Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
1269b7e1c893Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
1270b7e1c893Smrg			     LAST(0));
1271b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1272b7e1c893Smrg				 SRC0_ABS(0),
1273b7e1c893Smrg				 SRC1_ABS(0),
1274b7e1c893Smrg				 UPDATE_EXECUTE_MASK(0),
1275b7e1c893Smrg				 UPDATE_PRED(0),
1276b7e1c893Smrg				 WRITE_MASK(1),
1277b7e1c893Smrg				 FOG_MERGE(0),
1278b7e1c893Smrg				 OMOD(SQ_ALU_OMOD_OFF),
1279b7e1c893Smrg				 ALU_INST(SQ_OP2_INST_MUL),
1280b7e1c893Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
1281b7e1c893Smrg				 DST_GPR(2),
1282b7e1c893Smrg				 DST_REL(ABSOLUTE),
1283b7e1c893Smrg				 DST_ELEM(ELEM_Z),
1284b7e1c893Smrg				 CLAMP(1));
1285b7e1c893Smrg    /* 6 - alu 3 */
1286b7e1c893Smrg    /* MUL gpr[2].w gpr[1].w gpr[0].w */
    /* only this fourth MUL passes LAST(1); the previous three pass LAST(0) */
1287b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
1288b7e1c893Smrg			     SRC0_REL(ABSOLUTE),
1289b7e1c893Smrg			     SRC0_ELEM(ELEM_W),
1290b7e1c893Smrg			     SRC0_NEG(0),
1291b7e1c893Smrg			     SRC1_SEL(0),
1292b7e1c893Smrg			     SRC1_REL(ABSOLUTE),
1293b7e1c893Smrg			     SRC1_ELEM(ELEM_W),
1294b7e1c893Smrg			     SRC1_NEG(0),
1295b7e1c893Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
1296b7e1c893Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
1297b7e1c893Smrg			     LAST(1));
1298b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1299b7e1c893Smrg				 SRC0_ABS(0),
1300b7e1c893Smrg				 SRC1_ABS(0),
1301b7e1c893Smrg				 UPDATE_EXECUTE_MASK(0),
1302b7e1c893Smrg				 UPDATE_PRED(0),
1303b7e1c893Smrg				 WRITE_MASK(1),
1304b7e1c893Smrg				 FOG_MERGE(0),
1305b7e1c893Smrg				 OMOD(SQ_ALU_OMOD_OFF),
1306b7e1c893Smrg				 ALU_INST(SQ_OP2_INST_MUL),
1307b7e1c893Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
1308b7e1c893Smrg				 DST_GPR(2),
1309b7e1c893Smrg				 DST_REL(ABSOLUTE),
1310b7e1c893Smrg				 DST_ELEM(ELEM_W),
1311b7e1c893Smrg				 CLAMP(1));
1312b7e1c893Smrg    /* 7 */
    /* NOTE(review): two zero words; presumably padding so that the fetch
     * instructions begin at slot 8 as ADDR(8) above expects - confirm the
     * hardware alignment requirement before changing the layout */
1313b7e1c893Smrg    shader[i++] = 0x00000000;
1314b7e1c893Smrg    shader[i++] = 0x00000000;
1315b7e1c893Smrg
1316b7e1c893Smrg    /* 8/9 - src */
    /* each fetch is four words: three TEX dwords plus TEX_DWORD_PAD; the
     * coordinate selects are X, Y, SQ_SEL_0, SQ_SEL_1 */
1317b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1318b7e1c893Smrg			     BC_FRAC_MODE(0),
1319b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
1320b7e1c893Smrg			     RESOURCE_ID(0),
1321b7e1c893Smrg			     SRC_GPR(0),
1322b7e1c893Smrg			     SRC_REL(ABSOLUTE),
1323b7e1c893Smrg			     R7xx_ALT_CONST(0));
1324b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(0),
1325b7e1c893Smrg			     DST_REL(ABSOLUTE),
1326b7e1c893Smrg			     DST_SEL_X(SQ_SEL_X),
1327b7e1c893Smrg			     DST_SEL_Y(SQ_SEL_Y),
1328b7e1c893Smrg			     DST_SEL_Z(SQ_SEL_Z),
1329b7e1c893Smrg			     DST_SEL_W(SQ_SEL_W),
1330b7e1c893Smrg			     LOD_BIAS(0),
1331b7e1c893Smrg			     COORD_TYPE_X(TEX_NORMALIZED),
1332b7e1c893Smrg			     COORD_TYPE_Y(TEX_NORMALIZED),
1333b7e1c893Smrg			     COORD_TYPE_Z(TEX_NORMALIZED),
1334b7e1c893Smrg			     COORD_TYPE_W(TEX_NORMALIZED));
1335b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1336b7e1c893Smrg			     OFFSET_Y(0),
1337b7e1c893Smrg			     OFFSET_Z(0),
1338b7e1c893Smrg			     SAMPLER_ID(0),
1339b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
1340b7e1c893Smrg			     SRC_SEL_Y(SQ_SEL_Y),
1341b7e1c893Smrg			     SRC_SEL_Z(SQ_SEL_0),
1342b7e1c893Smrg			     SRC_SEL_W(SQ_SEL_1));
1343b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
1344b7e1c893Smrg    /* 10/11 - mask */
    /* same fetch as above but with resource, sampler and GPR index 1 */
1345b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1346b7e1c893Smrg			     BC_FRAC_MODE(0),
1347b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
1348b7e1c893Smrg			     RESOURCE_ID(1),
1349b7e1c893Smrg			     SRC_GPR(1),
1350b7e1c893Smrg			     SRC_REL(ABSOLUTE),
1351b7e1c893Smrg			     R7xx_ALT_CONST(0));
1352b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1353b7e1c893Smrg			     DST_REL(ABSOLUTE),
1354b7e1c893Smrg			     DST_SEL_X(SQ_SEL_X),
1355b7e1c893Smrg			     DST_SEL_Y(SQ_SEL_Y),
1356b7e1c893Smrg			     DST_SEL_Z(SQ_SEL_Z),
1357b7e1c893Smrg			     DST_SEL_W(SQ_SEL_W),
1358b7e1c893Smrg			     LOD_BIAS(0),
1359b7e1c893Smrg			     COORD_TYPE_X(TEX_NORMALIZED),
1360b7e1c893Smrg			     COORD_TYPE_Y(TEX_NORMALIZED),
1361b7e1c893Smrg			     COORD_TYPE_Z(TEX_NORMALIZED),
1362b7e1c893Smrg			     COORD_TYPE_W(TEX_NORMALIZED));
1363b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1364b7e1c893Smrg			     OFFSET_Y(0),
1365b7e1c893Smrg			     OFFSET_Z(0),
1366b7e1c893Smrg			     SAMPLER_ID(1),
1367b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
1368b7e1c893Smrg			     SRC_SEL_Y(SQ_SEL_Y),
1369b7e1c893Smrg			     SRC_SEL_Z(SQ_SEL_0),
1370b7e1c893Smrg			     SRC_SEL_W(SQ_SEL_1));
1371b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
1372b7e1c893Smrg
    /* number of 32-bit words written to shader[] */
1373b7e1c893Smrg    return i;
1374b7e1c893Smrg}
1375b7e1c893Smrg
1376b7e1c893Smrg/* comp vs --------------------------------------- */
1377b7e1c893Smrgint R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
1378b7e1c893Smrg{
1379b7e1c893Smrg    int i = 0;
1380b7e1c893Smrg
1381b7e1c893Smrg    /* 0 */
1382b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(3));
1383b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1384b7e1c893Smrg                            CF_CONST(0),
1385b7e1c893Smrg                            COND(SQ_CF_COND_BOOL),
1386b7e1c893Smrg                            I_COUNT(0),
1387b7e1c893Smrg                            CALL_COUNT(0),
1388b7e1c893Smrg                            END_OF_PROGRAM(0),
1389b7e1c893Smrg                            VALID_PIXEL_MODE(0),
1390b7e1c893Smrg                            CF_INST(SQ_CF_INST_CALL),
1391b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
1392b7e1c893Smrg                            BARRIER(0));
1393b7e1c893Smrg    /* 1 */
1394ad43ddacSmrg    shader[i++] = CF_DWORD0(ADDR(28));
1395b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1396b7e1c893Smrg                            CF_CONST(0),
1397b7e1c893Smrg                            COND(SQ_CF_COND_NOT_BOOL),
1398b7e1c893Smrg                            I_COUNT(0),
1399b7e1c893Smrg                            CALL_COUNT(0),
1400b7e1c893Smrg                            END_OF_PROGRAM(0),
1401b7e1c893Smrg                            VALID_PIXEL_MODE(0),
1402b7e1c893Smrg                            CF_INST(SQ_CF_INST_CALL),
1403b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
1404b7e1c893Smrg                            BARRIER(0));
1405b7e1c893Smrg    /* 2 */
14062f39173dSmrg    shader[i++] = CF_DWORD0(ADDR(0));
1407b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1408b7e1c893Smrg                            CF_CONST(0),
1409b7e1c893Smrg                            COND(SQ_CF_COND_ACTIVE),
1410b7e1c893Smrg                            I_COUNT(0),
1411b7e1c893Smrg                            CALL_COUNT(0),
1412b7e1c893Smrg                            END_OF_PROGRAM(1),
1413b7e1c893Smrg                            VALID_PIXEL_MODE(0),
1414b7e1c893Smrg                            CF_INST(SQ_CF_INST_NOP),
1415b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
1416b7e1c893Smrg                            BARRIER(1));
1417b7e1c893Smrg    /* 3 - mask sub */
1418ad43ddacSmrg    shader[i++] = CF_DWORD0(ADDR(22));
1419b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1420b7e1c893Smrg			    CF_CONST(0),
1421b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
1422b7e1c893Smrg			    I_COUNT(3),
1423b7e1c893Smrg			    CALL_COUNT(0),
1424b7e1c893Smrg			    END_OF_PROGRAM(0),
1425b7e1c893Smrg			    VALID_PIXEL_MODE(0),
1426b7e1c893Smrg			    CF_INST(SQ_CF_INST_VTX),
1427b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
1428b7e1c893Smrg			    BARRIER(1));
1429ad43ddacSmrg
1430ad43ddacSmrg    /* 4 - ALU */
1431ad43ddacSmrg    shader[i++] = CF_ALU_DWORD0(ADDR(9),
1432ad43ddacSmrg				KCACHE_BANK0(0),
1433ad43ddacSmrg				KCACHE_BANK1(0),
1434ad43ddacSmrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
1435ad43ddacSmrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1436ad43ddacSmrg				KCACHE_ADDR0(0),
1437ad43ddacSmrg				KCACHE_ADDR1(0),
1438ad43ddacSmrg				I_COUNT(12),
1439ad43ddacSmrg				USES_WATERFALL(0),
1440ad43ddacSmrg				CF_INST(SQ_CF_INST_ALU),
1441ad43ddacSmrg				WHOLE_QUAD_MODE(0),
1442ad43ddacSmrg				BARRIER(1));
1443ad43ddacSmrg
1444ad43ddacSmrg    /* 5 - dst */
1445b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1446b7e1c893Smrg					  TYPE(SQ_EXPORT_POS),
1447b7e1c893Smrg					  RW_GPR(2),
1448b7e1c893Smrg					  RW_REL(ABSOLUTE),
1449b7e1c893Smrg					  INDEX_GPR(0),
1450b7e1c893Smrg					  ELEM_SIZE(0));
1451b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1452b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1453ad43ddacSmrg					       SRC_SEL_Z(SQ_SEL_0),
1454ad43ddacSmrg					       SRC_SEL_W(SQ_SEL_1),
1455b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
1456b7e1c893Smrg					       BURST_COUNT(1),
1457b7e1c893Smrg					       END_OF_PROGRAM(0),
1458b7e1c893Smrg					       VALID_PIXEL_MODE(0),
1459b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1460b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
1461b7e1c893Smrg					       BARRIER(1));
1462ad43ddacSmrg    /* 6 - src */
1463b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1464b7e1c893Smrg					  TYPE(SQ_EXPORT_PARAM),
1465b7e1c893Smrg					  RW_GPR(1),
1466b7e1c893Smrg					  RW_REL(ABSOLUTE),
1467b7e1c893Smrg					  INDEX_GPR(0),
1468b7e1c893Smrg					  ELEM_SIZE(0));
1469b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1470b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1471ad43ddacSmrg					       SRC_SEL_Z(SQ_SEL_0),
1472ad43ddacSmrg					       SRC_SEL_W(SQ_SEL_1),
1473b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
1474b7e1c893Smrg					       BURST_COUNT(1),
1475b7e1c893Smrg					       END_OF_PROGRAM(0),
1476b7e1c893Smrg					       VALID_PIXEL_MODE(0),
1477b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT),
1478b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
1479b7e1c893Smrg					       BARRIER(0));
1480ad43ddacSmrg    /* 7 - mask */
1481b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
1482b7e1c893Smrg					  TYPE(SQ_EXPORT_PARAM),
1483b7e1c893Smrg					  RW_GPR(0),
1484b7e1c893Smrg					  RW_REL(ABSOLUTE),
1485b7e1c893Smrg					  INDEX_GPR(0),
1486b7e1c893Smrg					  ELEM_SIZE(0));
1487b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1488b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1489ad43ddacSmrg					       SRC_SEL_Z(SQ_SEL_0),
1490ad43ddacSmrg					       SRC_SEL_W(SQ_SEL_1),
1491b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
1492b7e1c893Smrg					       BURST_COUNT(1),
1493b7e1c893Smrg					       END_OF_PROGRAM(0),
1494b7e1c893Smrg					       VALID_PIXEL_MODE(0),
1495b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1496b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
1497b7e1c893Smrg					       BARRIER(0));
1498ad43ddacSmrg    /* 8 */
1499b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(0));
1500b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1501b7e1c893Smrg			    CF_CONST(0),
1502b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
1503b7e1c893Smrg			    I_COUNT(0),
1504b7e1c893Smrg			    CALL_COUNT(0),
1505b7e1c893Smrg			    END_OF_PROGRAM(0),
1506b7e1c893Smrg			    VALID_PIXEL_MODE(0),
1507b7e1c893Smrg			    CF_INST(SQ_CF_INST_RETURN),
1508b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
1509b7e1c893Smrg			    BARRIER(1));
1510b7e1c893Smrg
1511ad43ddacSmrg
1512ad43ddacSmrg    /* 9 srcX MAD */
1513ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
1514ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1515ad43ddacSmrg                             SRC0_ELEM(ELEM_Y),
1516ad43ddacSmrg                             SRC0_NEG(0),
1517ad43ddacSmrg                             SRC1_SEL(1),
1518ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1519ad43ddacSmrg                             SRC1_ELEM(ELEM_Y),
1520ad43ddacSmrg                             SRC1_NEG(0),
1521ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1522ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1523ad43ddacSmrg                             LAST(1));
1524ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256),
1525ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
1526ad43ddacSmrg                                 SRC2_ELEM(ELEM_Z),
1527ad43ddacSmrg                                 SRC2_NEG(0),
1528ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1529ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1530ad43ddacSmrg                                 DST_GPR(1),
1531ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1532ad43ddacSmrg                                 DST_ELEM(ELEM_Z),
1533ad43ddacSmrg                                 CLAMP(0));
1534ad43ddacSmrg    /* 10 srcY MAD */
1535ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(257),
1536ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1537ad43ddacSmrg                             SRC0_ELEM(ELEM_Y),
1538ad43ddacSmrg                             SRC0_NEG(0),
1539ad43ddacSmrg                             SRC1_SEL(1),
1540ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1541ad43ddacSmrg                             SRC1_ELEM(ELEM_Y),
1542ad43ddacSmrg                             SRC1_NEG(0),
1543ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1544ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1545ad43ddacSmrg                             LAST(1));
1546ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(257),
1547ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
1548ad43ddacSmrg                                 SRC2_ELEM(ELEM_Z),
1549ad43ddacSmrg                                 SRC2_NEG(0),
1550ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1551ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1552ad43ddacSmrg                                 DST_GPR(1),
1553ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1554ad43ddacSmrg                                 DST_ELEM(ELEM_W),
1555ad43ddacSmrg                                 CLAMP(0));
1556ad43ddacSmrg
1557ad43ddacSmrg    /* 11 srcX MAD */
1558ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
1559ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1560ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
1561ad43ddacSmrg                             SRC0_NEG(0),
1562ad43ddacSmrg                             SRC1_SEL(1),
1563ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1564ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
1565ad43ddacSmrg                             SRC1_NEG(0),
1566ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1567ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1568ad43ddacSmrg                             LAST(0));
1569ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(1),
1570ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
1571ad43ddacSmrg                                 SRC2_ELEM(ELEM_Z),
1572ad43ddacSmrg                                 SRC2_NEG(0),
1573ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1574ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1575ad43ddacSmrg                                 DST_GPR(1),
1576ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1577ad43ddacSmrg                                 DST_ELEM(ELEM_X),
1578ad43ddacSmrg                                 CLAMP(0));
1579ad43ddacSmrg    /* 12 srcY MAD */
1580ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(257),
1581ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1582ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
1583ad43ddacSmrg                             SRC0_NEG(0),
1584ad43ddacSmrg                             SRC1_SEL(1),
1585ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1586ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
1587ad43ddacSmrg                             SRC1_NEG(0),
1588ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1589ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1590ad43ddacSmrg                             LAST(1));
1591ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(1),
1592ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
1593ad43ddacSmrg                                 SRC2_ELEM(ELEM_W),
1594ad43ddacSmrg                                 SRC2_NEG(0),
1595ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1596ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1597ad43ddacSmrg                                 DST_GPR(1),
1598ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1599ad43ddacSmrg                                 DST_ELEM(ELEM_Y),
1600ad43ddacSmrg                                 CLAMP(0));
1601ad43ddacSmrg
1602ad43ddacSmrg    /* 13 maskX MAD */
1603ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(258),
1604ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1605ad43ddacSmrg                             SRC0_ELEM(ELEM_Y),
1606ad43ddacSmrg                             SRC0_NEG(0),
1607ad43ddacSmrg                             SRC1_SEL(0),
1608ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1609ad43ddacSmrg                             SRC1_ELEM(ELEM_Y),
1610ad43ddacSmrg                             SRC1_NEG(0),
1611ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1612ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1613ad43ddacSmrg                             LAST(1));
1614ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(258),
1615ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
1616ad43ddacSmrg                                 SRC2_ELEM(ELEM_Z),
1617ad43ddacSmrg                                 SRC2_NEG(0),
1618ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1619ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1620ad43ddacSmrg                                 DST_GPR(0),
1621ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1622ad43ddacSmrg                                 DST_ELEM(ELEM_Z),
1623ad43ddacSmrg                                 CLAMP(0));
1624ad43ddacSmrg
1625ad43ddacSmrg    /* 14 maskY MAD */
1626ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(259),
1627ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1628ad43ddacSmrg                             SRC0_ELEM(ELEM_Y),
1629ad43ddacSmrg                             SRC0_NEG(0),
1630ad43ddacSmrg                             SRC1_SEL(0),
1631ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1632ad43ddacSmrg                             SRC1_ELEM(ELEM_Y),
1633ad43ddacSmrg                             SRC1_NEG(0),
1634ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1635ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1636ad43ddacSmrg                             LAST(1));
1637ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
1638ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
1639ad43ddacSmrg                                 SRC2_ELEM(ELEM_Z),
1640ad43ddacSmrg                                 SRC2_NEG(0),
1641ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1642ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1643ad43ddacSmrg                                 DST_GPR(0),
1644ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1645ad43ddacSmrg                                 DST_ELEM(ELEM_W),
1646ad43ddacSmrg                                 CLAMP(0));
1647ad43ddacSmrg
1648ad43ddacSmrg    /* 15 maskX MAD */
1649ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(258),
1650ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1651ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
1652ad43ddacSmrg                             SRC0_NEG(0),
1653ad43ddacSmrg                             SRC1_SEL(0),
1654ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1655ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
1656ad43ddacSmrg                             SRC1_NEG(0),
1657ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1658ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1659ad43ddacSmrg                             LAST(0));
1660ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0),
1661ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
1662ad43ddacSmrg                                 SRC2_ELEM(ELEM_Z),
1663ad43ddacSmrg                                 SRC2_NEG(0),
1664ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1665ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1666ad43ddacSmrg                                 DST_GPR(0),
1667ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1668ad43ddacSmrg                                 DST_ELEM(ELEM_X),
1669ad43ddacSmrg                                 CLAMP(0));
1670ad43ddacSmrg    /* 16 maskY MAD */
1671ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(259),
1672ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1673ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
1674ad43ddacSmrg                             SRC0_NEG(0),
1675ad43ddacSmrg                             SRC1_SEL(0),
1676ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1677ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
1678ad43ddacSmrg                             SRC1_NEG(0),
1679ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1680ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1681ad43ddacSmrg                             LAST(1));
1682ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0),
1683ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
1684ad43ddacSmrg                                 SRC2_ELEM(ELEM_W),
1685ad43ddacSmrg                                 SRC2_NEG(0),
1686ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1687ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1688ad43ddacSmrg                                 DST_GPR(0),
1689ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1690ad43ddacSmrg                                 DST_ELEM(ELEM_Y),
1691ad43ddacSmrg                                 CLAMP(0));
1692ad43ddacSmrg
1693ad43ddacSmrg    /* 17 srcX / w */
1694ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
1695ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1696ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
1697ad43ddacSmrg                             SRC0_NEG(0),
1698ad43ddacSmrg                             SRC1_SEL(256),
1699ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1700ad43ddacSmrg                             SRC1_ELEM(ELEM_W),
1701ad43ddacSmrg                             SRC1_NEG(0),
1702ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
1703ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1704ad43ddacSmrg                             LAST(1));
1705ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1706ad43ddacSmrg                                 SRC0_ABS(0),
1707ad43ddacSmrg                                 SRC1_ABS(0),
1708ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
1709ad43ddacSmrg                                 UPDATE_PRED(0),
1710ad43ddacSmrg                                 WRITE_MASK(1),
1711ad43ddacSmrg                                 FOG_MERGE(0),
1712ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
1713ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
1714ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1715ad43ddacSmrg                                 DST_GPR(1),
1716ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1717ad43ddacSmrg                                 DST_ELEM(ELEM_X),
1718ad43ddacSmrg                                 CLAMP(0));
1719ad43ddacSmrg
1720ad43ddacSmrg    /* 18 srcY / h */
1721ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
1722ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1723ad43ddacSmrg                             SRC0_ELEM(ELEM_Y),
1724ad43ddacSmrg                             SRC0_NEG(0),
1725ad43ddacSmrg                             SRC1_SEL(257),
1726ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1727ad43ddacSmrg                             SRC1_ELEM(ELEM_W),
1728ad43ddacSmrg                             SRC1_NEG(0),
1729ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
1730ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1731ad43ddacSmrg                             LAST(1));
1732ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1733ad43ddacSmrg                                 SRC0_ABS(0),
1734ad43ddacSmrg                                 SRC1_ABS(0),
1735ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
1736ad43ddacSmrg                                 UPDATE_PRED(0),
1737ad43ddacSmrg                                 WRITE_MASK(1),
1738ad43ddacSmrg                                 FOG_MERGE(0),
1739ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
1740ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
1741ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1742ad43ddacSmrg                                 DST_GPR(1),
1743ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1744ad43ddacSmrg                                 DST_ELEM(ELEM_Y),
1745ad43ddacSmrg                                 CLAMP(0));
1746ad43ddacSmrg
1747ad43ddacSmrg    /* 19 maskX / w */
1748ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(0),
1749ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1750ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
1751ad43ddacSmrg                             SRC0_NEG(0),
1752ad43ddacSmrg                             SRC1_SEL(258),
1753ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1754ad43ddacSmrg                             SRC1_ELEM(ELEM_W),
1755ad43ddacSmrg                             SRC1_NEG(0),
1756ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
1757ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1758ad43ddacSmrg                             LAST(1));
1759ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1760ad43ddacSmrg                                 SRC0_ABS(0),
1761ad43ddacSmrg                                 SRC1_ABS(0),
1762ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
1763ad43ddacSmrg                                 UPDATE_PRED(0),
1764ad43ddacSmrg                                 WRITE_MASK(1),
1765ad43ddacSmrg                                 FOG_MERGE(0),
1766ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
1767ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
1768ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1769ad43ddacSmrg                                 DST_GPR(0),
1770ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1771ad43ddacSmrg                                 DST_ELEM(ELEM_X),
1772ad43ddacSmrg                                 CLAMP(0));
1773ad43ddacSmrg
1774ad43ddacSmrg    /* 20 maskY / h */
1775ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(0),
1776ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1777ad43ddacSmrg                             SRC0_ELEM(ELEM_Y),
1778ad43ddacSmrg                             SRC0_NEG(0),
1779ad43ddacSmrg                             SRC1_SEL(259),
1780ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1781ad43ddacSmrg                             SRC1_ELEM(ELEM_W),
1782ad43ddacSmrg                             SRC1_NEG(0),
1783ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
1784ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1785ad43ddacSmrg                             LAST(1));
1786ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1787ad43ddacSmrg                                 SRC0_ABS(0),
1788ad43ddacSmrg                                 SRC1_ABS(0),
1789ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
1790ad43ddacSmrg                                 UPDATE_PRED(0),
1791ad43ddacSmrg                                 WRITE_MASK(1),
1792ad43ddacSmrg                                 FOG_MERGE(0),
1793ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
1794ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
1795ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1796ad43ddacSmrg                                 DST_GPR(0),
1797ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1798ad43ddacSmrg                                 DST_ELEM(ELEM_Y),
1799ad43ddacSmrg                                 CLAMP(0));
1800ad43ddacSmrg    /* 21 */
1801ad43ddacSmrg    shader[i++] = 0x00000000;
1802ad43ddacSmrg    shader[i++] = 0x00000000;
1803ad43ddacSmrg
1804ad43ddacSmrg    /* 22/23 - dst */
1805ad43ddacSmrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
1806ad43ddacSmrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
1807ad43ddacSmrg			     FETCH_WHOLE_QUAD(0),
1808ad43ddacSmrg			     BUFFER_ID(0),
1809ad43ddacSmrg			     SRC_GPR(0),
1810ad43ddacSmrg			     SRC_REL(ABSOLUTE),
1811ad43ddacSmrg			     SRC_SEL_X(SQ_SEL_X),
1812ad43ddacSmrg			     MEGA_FETCH_COUNT(24));
1813ad43ddacSmrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(2),
1814ad43ddacSmrg				 DST_REL(0),
1815ad43ddacSmrg				 DST_SEL_X(SQ_SEL_X),
1816ad43ddacSmrg				 DST_SEL_Y(SQ_SEL_Y),
1817ad43ddacSmrg				 DST_SEL_Z(SQ_SEL_0),
1818ad43ddacSmrg				 DST_SEL_W(SQ_SEL_1),
1819ad43ddacSmrg				 USE_CONST_FIELDS(0),
1820ad43ddacSmrg				 DATA_FORMAT(FMT_32_32_FLOAT),
1821ad43ddacSmrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
1822ad43ddacSmrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
1823ad43ddacSmrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
1824ad43ddacSmrg    shader[i++] = VTX_DWORD2(OFFSET(0),
1825ad43ddacSmrg			     ENDIAN_SWAP(ENDIAN_NONE),
1826ad43ddacSmrg			     CONST_BUF_NO_STRIDE(0),
1827ad43ddacSmrg			     MEGA_FETCH(1));
1828ad43ddacSmrg    shader[i++] = VTX_DWORD_PAD;
1829ad43ddacSmrg    /* 24/25 - src */
1830ad43ddacSmrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
1831ad43ddacSmrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
1832ad43ddacSmrg			     FETCH_WHOLE_QUAD(0),
1833ad43ddacSmrg			     BUFFER_ID(0),
1834ad43ddacSmrg			     SRC_GPR(0),
1835ad43ddacSmrg			     SRC_REL(ABSOLUTE),
1836ad43ddacSmrg			     SRC_SEL_X(SQ_SEL_X),
1837ad43ddacSmrg			     MEGA_FETCH_COUNT(8));
1838ad43ddacSmrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
1839ad43ddacSmrg				 DST_REL(0),
1840ad43ddacSmrg				 DST_SEL_X(SQ_SEL_X),
1841ad43ddacSmrg				 DST_SEL_Y(SQ_SEL_Y),
1842ad43ddacSmrg				 DST_SEL_Z(SQ_SEL_1),
1843ad43ddacSmrg				 DST_SEL_W(SQ_SEL_0),
1844ad43ddacSmrg				 USE_CONST_FIELDS(0),
1845ad43ddacSmrg				 DATA_FORMAT(FMT_32_32_FLOAT),
1846ad43ddacSmrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
1847ad43ddacSmrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
1848ad43ddacSmrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
1849ad43ddacSmrg    shader[i++] = VTX_DWORD2(OFFSET(8),
1850ad43ddacSmrg			     ENDIAN_SWAP(ENDIAN_NONE),
1851ad43ddacSmrg			     CONST_BUF_NO_STRIDE(0),
1852ad43ddacSmrg			     MEGA_FETCH(0));
1853ad43ddacSmrg    shader[i++] = VTX_DWORD_PAD;
1854ad43ddacSmrg    /* 26/27 - mask */
1855ad43ddacSmrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
1856ad43ddacSmrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
1857ad43ddacSmrg			     FETCH_WHOLE_QUAD(0),
1858ad43ddacSmrg			     BUFFER_ID(0),
1859ad43ddacSmrg			     SRC_GPR(0),
1860ad43ddacSmrg			     SRC_REL(ABSOLUTE),
1861ad43ddacSmrg			     SRC_SEL_X(SQ_SEL_X),
1862ad43ddacSmrg			     MEGA_FETCH_COUNT(8));
1863ad43ddacSmrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
1864ad43ddacSmrg				 DST_REL(0),
1865ad43ddacSmrg				 DST_SEL_X(SQ_SEL_X),
1866ad43ddacSmrg				 DST_SEL_Y(SQ_SEL_Y),
1867ad43ddacSmrg				 DST_SEL_Z(SQ_SEL_1),
1868ad43ddacSmrg				 DST_SEL_W(SQ_SEL_0),
1869ad43ddacSmrg				 USE_CONST_FIELDS(0),
1870ad43ddacSmrg				 DATA_FORMAT(FMT_32_32_FLOAT),
1871ad43ddacSmrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
1872ad43ddacSmrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
1873ad43ddacSmrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
1874ad43ddacSmrg    shader[i++] = VTX_DWORD2(OFFSET(16),
1875ad43ddacSmrg			     ENDIAN_SWAP(ENDIAN_NONE),
1876ad43ddacSmrg			     CONST_BUF_NO_STRIDE(0),
1877ad43ddacSmrg			     MEGA_FETCH(0));
1878ad43ddacSmrg    shader[i++] = VTX_DWORD_PAD;
1879ad43ddacSmrg
1880ad43ddacSmrg    /* 28 - non-mask sub */
1881ad43ddacSmrg    shader[i++] = CF_DWORD0(ADDR(40));
1882ad43ddacSmrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1883ad43ddacSmrg			    CF_CONST(0),
1884ad43ddacSmrg			    COND(SQ_CF_COND_ACTIVE),
1885ad43ddacSmrg			    I_COUNT(2),
1886ad43ddacSmrg			    CALL_COUNT(0),
1887ad43ddacSmrg			    END_OF_PROGRAM(0),
1888ad43ddacSmrg			    VALID_PIXEL_MODE(0),
1889ad43ddacSmrg			    CF_INST(SQ_CF_INST_VTX),
1890ad43ddacSmrg			    WHOLE_QUAD_MODE(0),
1891ad43ddacSmrg			    BARRIER(1));
1892ad43ddacSmrg
1893ad43ddacSmrg    /* 29 - ALU */
1894ad43ddacSmrg    shader[i++] = CF_ALU_DWORD0(ADDR(33),
1895ad43ddacSmrg				KCACHE_BANK0(0),
1896ad43ddacSmrg				KCACHE_BANK1(0),
1897ad43ddacSmrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
1898ad43ddacSmrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1899ad43ddacSmrg				KCACHE_ADDR0(0),
1900ad43ddacSmrg				KCACHE_ADDR1(0),
1901ad43ddacSmrg				I_COUNT(6),
1902ad43ddacSmrg				USES_WATERFALL(0),
1903ad43ddacSmrg				CF_INST(SQ_CF_INST_ALU),
1904ad43ddacSmrg				WHOLE_QUAD_MODE(0),
1905ad43ddacSmrg				BARRIER(1));
1906ad43ddacSmrg
1907ad43ddacSmrg    /* 30 - dst */
1908ad43ddacSmrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1909ad43ddacSmrg					  TYPE(SQ_EXPORT_POS),
1910ad43ddacSmrg					  RW_GPR(1),
1911ad43ddacSmrg					  RW_REL(ABSOLUTE),
1912ad43ddacSmrg					  INDEX_GPR(0),
1913ad43ddacSmrg					  ELEM_SIZE(0));
1914ad43ddacSmrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1915ad43ddacSmrg					       SRC_SEL_Y(SQ_SEL_Y),
1916ad43ddacSmrg					       SRC_SEL_Z(SQ_SEL_0),
1917ad43ddacSmrg					       SRC_SEL_W(SQ_SEL_1),
1918ad43ddacSmrg					       R6xx_ELEM_LOOP(0),
1919ad43ddacSmrg					       BURST_COUNT(0),
1920ad43ddacSmrg					       END_OF_PROGRAM(0),
1921ad43ddacSmrg					       VALID_PIXEL_MODE(0),
1922ad43ddacSmrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1923ad43ddacSmrg					       WHOLE_QUAD_MODE(0),
1924ad43ddacSmrg					       BARRIER(1));
1925ad43ddacSmrg    /* 31 - src */
1926ad43ddacSmrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1927ad43ddacSmrg					  TYPE(SQ_EXPORT_PARAM),
1928ad43ddacSmrg					  RW_GPR(0),
1929ad43ddacSmrg					  RW_REL(ABSOLUTE),
1930ad43ddacSmrg					  INDEX_GPR(0),
1931ad43ddacSmrg					  ELEM_SIZE(0));
1932ad43ddacSmrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1933ad43ddacSmrg					       SRC_SEL_Y(SQ_SEL_Y),
1934ad43ddacSmrg					       SRC_SEL_Z(SQ_SEL_0),
1935ad43ddacSmrg					       SRC_SEL_W(SQ_SEL_1),
1936ad43ddacSmrg					       R6xx_ELEM_LOOP(0),
1937ad43ddacSmrg					       BURST_COUNT(0),
1938ad43ddacSmrg					       END_OF_PROGRAM(0),
1939ad43ddacSmrg					       VALID_PIXEL_MODE(0),
1940ad43ddacSmrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1941ad43ddacSmrg					       WHOLE_QUAD_MODE(0),
1942ad43ddacSmrg					       BARRIER(0));
1943ad43ddacSmrg    /* 32 */
1944ad43ddacSmrg    shader[i++] = CF_DWORD0(ADDR(0));
1945ad43ddacSmrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1946ad43ddacSmrg			    CF_CONST(0),
1947ad43ddacSmrg			    COND(SQ_CF_COND_ACTIVE),
1948ad43ddacSmrg			    I_COUNT(0),
1949ad43ddacSmrg			    CALL_COUNT(0),
1950ad43ddacSmrg			    END_OF_PROGRAM(0),
1951b7e1c893Smrg			    VALID_PIXEL_MODE(0),
1952b7e1c893Smrg			    CF_INST(SQ_CF_INST_RETURN),
1953b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
1954b7e1c893Smrg			    BARRIER(1));
1955ad43ddacSmrg
1956ad43ddacSmrg
1957ad43ddacSmrg    /* 33 srcX MAD */
1958ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
1959ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1960ad43ddacSmrg                             SRC0_ELEM(ELEM_Y),
1961ad43ddacSmrg                             SRC0_NEG(0),
1962ad43ddacSmrg                             SRC1_SEL(0),
1963ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1964ad43ddacSmrg                             SRC1_ELEM(ELEM_Y),
1965ad43ddacSmrg                             SRC1_NEG(0),
1966ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1967ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1968ad43ddacSmrg                             LAST(1));
1969ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256),
1970ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
1971ad43ddacSmrg                                 SRC2_ELEM(ELEM_Z),
1972ad43ddacSmrg                                 SRC2_NEG(0),
1973ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1974ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1975ad43ddacSmrg                                 DST_GPR(0),
1976ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1977ad43ddacSmrg                                 DST_ELEM(ELEM_Z),
1978ad43ddacSmrg                                 CLAMP(0));
1979ad43ddacSmrg    /* 34 srcY MAD */
1980ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(257),
1981ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
1982ad43ddacSmrg                             SRC0_ELEM(ELEM_Y),
1983ad43ddacSmrg                             SRC0_NEG(0),
1984ad43ddacSmrg                             SRC1_SEL(0),
1985ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
1986ad43ddacSmrg                             SRC1_ELEM(ELEM_Y),
1987ad43ddacSmrg                             SRC1_NEG(0),
1988ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
1989ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1990ad43ddacSmrg                             LAST(1));
1991ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(257),
1992ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
1993ad43ddacSmrg                                 SRC2_ELEM(ELEM_Z),
1994ad43ddacSmrg                                 SRC2_NEG(0),
1995ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1996ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1997ad43ddacSmrg                                 DST_GPR(0),
1998ad43ddacSmrg                                 DST_REL(ABSOLUTE),
1999ad43ddacSmrg                                 DST_ELEM(ELEM_W),
2000ad43ddacSmrg                                 CLAMP(0));
2001ad43ddacSmrg
2002ad43ddacSmrg    /* 35 srcX MAD */
2003ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
2004ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
2005ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
2006ad43ddacSmrg                             SRC0_NEG(0),
2007ad43ddacSmrg                             SRC1_SEL(0),
2008ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
2009ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
2010ad43ddacSmrg                             SRC1_NEG(0),
2011ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
2012ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2013ad43ddacSmrg                             LAST(0));
2014ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0),
2015ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
2016ad43ddacSmrg                                 SRC2_ELEM(ELEM_Z),
2017ad43ddacSmrg                                 SRC2_NEG(0),
2018ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
2019ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2020ad43ddacSmrg                                 DST_GPR(0),
2021ad43ddacSmrg                                 DST_REL(ABSOLUTE),
2022ad43ddacSmrg                                 DST_ELEM(ELEM_X),
2023ad43ddacSmrg                                 CLAMP(0));
2024ad43ddacSmrg    /* 36 srcY MAD */
2025ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(257),
2026ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
2027ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
2028ad43ddacSmrg                             SRC0_NEG(0),
2029ad43ddacSmrg                             SRC1_SEL(0),
2030ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
2031ad43ddacSmrg                             SRC1_ELEM(ELEM_X),
2032ad43ddacSmrg                             SRC1_NEG(0),
2033ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_LOOP),
2034ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2035ad43ddacSmrg                             LAST(1));
2036ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0),
2037ad43ddacSmrg                                 SRC2_REL(ABSOLUTE),
2038ad43ddacSmrg                                 SRC2_ELEM(ELEM_W),
2039ad43ddacSmrg                                 SRC2_NEG(0),
2040ad43ddacSmrg                                 ALU_INST(SQ_OP3_INST_MULADD),
2041ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2042ad43ddacSmrg                                 DST_GPR(0),
2043ad43ddacSmrg                                 DST_REL(ABSOLUTE),
2044ad43ddacSmrg                                 DST_ELEM(ELEM_Y),
2045ad43ddacSmrg                                 CLAMP(0));
2046ad43ddacSmrg    /* 37 srcX / w */
2047ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(0),
2048ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
2049ad43ddacSmrg                             SRC0_ELEM(ELEM_X),
2050ad43ddacSmrg                             SRC0_NEG(0),
2051ad43ddacSmrg                             SRC1_SEL(256),
2052ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
2053ad43ddacSmrg                             SRC1_ELEM(ELEM_W),
2054ad43ddacSmrg                             SRC1_NEG(0),
2055ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
2056ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2057ad43ddacSmrg                             LAST(1));
2058ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2059ad43ddacSmrg                                 SRC0_ABS(0),
2060ad43ddacSmrg                                 SRC1_ABS(0),
2061ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
2062ad43ddacSmrg                                 UPDATE_PRED(0),
2063ad43ddacSmrg                                 WRITE_MASK(1),
2064ad43ddacSmrg                                 FOG_MERGE(0),
2065ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
2066ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
2067ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2068ad43ddacSmrg                                 DST_GPR(0),
2069ad43ddacSmrg                                 DST_REL(ABSOLUTE),
2070ad43ddacSmrg                                 DST_ELEM(ELEM_X),
2071ad43ddacSmrg                                 CLAMP(0));
2072ad43ddacSmrg
2073ad43ddacSmrg    /* 38 srcY / h */
2074ad43ddacSmrg    shader[i++] = ALU_DWORD0(SRC0_SEL(0),
2075ad43ddacSmrg                             SRC0_REL(ABSOLUTE),
2076ad43ddacSmrg                             SRC0_ELEM(ELEM_Y),
2077ad43ddacSmrg                             SRC0_NEG(0),
2078ad43ddacSmrg                             SRC1_SEL(257),
2079ad43ddacSmrg                             SRC1_REL(ABSOLUTE),
2080ad43ddacSmrg                             SRC1_ELEM(ELEM_W),
2081ad43ddacSmrg                             SRC1_NEG(0),
2082ad43ddacSmrg                             INDEX_MODE(SQ_INDEX_AR_X),
2083ad43ddacSmrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2084ad43ddacSmrg                             LAST(1));
2085ad43ddacSmrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
2086ad43ddacSmrg                                 SRC0_ABS(0),
2087ad43ddacSmrg                                 SRC1_ABS(0),
2088ad43ddacSmrg                                 UPDATE_EXECUTE_MASK(0),
2089ad43ddacSmrg                                 UPDATE_PRED(0),
2090ad43ddacSmrg                                 WRITE_MASK(1),
2091ad43ddacSmrg                                 FOG_MERGE(0),
2092ad43ddacSmrg                                 OMOD(SQ_ALU_OMOD_OFF),
2093ad43ddacSmrg                                 ALU_INST(SQ_OP2_INST_MUL),
2094ad43ddacSmrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2095ad43ddacSmrg                                 DST_GPR(0),
2096ad43ddacSmrg                                 DST_REL(ABSOLUTE),
2097ad43ddacSmrg                                 DST_ELEM(ELEM_Y),
2098ad43ddacSmrg                                 CLAMP(0));
2099ad43ddacSmrg
2100ad43ddacSmrg    /* 39 */
2101ad43ddacSmrg    shader[i++] = 0x00000000;
2102ad43ddacSmrg    shader[i++] = 0x00000000;
2103ad43ddacSmrg
2104ad43ddacSmrg    /* 40/41 - dst */
2105b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2106b7e1c893Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2107b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
2108b7e1c893Smrg			     BUFFER_ID(0),
2109b7e1c893Smrg			     SRC_GPR(0),
2110b7e1c893Smrg			     SRC_REL(ABSOLUTE),
2111b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
2112b7e1c893Smrg			     MEGA_FETCH_COUNT(16));
2113b7e1c893Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
2114b7e1c893Smrg				 DST_REL(0),
2115b7e1c893Smrg				 DST_SEL_X(SQ_SEL_X),
2116b7e1c893Smrg				 DST_SEL_Y(SQ_SEL_Y),
2117b7e1c893Smrg				 DST_SEL_Z(SQ_SEL_0),
2118b7e1c893Smrg				 DST_SEL_W(SQ_SEL_1),
2119b7e1c893Smrg				 USE_CONST_FIELDS(0),
2120ad43ddacSmrg				 DATA_FORMAT(FMT_32_32_FLOAT),
2121ad43ddacSmrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2122ad43ddacSmrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2123b7e1c893Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2124b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
2125b7e1c893Smrg			     ENDIAN_SWAP(ENDIAN_NONE),
2126b7e1c893Smrg			     CONST_BUF_NO_STRIDE(0),
2127b7e1c893Smrg			     MEGA_FETCH(1));
2128b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
2129ad43ddacSmrg    /* 42/43 - src */
2130b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2131b7e1c893Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2132b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
2133b7e1c893Smrg			     BUFFER_ID(0),
2134b7e1c893Smrg			     SRC_GPR(0),
2135b7e1c893Smrg			     SRC_REL(ABSOLUTE),
2136b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
2137b7e1c893Smrg			     MEGA_FETCH_COUNT(8));
2138b7e1c893Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
2139b7e1c893Smrg				 DST_REL(0),
2140b7e1c893Smrg				 DST_SEL_X(SQ_SEL_X),
2141b7e1c893Smrg				 DST_SEL_Y(SQ_SEL_Y),
2142ad43ddacSmrg				 DST_SEL_Z(SQ_SEL_1),
2143ad43ddacSmrg				 DST_SEL_W(SQ_SEL_0),
2144b7e1c893Smrg				 USE_CONST_FIELDS(0),
2145ad43ddacSmrg				 DATA_FORMAT(FMT_32_32_FLOAT),
2146ad43ddacSmrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2147ad43ddacSmrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2148b7e1c893Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2149b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
2150b7e1c893Smrg			     ENDIAN_SWAP(ENDIAN_NONE),
2151b7e1c893Smrg			     CONST_BUF_NO_STRIDE(0),
2152b7e1c893Smrg			     MEGA_FETCH(0));
2153b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
2154b7e1c893Smrg
2155b7e1c893Smrg    return i;
2156b7e1c893Smrg}
2157b7e1c893Smrg
2158b7e1c893Smrg/* comp ps --------------------------------------- */
2159b7e1c893Smrgint R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
2160b7e1c893Smrg{
2161b7e1c893Smrg    int i = 0;
2162b7e1c893Smrg
2163b7e1c893Smrg    /* 0 */
2164b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(2));
2165b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
2166b7e1c893Smrg			    CF_CONST(0),
2167b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
2168b7e1c893Smrg			    I_COUNT(1),
2169b7e1c893Smrg			    CALL_COUNT(0),
2170b7e1c893Smrg			    END_OF_PROGRAM(0),
2171b7e1c893Smrg			    VALID_PIXEL_MODE(0),
2172b7e1c893Smrg			    CF_INST(SQ_CF_INST_TEX),
2173b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
2174b7e1c893Smrg			    BARRIER(1));
2175b7e1c893Smrg    /* 1 */
2176b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
2177b7e1c893Smrg					  TYPE(SQ_EXPORT_PIXEL),
2178b7e1c893Smrg					  RW_GPR(0),
2179b7e1c893Smrg					  RW_REL(ABSOLUTE),
2180b7e1c893Smrg					  INDEX_GPR(0),
2181b7e1c893Smrg					  ELEM_SIZE(1));
2182b7e1c893Smrg
2183b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
2184b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
2185b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
2186b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
2187b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
2188b7e1c893Smrg					       BURST_COUNT(1),
2189b7e1c893Smrg					       END_OF_PROGRAM(1),
2190b7e1c893Smrg					       VALID_PIXEL_MODE(0),
2191b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
2192b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
2193b7e1c893Smrg					       BARRIER(1));
2194b7e1c893Smrg
2195b7e1c893Smrg
2196b7e1c893Smrg    /* 2/3 - src */
2197b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
2198b7e1c893Smrg			     BC_FRAC_MODE(0),
2199b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
2200b7e1c893Smrg			     RESOURCE_ID(0),
2201b7e1c893Smrg			     SRC_GPR(0),
2202b7e1c893Smrg			     SRC_REL(ABSOLUTE),
2203b7e1c893Smrg			     R7xx_ALT_CONST(0));
2204b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(0),
2205b7e1c893Smrg			     DST_REL(ABSOLUTE),
2206b7e1c893Smrg			     DST_SEL_X(SQ_SEL_X),
2207b7e1c893Smrg			     DST_SEL_Y(SQ_SEL_Y),
2208b7e1c893Smrg			     DST_SEL_Z(SQ_SEL_Z),
2209b7e1c893Smrg			     DST_SEL_W(SQ_SEL_W),
2210b7e1c893Smrg			     LOD_BIAS(0),
2211b7e1c893Smrg			     COORD_TYPE_X(TEX_NORMALIZED),
2212b7e1c893Smrg			     COORD_TYPE_Y(TEX_NORMALIZED),
2213b7e1c893Smrg			     COORD_TYPE_Z(TEX_NORMALIZED),
2214b7e1c893Smrg			     COORD_TYPE_W(TEX_NORMALIZED));
2215b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
2216b7e1c893Smrg			     OFFSET_Y(0),
2217b7e1c893Smrg			     OFFSET_Z(0),
2218b7e1c893Smrg			     SAMPLER_ID(0),
2219b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
2220b7e1c893Smrg			     SRC_SEL_Y(SQ_SEL_Y),
2221b7e1c893Smrg			     SRC_SEL_Z(SQ_SEL_0),
2222b7e1c893Smrg			     SRC_SEL_W(SQ_SEL_1));
2223b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
2224b7e1c893Smrg
2225b7e1c893Smrg    return i;
2226b7e1c893Smrg}
2227