r600_shader.c revision b7e1c893
1b7e1c893Smrg/*
2b7e1c893Smrg * Copyright 2008 Advanced Micro Devices, Inc.
3b7e1c893Smrg *
4b7e1c893Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b7e1c893Smrg * copy of this software and associated documentation files (the "Software"),
6b7e1c893Smrg * to deal in the Software without restriction, including without limitation
7b7e1c893Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b7e1c893Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b7e1c893Smrg * Software is furnished to do so, subject to the following conditions:
10b7e1c893Smrg *
11b7e1c893Smrg * The above copyright notice and this permission notice (including the next
12b7e1c893Smrg * paragraph) shall be included in all copies or substantial portions of the
13b7e1c893Smrg * Software.
14b7e1c893Smrg *
15b7e1c893Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b7e1c893Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b7e1c893Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b7e1c893Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b7e1c893Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20b7e1c893Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21b7e1c893Smrg * SOFTWARE.
22b7e1c893Smrg *
23b7e1c893Smrg * Author: Alex Deucher <alexander.deucher@amd.com>
24b7e1c893Smrg *
25b7e1c893Smrg */
26b7e1c893Smrg
27b7e1c893Smrg#ifdef HAVE_CONFIG_H
28b7e1c893Smrg#include "config.h"
29b7e1c893Smrg#endif
30b7e1c893Smrg
31b7e1c893Smrg#include "xf86.h"
32b7e1c893Smrg
33b7e1c893Smrg#include "radeon.h"
34b7e1c893Smrg#include "r600_shader.h"
35b7e1c893Smrg#include "r600_reg.h"
36b7e1c893Smrg
37b7e1c893Smrg/* solid vs --------------------------------------- */
38b7e1c893Smrgint R600_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader)
39b7e1c893Smrg{
40b7e1c893Smrg    int i = 0;
41b7e1c893Smrg
42b7e1c893Smrg    /* 0 */
43b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(4));
44b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
45b7e1c893Smrg			    CF_CONST(0),
46b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
47b7e1c893Smrg			    I_COUNT(1),
48b7e1c893Smrg			    CALL_COUNT(0),
49b7e1c893Smrg			    END_OF_PROGRAM(0),
50b7e1c893Smrg			    VALID_PIXEL_MODE(0),
51b7e1c893Smrg			    CF_INST(SQ_CF_INST_VTX),
52b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
53b7e1c893Smrg			    BARRIER(1));
54b7e1c893Smrg    /* 1 */
55b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
56b7e1c893Smrg					  TYPE(SQ_EXPORT_POS),
57b7e1c893Smrg					  RW_GPR(1),
58b7e1c893Smrg					  RW_REL(ABSOLUTE),
59b7e1c893Smrg					  INDEX_GPR(0),
60b7e1c893Smrg					  ELEM_SIZE(0));
61b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
62b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
63b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
64b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
65b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
66b7e1c893Smrg					       BURST_COUNT(1),
67b7e1c893Smrg					       END_OF_PROGRAM(0),
68b7e1c893Smrg					       VALID_PIXEL_MODE(0),
69b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
70b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
71b7e1c893Smrg					       BARRIER(1));
72b7e1c893Smrg    /* 2 - always export a param whether it's used or not */
73b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
74b7e1c893Smrg					  TYPE(SQ_EXPORT_PARAM),
75b7e1c893Smrg					  RW_GPR(0),
76b7e1c893Smrg					  RW_REL(ABSOLUTE),
77b7e1c893Smrg					  INDEX_GPR(0),
78b7e1c893Smrg					  ELEM_SIZE(0));
79b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
80b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
81b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
82b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
83b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
84b7e1c893Smrg					       BURST_COUNT(0),
85b7e1c893Smrg					       END_OF_PROGRAM(1),
86b7e1c893Smrg					       VALID_PIXEL_MODE(0),
87b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
88b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
89b7e1c893Smrg					       BARRIER(0));
90b7e1c893Smrg    /* 3 - padding */
91b7e1c893Smrg    shader[i++] = 0x00000000;
92b7e1c893Smrg    shader[i++] = 0x00000000;
93b7e1c893Smrg    /* 4/5 */
94b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
95b7e1c893Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
96b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
97b7e1c893Smrg			     BUFFER_ID(0),
98b7e1c893Smrg			     SRC_GPR(0),
99b7e1c893Smrg			     SRC_REL(ABSOLUTE),
100b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
101b7e1c893Smrg			     MEGA_FETCH_COUNT(8));
102b7e1c893Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
103b7e1c893Smrg				 DST_REL(0),
104b7e1c893Smrg				 DST_SEL_X(SQ_SEL_X),
105b7e1c893Smrg				 DST_SEL_Y(SQ_SEL_Y),
106b7e1c893Smrg				 DST_SEL_Z(SQ_SEL_0),
107b7e1c893Smrg				 DST_SEL_W(SQ_SEL_1),
108b7e1c893Smrg				 USE_CONST_FIELDS(0),
109b7e1c893Smrg				 DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
110b7e1c893Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */
111b7e1c893Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */
112b7e1c893Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
113b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
114b7e1c893Smrg			     ENDIAN_SWAP(ENDIAN_NONE),
115b7e1c893Smrg			     CONST_BUF_NO_STRIDE(0),
116b7e1c893Smrg			     MEGA_FETCH(1));
117b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
118b7e1c893Smrg
119b7e1c893Smrg    return i;
120b7e1c893Smrg}
121b7e1c893Smrg
122b7e1c893Smrg/* solid ps --------------------------------------- */
123b7e1c893Smrgint R600_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader)
124b7e1c893Smrg{
125b7e1c893Smrg    int i = 0;
126b7e1c893Smrg
127b7e1c893Smrg    /* 0 */
128b7e1c893Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(2),
129b7e1c893Smrg				KCACHE_BANK0(0),
130b7e1c893Smrg				KCACHE_BANK1(0),
131b7e1c893Smrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
132b7e1c893Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
133b7e1c893Smrg				KCACHE_ADDR0(0),
134b7e1c893Smrg				KCACHE_ADDR1(0),
135b7e1c893Smrg				I_COUNT(4),
136b7e1c893Smrg				USES_WATERFALL(0),
137b7e1c893Smrg				CF_INST(SQ_CF_INST_ALU),
138b7e1c893Smrg				WHOLE_QUAD_MODE(0),
139b7e1c893Smrg				BARRIER(1));
140b7e1c893Smrg    /* 1 */
141b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
142b7e1c893Smrg					  TYPE(SQ_EXPORT_PIXEL),
143b7e1c893Smrg					  RW_GPR(0),
144b7e1c893Smrg					  RW_REL(ABSOLUTE),
145b7e1c893Smrg					  INDEX_GPR(0),
146b7e1c893Smrg					  ELEM_SIZE(1));
147b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
148b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
149b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
150b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
151b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
152b7e1c893Smrg					       BURST_COUNT(1),
153b7e1c893Smrg					       END_OF_PROGRAM(1),
154b7e1c893Smrg					       VALID_PIXEL_MODE(0),
155b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
156b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
157b7e1c893Smrg					       BARRIER(1));
158b7e1c893Smrg
159b7e1c893Smrg    /* 2 */
160b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
161b7e1c893Smrg			     SRC0_REL(ABSOLUTE),
162b7e1c893Smrg			     SRC0_ELEM(ELEM_X),
163b7e1c893Smrg			     SRC0_NEG(0),
164b7e1c893Smrg			     SRC1_SEL(0),
165b7e1c893Smrg			     SRC1_REL(ABSOLUTE),
166b7e1c893Smrg			     SRC1_ELEM(ELEM_X),
167b7e1c893Smrg			     SRC1_NEG(0),
168b7e1c893Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
169b7e1c893Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
170b7e1c893Smrg			     LAST(0));
171b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
172b7e1c893Smrg				 SRC0_ABS(0),
173b7e1c893Smrg				 SRC1_ABS(0),
174b7e1c893Smrg				 UPDATE_EXECUTE_MASK(0),
175b7e1c893Smrg				 UPDATE_PRED(0),
176b7e1c893Smrg				 WRITE_MASK(1),
177b7e1c893Smrg				 FOG_MERGE(0),
178b7e1c893Smrg				 OMOD(SQ_ALU_OMOD_OFF),
179b7e1c893Smrg				 ALU_INST(SQ_OP2_INST_MOV),
180b7e1c893Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
181b7e1c893Smrg				 DST_GPR(0),
182b7e1c893Smrg				 DST_REL(ABSOLUTE),
183b7e1c893Smrg				 DST_ELEM(ELEM_X),
184b7e1c893Smrg				 CLAMP(1));
185b7e1c893Smrg    /* 3 */
186b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
187b7e1c893Smrg			     SRC0_REL(ABSOLUTE),
188b7e1c893Smrg			     SRC0_ELEM(ELEM_Y),
189b7e1c893Smrg			     SRC0_NEG(0),
190b7e1c893Smrg			     SRC1_SEL(0),
191b7e1c893Smrg			     SRC1_REL(ABSOLUTE),
192b7e1c893Smrg			     SRC1_ELEM(ELEM_Y),
193b7e1c893Smrg			     SRC1_NEG(0),
194b7e1c893Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
195b7e1c893Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
196b7e1c893Smrg			     LAST(0));
197b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
198b7e1c893Smrg				 SRC0_ABS(0),
199b7e1c893Smrg				 SRC1_ABS(0),
200b7e1c893Smrg				 UPDATE_EXECUTE_MASK(0),
201b7e1c893Smrg				 UPDATE_PRED(0),
202b7e1c893Smrg				 WRITE_MASK(1),
203b7e1c893Smrg				 FOG_MERGE(0),
204b7e1c893Smrg				 OMOD(SQ_ALU_OMOD_OFF),
205b7e1c893Smrg				 ALU_INST(SQ_OP2_INST_MOV),
206b7e1c893Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
207b7e1c893Smrg				 DST_GPR(0),
208b7e1c893Smrg				 DST_REL(ABSOLUTE),
209b7e1c893Smrg				 DST_ELEM(ELEM_Y),
210b7e1c893Smrg				 CLAMP(1));
211b7e1c893Smrg    /* 4 */
212b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
213b7e1c893Smrg			     SRC0_REL(ABSOLUTE),
214b7e1c893Smrg			     SRC0_ELEM(ELEM_Z),
215b7e1c893Smrg			     SRC0_NEG(0),
216b7e1c893Smrg			     SRC1_SEL(0),
217b7e1c893Smrg			     SRC1_REL(ABSOLUTE),
218b7e1c893Smrg			     SRC1_ELEM(ELEM_Z),
219b7e1c893Smrg			     SRC1_NEG(0),
220b7e1c893Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
221b7e1c893Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
222b7e1c893Smrg			     LAST(0));
223b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
224b7e1c893Smrg				 SRC0_ABS(0),
225b7e1c893Smrg				 SRC1_ABS(0),
226b7e1c893Smrg				 UPDATE_EXECUTE_MASK(0),
227b7e1c893Smrg				 UPDATE_PRED(0),
228b7e1c893Smrg				 WRITE_MASK(1),
229b7e1c893Smrg				 FOG_MERGE(0),
230b7e1c893Smrg				 OMOD(SQ_ALU_OMOD_OFF),
231b7e1c893Smrg				 ALU_INST(SQ_OP2_INST_MOV),
232b7e1c893Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
233b7e1c893Smrg				 DST_GPR(0),
234b7e1c893Smrg				 DST_REL(ABSOLUTE),
235b7e1c893Smrg				 DST_ELEM(ELEM_Z),
236b7e1c893Smrg				 CLAMP(1));
237b7e1c893Smrg    /* 5 */
238b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
239b7e1c893Smrg			     SRC0_REL(ABSOLUTE),
240b7e1c893Smrg			     SRC0_ELEM(ELEM_W),
241b7e1c893Smrg			     SRC0_NEG(0),
242b7e1c893Smrg			     SRC1_SEL(0),
243b7e1c893Smrg			     SRC1_REL(ABSOLUTE),
244b7e1c893Smrg			     SRC1_ELEM(ELEM_W),
245b7e1c893Smrg			     SRC1_NEG(0),
246b7e1c893Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
247b7e1c893Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
248b7e1c893Smrg			     LAST(1));
249b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
250b7e1c893Smrg				 SRC0_ABS(0),
251b7e1c893Smrg				 SRC1_ABS(0),
252b7e1c893Smrg				 UPDATE_EXECUTE_MASK(0),
253b7e1c893Smrg				 UPDATE_PRED(0),
254b7e1c893Smrg				 WRITE_MASK(1),
255b7e1c893Smrg				 FOG_MERGE(0),
256b7e1c893Smrg				 OMOD(SQ_ALU_OMOD_OFF),
257b7e1c893Smrg				 ALU_INST(SQ_OP2_INST_MOV),
258b7e1c893Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
259b7e1c893Smrg				 DST_GPR(0),
260b7e1c893Smrg				 DST_REL(ABSOLUTE),
261b7e1c893Smrg				 DST_ELEM(ELEM_W),
262b7e1c893Smrg				 CLAMP(1));
263b7e1c893Smrg
264b7e1c893Smrg    return i;
265b7e1c893Smrg}
266b7e1c893Smrg
267b7e1c893Smrg/* copy vs --------------------------------------- */
268b7e1c893Smrgint R600_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader)
269b7e1c893Smrg{
270b7e1c893Smrg    int i = 0;
271b7e1c893Smrg
272b7e1c893Smrg    /* 0 */
273b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(4));
274b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
275b7e1c893Smrg			    CF_CONST(0),
276b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
277b7e1c893Smrg			    I_COUNT(2),
278b7e1c893Smrg			    CALL_COUNT(0),
279b7e1c893Smrg			    END_OF_PROGRAM(0),
280b7e1c893Smrg			    VALID_PIXEL_MODE(0),
281b7e1c893Smrg			    CF_INST(SQ_CF_INST_VTX),
282b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
283b7e1c893Smrg			    BARRIER(1));
284b7e1c893Smrg    /* 1 */
285b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
286b7e1c893Smrg					  TYPE(SQ_EXPORT_POS),
287b7e1c893Smrg					  RW_GPR(1),
288b7e1c893Smrg					  RW_REL(ABSOLUTE),
289b7e1c893Smrg					  INDEX_GPR(0),
290b7e1c893Smrg					  ELEM_SIZE(0));
291b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
292b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
293b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
294b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
295b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
296b7e1c893Smrg					       BURST_COUNT(0),
297b7e1c893Smrg					       END_OF_PROGRAM(0),
298b7e1c893Smrg					       VALID_PIXEL_MODE(0),
299b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
300b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
301b7e1c893Smrg					       BARRIER(1));
302b7e1c893Smrg    /* 2 */
303b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
304b7e1c893Smrg					  TYPE(SQ_EXPORT_PARAM),
305b7e1c893Smrg					  RW_GPR(0),
306b7e1c893Smrg					  RW_REL(ABSOLUTE),
307b7e1c893Smrg					  INDEX_GPR(0),
308b7e1c893Smrg					  ELEM_SIZE(0));
309b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
310b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
311b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
312b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
313b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
314b7e1c893Smrg					       BURST_COUNT(0),
315b7e1c893Smrg					       END_OF_PROGRAM(1),
316b7e1c893Smrg					       VALID_PIXEL_MODE(0),
317b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
318b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
319b7e1c893Smrg					       BARRIER(0));
320b7e1c893Smrg    /* 3 */
321b7e1c893Smrg    shader[i++] = 0x00000000;
322b7e1c893Smrg    shader[i++] = 0x00000000;
323b7e1c893Smrg    /* 4/5 */
324b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
325b7e1c893Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
326b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
327b7e1c893Smrg			     BUFFER_ID(0),
328b7e1c893Smrg			     SRC_GPR(0),
329b7e1c893Smrg			     SRC_REL(ABSOLUTE),
330b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
331b7e1c893Smrg			     MEGA_FETCH_COUNT(16));
332b7e1c893Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
333b7e1c893Smrg				 DST_REL(0),
334b7e1c893Smrg				 DST_SEL_X(SQ_SEL_X),
335b7e1c893Smrg				 DST_SEL_Y(SQ_SEL_Y),
336b7e1c893Smrg				 DST_SEL_Z(SQ_SEL_0),
337b7e1c893Smrg				 DST_SEL_W(SQ_SEL_1),
338b7e1c893Smrg				 USE_CONST_FIELDS(0),
339b7e1c893Smrg				 DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
340b7e1c893Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */
341b7e1c893Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */
342b7e1c893Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
343b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
344b7e1c893Smrg			     ENDIAN_SWAP(ENDIAN_NONE),
345b7e1c893Smrg			     CONST_BUF_NO_STRIDE(0),
346b7e1c893Smrg			     MEGA_FETCH(1));
347b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
348b7e1c893Smrg    /* 6/7 */
349b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
350b7e1c893Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
351b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
352b7e1c893Smrg			     BUFFER_ID(0),
353b7e1c893Smrg			     SRC_GPR(0),
354b7e1c893Smrg			     SRC_REL(ABSOLUTE),
355b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
356b7e1c893Smrg			     MEGA_FETCH_COUNT(8));
357b7e1c893Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
358b7e1c893Smrg				 DST_REL(0),
359b7e1c893Smrg				 DST_SEL_X(SQ_SEL_X),
360b7e1c893Smrg				 DST_SEL_Y(SQ_SEL_Y),
361b7e1c893Smrg				 DST_SEL_Z(SQ_SEL_0),
362b7e1c893Smrg				 DST_SEL_W(SQ_SEL_1),
363b7e1c893Smrg				 USE_CONST_FIELDS(0),
364b7e1c893Smrg				 DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
365b7e1c893Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */
366b7e1c893Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */
367b7e1c893Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
368b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
369b7e1c893Smrg			     ENDIAN_SWAP(ENDIAN_NONE),
370b7e1c893Smrg			     CONST_BUF_NO_STRIDE(0),
371b7e1c893Smrg			     MEGA_FETCH(0));
372b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
373b7e1c893Smrg
374b7e1c893Smrg    return i;
375b7e1c893Smrg}
376b7e1c893Smrg
377b7e1c893Smrg/* copy ps --------------------------------------- */
378b7e1c893Smrgint R600_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader)
379b7e1c893Smrg{
380b7e1c893Smrg    int i=0;
381b7e1c893Smrg
382b7e1c893Smrg    /* CF INST 0 */
383b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(2));
384b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
385b7e1c893Smrg			    CF_CONST(0),
386b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
387b7e1c893Smrg			    I_COUNT(1),
388b7e1c893Smrg			    CALL_COUNT(0),
389b7e1c893Smrg			    END_OF_PROGRAM(0),
390b7e1c893Smrg			    VALID_PIXEL_MODE(0),
391b7e1c893Smrg			    CF_INST(SQ_CF_INST_TEX),
392b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
393b7e1c893Smrg			    BARRIER(1));
394b7e1c893Smrg    /* CF INST 1 */
395b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
396b7e1c893Smrg					  TYPE(SQ_EXPORT_PIXEL),
397b7e1c893Smrg					  RW_GPR(0),
398b7e1c893Smrg					  RW_REL(ABSOLUTE),
399b7e1c893Smrg					  INDEX_GPR(0),
400b7e1c893Smrg					  ELEM_SIZE(1));
401b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
402b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
403b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
404b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
405b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
406b7e1c893Smrg					       BURST_COUNT(1),
407b7e1c893Smrg					       END_OF_PROGRAM(1),
408b7e1c893Smrg					       VALID_PIXEL_MODE(0),
409b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
410b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
411b7e1c893Smrg					       BARRIER(1));
412b7e1c893Smrg    /* TEX INST 0 */
413b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
414b7e1c893Smrg			     BC_FRAC_MODE(0),
415b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
416b7e1c893Smrg			     RESOURCE_ID(0),
417b7e1c893Smrg			     SRC_GPR(0),
418b7e1c893Smrg			     SRC_REL(ABSOLUTE),
419b7e1c893Smrg			     R7xx_ALT_CONST(0));
420b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(0),
421b7e1c893Smrg			     DST_REL(ABSOLUTE),
422b7e1c893Smrg			     DST_SEL_X(SQ_SEL_X), /* R */
423b7e1c893Smrg			     DST_SEL_Y(SQ_SEL_Y), /* G */
424b7e1c893Smrg			     DST_SEL_Z(SQ_SEL_Z), /* B */
425b7e1c893Smrg			     DST_SEL_W(SQ_SEL_W), /* A */
426b7e1c893Smrg			     LOD_BIAS(0),
427b7e1c893Smrg			     COORD_TYPE_X(TEX_UNNORMALIZED),
428b7e1c893Smrg			     COORD_TYPE_Y(TEX_UNNORMALIZED),
429b7e1c893Smrg			     COORD_TYPE_Z(TEX_UNNORMALIZED),
430b7e1c893Smrg			     COORD_TYPE_W(TEX_UNNORMALIZED));
431b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
432b7e1c893Smrg			     OFFSET_Y(0),
433b7e1c893Smrg			     OFFSET_Z(0),
434b7e1c893Smrg			     SAMPLER_ID(0),
435b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
436b7e1c893Smrg			     SRC_SEL_Y(SQ_SEL_Y),
437b7e1c893Smrg			     SRC_SEL_Z(SQ_SEL_0),
438b7e1c893Smrg			     SRC_SEL_W(SQ_SEL_1));
439b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
440b7e1c893Smrg
441b7e1c893Smrg    return i;
442b7e1c893Smrg}
443b7e1c893Smrg
444b7e1c893Smrg/*
445b7e1c893Smrg * ; xv vertex shader
446b7e1c893Smrg * 00 VTX: ADDR(4) CNT(2)
447b7e1c893Smrg *       0  VFETCH R1.xy01, R0.x, fc0  MEGA(16) FORMAT(32_32_FLOAT)
448b7e1c893Smrg *          FORMAT_COMP(SIGNED)
449b7e1c893Smrg *       1  VFETCH R0.xy01, R0.x, fc0  MINI(8) OFFSET(8) FORMAT(32_32_FLOAT)
450b7e1c893Smrg *          FORMAT_COMP(SIGNED)
451b7e1c893Smrg * 01 EXP_DONE: POS0, R1
452b7e1c893Smrg * 02 EXP_DONE: PARAM0, R0  NO_BARRIER
453b7e1c893Smrg * END_OF_PROGRAM
454b7e1c893Smrg */
455b7e1c893Smrgint R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
456b7e1c893Smrg{
457b7e1c893Smrg    int i = 0;
458b7e1c893Smrg
459b7e1c893Smrg    /* 0 */
460b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(4));
461b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
462b7e1c893Smrg                            CF_CONST(0),
463b7e1c893Smrg                            COND(SQ_CF_COND_ACTIVE),
464b7e1c893Smrg                            I_COUNT(2),
465b7e1c893Smrg                            CALL_COUNT(0),
466b7e1c893Smrg                            END_OF_PROGRAM(0),
467b7e1c893Smrg                            VALID_PIXEL_MODE(0),
468b7e1c893Smrg                            CF_INST(SQ_CF_INST_VTX),
469b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
470b7e1c893Smrg                            BARRIER(1));
471b7e1c893Smrg    /* 1 */
472b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
473b7e1c893Smrg                                          TYPE(SQ_EXPORT_POS),
474b7e1c893Smrg                                          RW_GPR(1),
475b7e1c893Smrg                                          RW_REL(ABSOLUTE),
476b7e1c893Smrg                                          INDEX_GPR(0),
477b7e1c893Smrg                                          ELEM_SIZE(3));
478b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
479b7e1c893Smrg                                               SRC_SEL_Y(SQ_SEL_Y),
480b7e1c893Smrg                                               SRC_SEL_Z(SQ_SEL_Z),
481b7e1c893Smrg                                               SRC_SEL_W(SQ_SEL_W),
482b7e1c893Smrg                                               R6xx_ELEM_LOOP(0),
483b7e1c893Smrg                                               BURST_COUNT(1),
484b7e1c893Smrg                                               END_OF_PROGRAM(0),
485b7e1c893Smrg                                               VALID_PIXEL_MODE(0),
486b7e1c893Smrg                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
487b7e1c893Smrg                                               WHOLE_QUAD_MODE(0),
488b7e1c893Smrg                                               BARRIER(1));
489b7e1c893Smrg    /* 2 */
490b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
491b7e1c893Smrg                                          TYPE(SQ_EXPORT_PARAM),
492b7e1c893Smrg                                          RW_GPR(0),
493b7e1c893Smrg                                          RW_REL(ABSOLUTE),
494b7e1c893Smrg                                          INDEX_GPR(0),
495b7e1c893Smrg                                          ELEM_SIZE(3));
496b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
497b7e1c893Smrg                                               SRC_SEL_Y(SQ_SEL_Y),
498b7e1c893Smrg                                               SRC_SEL_Z(SQ_SEL_Z),
499b7e1c893Smrg                                               SRC_SEL_W(SQ_SEL_W),
500b7e1c893Smrg                                               R6xx_ELEM_LOOP(0),
501b7e1c893Smrg                                               BURST_COUNT(1),
502b7e1c893Smrg                                               END_OF_PROGRAM(1),
503b7e1c893Smrg                                               VALID_PIXEL_MODE(0),
504b7e1c893Smrg                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
505b7e1c893Smrg                                               WHOLE_QUAD_MODE(0),
506b7e1c893Smrg                                               BARRIER(0));
507b7e1c893Smrg    shader[i++] = 0x00000000;
508b7e1c893Smrg    shader[i++] = 0x00000000;
509b7e1c893Smrg    /* 4/5 */
510b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
511b7e1c893Smrg                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
512b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
513b7e1c893Smrg                             BUFFER_ID(0),
514b7e1c893Smrg                             SRC_GPR(0),
515b7e1c893Smrg                             SRC_REL(ABSOLUTE),
516b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
517b7e1c893Smrg                             MEGA_FETCH_COUNT(16));
518b7e1c893Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
519b7e1c893Smrg                                 DST_REL(ABSOLUTE),
520b7e1c893Smrg                                 DST_SEL_X(SQ_SEL_X),
521b7e1c893Smrg                                 DST_SEL_Y(SQ_SEL_Y),
522b7e1c893Smrg                                 DST_SEL_Z(SQ_SEL_0),
523b7e1c893Smrg                                 DST_SEL_W(SQ_SEL_1),
524b7e1c893Smrg                                 USE_CONST_FIELDS(0),
525b7e1c893Smrg                                 DATA_FORMAT(FMT_32_32_FLOAT),
526b7e1c893Smrg                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
527b7e1c893Smrg                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
528b7e1c893Smrg                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
529b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
530b7e1c893Smrg                             ENDIAN_SWAP(ENDIAN_NONE),
531b7e1c893Smrg                             CONST_BUF_NO_STRIDE(0),
532b7e1c893Smrg                             MEGA_FETCH(1));
533b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
534b7e1c893Smrg    /* 6/7 */
535b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
536b7e1c893Smrg                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
537b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
538b7e1c893Smrg                             BUFFER_ID(0),
539b7e1c893Smrg                             SRC_GPR(0),
540b7e1c893Smrg                             SRC_REL(ABSOLUTE),
541b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
542b7e1c893Smrg                             MEGA_FETCH_COUNT(8));
543b7e1c893Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
544b7e1c893Smrg                                 DST_REL(ABSOLUTE),
545b7e1c893Smrg                                 DST_SEL_X(SQ_SEL_X),
546b7e1c893Smrg                                 DST_SEL_Y(SQ_SEL_Y),
547b7e1c893Smrg                                 DST_SEL_Z(SQ_SEL_0),
548b7e1c893Smrg                                 DST_SEL_W(SQ_SEL_1),
549b7e1c893Smrg                                 USE_CONST_FIELDS(0),
550b7e1c893Smrg                                 DATA_FORMAT(FMT_32_32_FLOAT),
551b7e1c893Smrg                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
552b7e1c893Smrg                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
553b7e1c893Smrg                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
554b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
555b7e1c893Smrg                             ENDIAN_SWAP(ENDIAN_NONE),
556b7e1c893Smrg                             CONST_BUF_NO_STRIDE(0),
557b7e1c893Smrg                             MEGA_FETCH(0));
558b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
559b7e1c893Smrg
560b7e1c893Smrg    return i;
561b7e1c893Smrg}
562b7e1c893Smrg
563b7e1c893Smrg/*
564b7e1c893Smrg * ; xv ps planar
565b7e1c893Smrg * 00 TEX: ADDR(20) CNT(3) NO_BARRIER
566b7e1c893Smrg *       0  SAMPLE R1.x__1, R0.xy01, t0, s0
567b7e1c893Smrg *       1  SAMPLE R1.__x_, R0.xy01, t1, s1
568b7e1c893Smrg *       2  SAMPLE R1._x__, R0.xy01, t2, s2
569b7e1c893Smrg * 01 TEX: ADDR(28) CNT(2) NO_BARRIER
570b7e1c893Smrg *       0  SAMPLE R1.x__1, R0.xy01, t0, s0
571b7e1c893Smrg *       1  SAMPLE R1._xy_, R0.xy01, t1, s1
572b7e1c893Smrg * 02 ALU: ADDR(4) CNT(16)
573b7e1c893Smrg *       3  x: MULADD      R1.x,  R1.x,  C3.x,  C3.y      CLAMP
574b7e1c893Smrg *          y: MULADD      R1.y,  R1.y,  C3.z,  C3.w
575b7e1c893Smrg *          z: MULADD      R1.z,  R1.z,  C3.z,  C3.w
576b7e1c893Smrg *          w: MOV         R1.w,  0.0f
577b7e1c893Smrg *       4  x: DOT4        R2.x,  R1.x,  C0.x      CLAMP VEC_102
578b7e1c893Smrg *          y: DOT4        ____,  R1.y,  C0.y      CLAMP VEC_102
579b7e1c893Smrg *          z: DOT4        ____,  R1.z,  C0.z      CLAMP VEC_102
580b7e1c893Smrg *          w: DOT4        ____,  R1.w,  C0.w      CLAMP VEC_021
581b7e1c893Smrg *       5  x: DOT4        ____,  R1.x,  C1.x      CLAMP VEC_102
582b7e1c893Smrg *          y: DOT4        R2.y,  R1.y,  C1.y      CLAMP VEC_102
583b7e1c893Smrg *          z: DOT4        ____,  R1.z,  C1.z      CLAMP VEC_102
584b7e1c893Smrg *          w: DOT4        ____,  R1.w,  C1.w      CLAMP VEC_021
585b7e1c893Smrg *       6  x: DOT4        ____,  R1.x,  C2.x      CLAMP VEC_102
586b7e1c893Smrg *          y: DOT4        ____,  R1.y,  C2.y      CLAMP VEC_102
587b7e1c893Smrg *          z: DOT4        R2.z,  R1.z,  C2.z      CLAMP VEC_102
588b7e1c893Smrg *          w: DOT4        ____,  R1.w,  C2.w      CLAMP VEC_021
589b7e1c893Smrg * 03 EXP_DONE: PIX0, R2
590b7e1c893Smrg * END_OF_PROGRAM
591b7e1c893Smrg */
592b7e1c893Smrgint R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
593b7e1c893Smrg{
594b7e1c893Smrg    int i = 0;
595b7e1c893Smrg
596b7e1c893Smrg    /* 0 */
597b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(20));
598b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
599b7e1c893Smrg                            CF_CONST(0),
600b7e1c893Smrg                            COND(SQ_CF_COND_BOOL),
601b7e1c893Smrg                            I_COUNT(0),
602b7e1c893Smrg                            CALL_COUNT(0),
603b7e1c893Smrg                            END_OF_PROGRAM(0),
604b7e1c893Smrg                            VALID_PIXEL_MODE(0),
605b7e1c893Smrg                            CF_INST(SQ_CF_INST_CALL),
606b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
607b7e1c893Smrg                            BARRIER(0));
608b7e1c893Smrg    /* 1 */
609b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(28));
610b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
611b7e1c893Smrg                            CF_CONST(0),
612b7e1c893Smrg                            COND(SQ_CF_COND_NOT_BOOL),
613b7e1c893Smrg                            I_COUNT(0),
614b7e1c893Smrg                            CALL_COUNT(0),
615b7e1c893Smrg                            END_OF_PROGRAM(0),
616b7e1c893Smrg                            VALID_PIXEL_MODE(0),
617b7e1c893Smrg                            CF_INST(SQ_CF_INST_CALL),
618b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
619b7e1c893Smrg                            BARRIER(0));
620b7e1c893Smrg    /* 2 */
621b7e1c893Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(4),
622b7e1c893Smrg                                KCACHE_BANK0(0),
623b7e1c893Smrg                                KCACHE_BANK1(0),
624b7e1c893Smrg                                KCACHE_MODE0(SQ_CF_KCACHE_NOP));
625b7e1c893Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
626b7e1c893Smrg                                KCACHE_ADDR0(0),
627b7e1c893Smrg                                KCACHE_ADDR1(0),
628b7e1c893Smrg                                I_COUNT(16),
629b7e1c893Smrg                                USES_WATERFALL(0),
630b7e1c893Smrg                                CF_INST(SQ_CF_INST_ALU),
631b7e1c893Smrg                                WHOLE_QUAD_MODE(0),
632b7e1c893Smrg                                BARRIER(1));
633b7e1c893Smrg    /* 3 */
634b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
635b7e1c893Smrg                                          TYPE(SQ_EXPORT_PIXEL),
636b7e1c893Smrg                                          RW_GPR(2),
637b7e1c893Smrg                                          RW_REL(ABSOLUTE),
638b7e1c893Smrg                                          INDEX_GPR(0),
639b7e1c893Smrg                                          ELEM_SIZE(3));
640b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
641b7e1c893Smrg                                               SRC_SEL_Y(SQ_SEL_Y),
642b7e1c893Smrg                                               SRC_SEL_Z(SQ_SEL_Z),
643b7e1c893Smrg                                               SRC_SEL_W(SQ_SEL_W),
644b7e1c893Smrg                                               R6xx_ELEM_LOOP(0),
645b7e1c893Smrg                                               BURST_COUNT(1),
646b7e1c893Smrg                                               END_OF_PROGRAM(1),
647b7e1c893Smrg                                               VALID_PIXEL_MODE(0),
648b7e1c893Smrg                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
649b7e1c893Smrg                                               WHOLE_QUAD_MODE(0),
650b7e1c893Smrg                                               BARRIER(1));
651b7e1c893Smrg    /* 4 */
652b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
653b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
654b7e1c893Smrg                             SRC0_ELEM(ELEM_X),
655b7e1c893Smrg                             SRC0_NEG(0),
656b7e1c893Smrg                             SRC1_SEL(259),
657b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
658b7e1c893Smrg                             SRC1_ELEM(ELEM_X),
659b7e1c893Smrg                             SRC1_NEG(0),
660b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
661b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
662b7e1c893Smrg                             LAST(0));
663b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
664b7e1c893Smrg                                 SRC2_REL(ABSOLUTE),
665b7e1c893Smrg                                 SRC2_ELEM(ELEM_Y),
666b7e1c893Smrg                                 SRC2_NEG(0),
667b7e1c893Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
668b7e1c893Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
669b7e1c893Smrg                                 DST_GPR(1),
670b7e1c893Smrg                                 DST_REL(ABSOLUTE),
671b7e1c893Smrg                                 DST_ELEM(ELEM_X),
672b7e1c893Smrg                                 CLAMP(1));
673b7e1c893Smrg    /* 5 */
674b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
675b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
676b7e1c893Smrg                             SRC0_ELEM(ELEM_Y),
677b7e1c893Smrg                             SRC0_NEG(0),
678b7e1c893Smrg                             SRC1_SEL(259),
679b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
680b7e1c893Smrg                             SRC1_ELEM(ELEM_Z),
681b7e1c893Smrg                             SRC1_NEG(0),
682b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
683b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
684b7e1c893Smrg                             LAST(0));
685b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
686b7e1c893Smrg                                 SRC2_REL(ABSOLUTE),
687b7e1c893Smrg                                 SRC2_ELEM(ELEM_W),
688b7e1c893Smrg                                 SRC2_NEG(0),
689b7e1c893Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
690b7e1c893Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
691b7e1c893Smrg                                 DST_GPR(1),
692b7e1c893Smrg                                 DST_REL(ABSOLUTE),
693b7e1c893Smrg                                 DST_ELEM(ELEM_Y),
694b7e1c893Smrg                                 CLAMP(0));
695b7e1c893Smrg    /* 6 */
696b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
697b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
698b7e1c893Smrg                             SRC0_ELEM(ELEM_Z),
699b7e1c893Smrg                             SRC0_NEG(0),
700b7e1c893Smrg                             SRC1_SEL(259),
701b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
702b7e1c893Smrg                             SRC1_ELEM(ELEM_Z),
703b7e1c893Smrg                             SRC1_NEG(0),
704b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
705b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
706b7e1c893Smrg                             LAST(0));
707b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
708b7e1c893Smrg                                 SRC2_REL(ABSOLUTE),
709b7e1c893Smrg                                 SRC2_ELEM(ELEM_W),
710b7e1c893Smrg                                 SRC2_NEG(0),
711b7e1c893Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
712b7e1c893Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
713b7e1c893Smrg                                 DST_GPR(1),
714b7e1c893Smrg                                 DST_REL(ABSOLUTE),
715b7e1c893Smrg                                 DST_ELEM(ELEM_Z),
716b7e1c893Smrg                                 CLAMP(0));
717b7e1c893Smrg    /* 7 */
718b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
719b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
720b7e1c893Smrg                             SRC0_ELEM(ELEM_X),
721b7e1c893Smrg                             SRC0_NEG(0),
722b7e1c893Smrg                             SRC1_SEL(SQ_ALU_SRC_0),
723b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
724b7e1c893Smrg                             SRC1_ELEM(ELEM_X),
725b7e1c893Smrg                             SRC1_NEG(0),
726b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
727b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
728b7e1c893Smrg                             LAST(1));
729b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
730b7e1c893Smrg                                 SRC0_ABS(0),
731b7e1c893Smrg                                 SRC1_ABS(0),
732b7e1c893Smrg                                 UPDATE_EXECUTE_MASK(0),
733b7e1c893Smrg                                 UPDATE_PRED(0),
734b7e1c893Smrg                                 WRITE_MASK(1),
735b7e1c893Smrg                                 FOG_MERGE(0),
736b7e1c893Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
737b7e1c893Smrg                                 ALU_INST(SQ_OP2_INST_MOV),
738b7e1c893Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
739b7e1c893Smrg                                 DST_GPR(1),
740b7e1c893Smrg                                 DST_REL(ABSOLUTE),
741b7e1c893Smrg                                 DST_ELEM(ELEM_W),
742b7e1c893Smrg                                 CLAMP(0));
743b7e1c893Smrg    /* 8 */
744b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
745b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
746b7e1c893Smrg                             SRC0_ELEM(ELEM_X),
747b7e1c893Smrg                             SRC0_NEG(0),
748b7e1c893Smrg                             SRC1_SEL(256),
749b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
750b7e1c893Smrg                             SRC1_ELEM(ELEM_X),
751b7e1c893Smrg                             SRC1_NEG(0),
752b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
753b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
754b7e1c893Smrg                             LAST(0));
755b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
756b7e1c893Smrg                                 SRC0_ABS(0),
757b7e1c893Smrg                                 SRC1_ABS(0),
758b7e1c893Smrg                                 UPDATE_EXECUTE_MASK(0),
759b7e1c893Smrg                                 UPDATE_PRED(0),
760b7e1c893Smrg                                 WRITE_MASK(1),
761b7e1c893Smrg                                 FOG_MERGE(0),
762b7e1c893Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
763b7e1c893Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
764b7e1c893Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_102),
765b7e1c893Smrg                                 DST_GPR(2),
766b7e1c893Smrg                                 DST_REL(ABSOLUTE),
767b7e1c893Smrg                                 DST_ELEM(ELEM_X),
768b7e1c893Smrg                                 CLAMP(1));
769b7e1c893Smrg    /* 9 */
770b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
771b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
772b7e1c893Smrg                             SRC0_ELEM(ELEM_Y),
773b7e1c893Smrg                             SRC0_NEG(0),
774b7e1c893Smrg                             SRC1_SEL(256),
775b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
776b7e1c893Smrg                             SRC1_ELEM(ELEM_Y),
777b7e1c893Smrg                             SRC1_NEG(0),
778b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
779b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
780b7e1c893Smrg                             LAST(0));
781b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
782b7e1c893Smrg                                 SRC0_ABS(0),
783b7e1c893Smrg                                 SRC1_ABS(0),
784b7e1c893Smrg                                 UPDATE_EXECUTE_MASK(0),
785b7e1c893Smrg                                 UPDATE_PRED(0),
786b7e1c893Smrg                                 WRITE_MASK(0),
787b7e1c893Smrg                                 FOG_MERGE(0),
788b7e1c893Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
789b7e1c893Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
790b7e1c893Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_102),
791b7e1c893Smrg                                 DST_GPR(0),
792b7e1c893Smrg                                 DST_REL(ABSOLUTE),
793b7e1c893Smrg                                 DST_ELEM(ELEM_Y),
794b7e1c893Smrg                                 CLAMP(1));
795b7e1c893Smrg    /* 10 */
796b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
797b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
798b7e1c893Smrg                             SRC0_ELEM(ELEM_Z),
799b7e1c893Smrg                             SRC0_NEG(0),
800b7e1c893Smrg                             SRC1_SEL(256),
801b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
802b7e1c893Smrg                             SRC1_ELEM(ELEM_Z),
803b7e1c893Smrg                             SRC1_NEG(0),
804b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
805b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
806b7e1c893Smrg                             LAST(0));
807b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
808b7e1c893Smrg                                 SRC0_ABS(0),
809b7e1c893Smrg                                 SRC1_ABS(0),
810b7e1c893Smrg                                 UPDATE_EXECUTE_MASK(0),
811b7e1c893Smrg                                 UPDATE_PRED(0),
812b7e1c893Smrg                                 WRITE_MASK(0),
813b7e1c893Smrg                                 FOG_MERGE(0),
814b7e1c893Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
815b7e1c893Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
816b7e1c893Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_102),
817b7e1c893Smrg                                 DST_GPR(0),
818b7e1c893Smrg                                 DST_REL(ABSOLUTE),
819b7e1c893Smrg                                 DST_ELEM(ELEM_Z),
820b7e1c893Smrg                                 CLAMP(1));
821b7e1c893Smrg    /* 11 */
822b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
823b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
824b7e1c893Smrg                             SRC0_ELEM(ELEM_W),
825b7e1c893Smrg                             SRC0_NEG(0),
826b7e1c893Smrg                             SRC1_SEL(256),
827b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
828b7e1c893Smrg                             SRC1_ELEM(ELEM_W),
829b7e1c893Smrg                             SRC1_NEG(0),
830b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
831b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
832b7e1c893Smrg                             LAST(1));
833b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
834b7e1c893Smrg                                 SRC0_ABS(0),
835b7e1c893Smrg                                 SRC1_ABS(0),
836b7e1c893Smrg                                 UPDATE_EXECUTE_MASK(0),
837b7e1c893Smrg                                 UPDATE_PRED(0),
838b7e1c893Smrg                                 WRITE_MASK(0),
839b7e1c893Smrg                                 FOG_MERGE(0),
840b7e1c893Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
841b7e1c893Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
842b7e1c893Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_021),
843b7e1c893Smrg                                 DST_GPR(0),
844b7e1c893Smrg                                 DST_REL(ABSOLUTE),
845b7e1c893Smrg                                 DST_ELEM(ELEM_W),
846b7e1c893Smrg                                 CLAMP(1));
847b7e1c893Smrg    /* 12 */
848b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
849b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
850b7e1c893Smrg                             SRC0_ELEM(ELEM_X),
851b7e1c893Smrg                             SRC0_NEG(0),
852b7e1c893Smrg                             SRC1_SEL(257),
853b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
854b7e1c893Smrg                             SRC1_ELEM(ELEM_X),
855b7e1c893Smrg                             SRC1_NEG(0),
856b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
857b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
858b7e1c893Smrg                             LAST(0));
859b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
860b7e1c893Smrg                                 SRC0_ABS(0),
861b7e1c893Smrg                                 SRC1_ABS(0),
862b7e1c893Smrg                                 UPDATE_EXECUTE_MASK(0),
863b7e1c893Smrg                                 UPDATE_PRED(0),
864b7e1c893Smrg                                 WRITE_MASK(0),
865b7e1c893Smrg                                 FOG_MERGE(0),
866b7e1c893Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
867b7e1c893Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
868b7e1c893Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_102),
869b7e1c893Smrg                                 DST_GPR(0),
870b7e1c893Smrg                                 DST_REL(ABSOLUTE),
871b7e1c893Smrg                                 DST_ELEM(ELEM_X),
872b7e1c893Smrg                                 CLAMP(1));
873b7e1c893Smrg    /* 13 */
874b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
875b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
876b7e1c893Smrg                             SRC0_ELEM(ELEM_Y),
877b7e1c893Smrg                             SRC0_NEG(0),
878b7e1c893Smrg                             SRC1_SEL(257),
879b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
880b7e1c893Smrg                             SRC1_ELEM(ELEM_Y),
881b7e1c893Smrg                             SRC1_NEG(0),
882b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
883b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
884b7e1c893Smrg                             LAST(0));
885b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
886b7e1c893Smrg                                 SRC0_ABS(0),
887b7e1c893Smrg                                 SRC1_ABS(0),
888b7e1c893Smrg                                 UPDATE_EXECUTE_MASK(0),
889b7e1c893Smrg                                 UPDATE_PRED(0),
890b7e1c893Smrg                                 WRITE_MASK(1),
891b7e1c893Smrg                                 FOG_MERGE(0),
892b7e1c893Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
893b7e1c893Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
894b7e1c893Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_102),
895b7e1c893Smrg                                 DST_GPR(2),
896b7e1c893Smrg                                 DST_REL(ABSOLUTE),
897b7e1c893Smrg                                 DST_ELEM(ELEM_Y),
898b7e1c893Smrg                                 CLAMP(1));
899b7e1c893Smrg    /* 14 */
900b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
901b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
902b7e1c893Smrg                             SRC0_ELEM(ELEM_Z),
903b7e1c893Smrg                             SRC0_NEG(0),
904b7e1c893Smrg                             SRC1_SEL(257),
905b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
906b7e1c893Smrg                             SRC1_ELEM(ELEM_Z),
907b7e1c893Smrg                             SRC1_NEG(0),
908b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
909b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
910b7e1c893Smrg                             LAST(0));
911b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
912b7e1c893Smrg                                 SRC0_ABS(0),
913b7e1c893Smrg                                 SRC1_ABS(0),
914b7e1c893Smrg                                 UPDATE_EXECUTE_MASK(0),
915b7e1c893Smrg                                 UPDATE_PRED(0),
916b7e1c893Smrg                                 WRITE_MASK(0),
917b7e1c893Smrg                                 FOG_MERGE(0),
918b7e1c893Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
919b7e1c893Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
920b7e1c893Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_102),
921b7e1c893Smrg                                 DST_GPR(0),
922b7e1c893Smrg                                 DST_REL(ABSOLUTE),
923b7e1c893Smrg                                 DST_ELEM(ELEM_Z),
924b7e1c893Smrg                                 CLAMP(1));
925b7e1c893Smrg    /* 15 */
926b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
927b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
928b7e1c893Smrg                             SRC0_ELEM(ELEM_W),
929b7e1c893Smrg                             SRC0_NEG(0),
930b7e1c893Smrg                             SRC1_SEL(257),
931b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
932b7e1c893Smrg                             SRC1_ELEM(ELEM_W),
933b7e1c893Smrg                             SRC1_NEG(0),
934b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
935b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
936b7e1c893Smrg                             LAST(1));
937b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
938b7e1c893Smrg                                 SRC0_ABS(0),
939b7e1c893Smrg                                 SRC1_ABS(0),
940b7e1c893Smrg                                 UPDATE_EXECUTE_MASK(0),
941b7e1c893Smrg                                 UPDATE_PRED(0),
942b7e1c893Smrg                                 WRITE_MASK(0),
943b7e1c893Smrg                                 FOG_MERGE(0),
944b7e1c893Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
945b7e1c893Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
946b7e1c893Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_021),
947b7e1c893Smrg                                 DST_GPR(0),
948b7e1c893Smrg                                 DST_REL(ABSOLUTE),
949b7e1c893Smrg                                 DST_ELEM(ELEM_W),
950b7e1c893Smrg                                 CLAMP(1));
951b7e1c893Smrg    /* 16 */
952b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
953b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
954b7e1c893Smrg                             SRC0_ELEM(ELEM_X),
955b7e1c893Smrg                             SRC0_NEG(0),
956b7e1c893Smrg                             SRC1_SEL(258),
957b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
958b7e1c893Smrg                             SRC1_ELEM(ELEM_X),
959b7e1c893Smrg                             SRC1_NEG(0),
960b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
961b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
962b7e1c893Smrg                             LAST(0));
963b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
964b7e1c893Smrg                                 SRC0_ABS(0),
965b7e1c893Smrg                                 SRC1_ABS(0),
966b7e1c893Smrg                                 UPDATE_EXECUTE_MASK(0),
967b7e1c893Smrg                                 UPDATE_PRED(0),
968b7e1c893Smrg                                 WRITE_MASK(0),
969b7e1c893Smrg                                 FOG_MERGE(0),
970b7e1c893Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
971b7e1c893Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
972b7e1c893Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_102),
973b7e1c893Smrg                                 DST_GPR(0),
974b7e1c893Smrg                                 DST_REL(ABSOLUTE),
975b7e1c893Smrg                                 DST_ELEM(ELEM_X),
976b7e1c893Smrg                                 CLAMP(1));
977b7e1c893Smrg    /* 17 */
978b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
979b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
980b7e1c893Smrg                             SRC0_ELEM(ELEM_Y),
981b7e1c893Smrg                             SRC0_NEG(0),
982b7e1c893Smrg                             SRC1_SEL(258),
983b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
984b7e1c893Smrg                             SRC1_ELEM(ELEM_Y),
985b7e1c893Smrg                             SRC1_NEG(0),
986b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
987b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
988b7e1c893Smrg                             LAST(0));
989b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
990b7e1c893Smrg                                 SRC0_ABS(0),
991b7e1c893Smrg                                 SRC1_ABS(0),
992b7e1c893Smrg                                 UPDATE_EXECUTE_MASK(0),
993b7e1c893Smrg                                 UPDATE_PRED(0),
994b7e1c893Smrg                                 WRITE_MASK(0),
995b7e1c893Smrg                                 FOG_MERGE(0),
996b7e1c893Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
997b7e1c893Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
998b7e1c893Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_102),
999b7e1c893Smrg                                 DST_GPR(0),
1000b7e1c893Smrg                                 DST_REL(ABSOLUTE),
1001b7e1c893Smrg                                 DST_ELEM(ELEM_Y),
1002b7e1c893Smrg                                 CLAMP(1));
1003b7e1c893Smrg    /* 18 */
1004b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
1005b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
1006b7e1c893Smrg                             SRC0_ELEM(ELEM_Z),
1007b7e1c893Smrg                             SRC0_NEG(0),
1008b7e1c893Smrg                             SRC1_SEL(258),
1009b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
1010b7e1c893Smrg                             SRC1_ELEM(ELEM_Z),
1011b7e1c893Smrg                             SRC1_NEG(0),
1012b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1013b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1014b7e1c893Smrg                             LAST(0));
1015b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1016b7e1c893Smrg                                 SRC0_ABS(0),
1017b7e1c893Smrg                                 SRC1_ABS(0),
1018b7e1c893Smrg                                 UPDATE_EXECUTE_MASK(0),
1019b7e1c893Smrg                                 UPDATE_PRED(0),
1020b7e1c893Smrg                                 WRITE_MASK(1),
1021b7e1c893Smrg                                 FOG_MERGE(0),
1022b7e1c893Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1023b7e1c893Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1024b7e1c893Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_102),
1025b7e1c893Smrg                                 DST_GPR(2),
1026b7e1c893Smrg                                 DST_REL(ABSOLUTE),
1027b7e1c893Smrg                                 DST_ELEM(ELEM_Z),
1028b7e1c893Smrg                                 CLAMP(1));
1029b7e1c893Smrg    /* 19 */
1030b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
1031b7e1c893Smrg                             SRC0_REL(ABSOLUTE),
1032b7e1c893Smrg                             SRC0_ELEM(ELEM_W),
1033b7e1c893Smrg                             SRC0_NEG(0),
1034b7e1c893Smrg                             SRC1_SEL(258),
1035b7e1c893Smrg                             SRC1_REL(ABSOLUTE),
1036b7e1c893Smrg                             SRC1_ELEM(ELEM_W),
1037b7e1c893Smrg                             SRC1_NEG(0),
1038b7e1c893Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1039b7e1c893Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1040b7e1c893Smrg                             LAST(1));
1041b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1042b7e1c893Smrg                                 SRC0_ABS(0),
1043b7e1c893Smrg                                 SRC1_ABS(0),
1044b7e1c893Smrg                                 UPDATE_EXECUTE_MASK(0),
1045b7e1c893Smrg                                 UPDATE_PRED(0),
1046b7e1c893Smrg                                 WRITE_MASK(0),
1047b7e1c893Smrg                                 FOG_MERGE(0),
1048b7e1c893Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1049b7e1c893Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1050b7e1c893Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_021),
1051b7e1c893Smrg                                 DST_GPR(0),
1052b7e1c893Smrg                                 DST_REL(ABSOLUTE),
1053b7e1c893Smrg                                 DST_ELEM(ELEM_W),
1054b7e1c893Smrg                                 CLAMP(1));
1055b7e1c893Smrg    /* 20 */
1056b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(22));
1057b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1058b7e1c893Smrg                            CF_CONST(0),
1059b7e1c893Smrg                            COND(SQ_CF_COND_ACTIVE),
1060b7e1c893Smrg                            I_COUNT(3),
1061b7e1c893Smrg                            CALL_COUNT(0),
1062b7e1c893Smrg                            END_OF_PROGRAM(0),
1063b7e1c893Smrg                            VALID_PIXEL_MODE(0),
1064b7e1c893Smrg                            CF_INST(SQ_CF_INST_TEX),
1065b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
1066b7e1c893Smrg                            BARRIER(1));
1067b7e1c893Smrg    /* 21 */
1068b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(0));
1069b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1070b7e1c893Smrg			    CF_CONST(0),
1071b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
1072b7e1c893Smrg			    I_COUNT(0),
1073b7e1c893Smrg			    CALL_COUNT(0),
1074b7e1c893Smrg			    END_OF_PROGRAM(0),
1075b7e1c893Smrg			    VALID_PIXEL_MODE(0),
1076b7e1c893Smrg			    CF_INST(SQ_CF_INST_RETURN),
1077b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
1078b7e1c893Smrg			    BARRIER(1));
1079b7e1c893Smrg    /* 22/23 */
1080b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1081b7e1c893Smrg                             BC_FRAC_MODE(0),
1082b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
1083b7e1c893Smrg                             RESOURCE_ID(0),
1084b7e1c893Smrg                             SRC_GPR(0),
1085b7e1c893Smrg                             SRC_REL(ABSOLUTE),
1086b7e1c893Smrg                             R7xx_ALT_CONST(0));
1087b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1088b7e1c893Smrg                             DST_REL(ABSOLUTE),
1089b7e1c893Smrg                             DST_SEL_X(SQ_SEL_X),
1090b7e1c893Smrg                             DST_SEL_Y(SQ_SEL_MASK),
1091b7e1c893Smrg                             DST_SEL_Z(SQ_SEL_MASK),
1092b7e1c893Smrg                             DST_SEL_W(SQ_SEL_1),
1093b7e1c893Smrg                             LOD_BIAS(0),
1094b7e1c893Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1095b7e1c893Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1096b7e1c893Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1097b7e1c893Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1098b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1099b7e1c893Smrg                             OFFSET_Y(0),
1100b7e1c893Smrg                             OFFSET_Z(0),
1101b7e1c893Smrg                             SAMPLER_ID(0),
1102b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
1103b7e1c893Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1104b7e1c893Smrg                             SRC_SEL_Z(SQ_SEL_0),
1105b7e1c893Smrg                             SRC_SEL_W(SQ_SEL_1));
1106b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
1107b7e1c893Smrg    /* 24/25 */
1108b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1109b7e1c893Smrg                             BC_FRAC_MODE(0),
1110b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
1111b7e1c893Smrg                             RESOURCE_ID(1),
1112b7e1c893Smrg                             SRC_GPR(0),
1113b7e1c893Smrg                             SRC_REL(ABSOLUTE),
1114b7e1c893Smrg                             R7xx_ALT_CONST(0));
1115b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1116b7e1c893Smrg                             DST_REL(ABSOLUTE),
1117b7e1c893Smrg                             DST_SEL_X(SQ_SEL_MASK),
1118b7e1c893Smrg                             DST_SEL_Y(SQ_SEL_MASK),
1119b7e1c893Smrg                             DST_SEL_Z(SQ_SEL_X),
1120b7e1c893Smrg                             DST_SEL_W(SQ_SEL_MASK),
1121b7e1c893Smrg                             LOD_BIAS(0),
1122b7e1c893Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1123b7e1c893Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1124b7e1c893Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1125b7e1c893Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1126b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1127b7e1c893Smrg                             OFFSET_Y(0),
1128b7e1c893Smrg                             OFFSET_Z(0),
1129b7e1c893Smrg                             SAMPLER_ID(1),
1130b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
1131b7e1c893Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1132b7e1c893Smrg                             SRC_SEL_Z(SQ_SEL_0),
1133b7e1c893Smrg                             SRC_SEL_W(SQ_SEL_1));
1134b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
1135b7e1c893Smrg    /* 26/27 */
1136b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1137b7e1c893Smrg                             BC_FRAC_MODE(0),
1138b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
1139b7e1c893Smrg                             RESOURCE_ID(2),
1140b7e1c893Smrg                             SRC_GPR(0),
1141b7e1c893Smrg                             SRC_REL(ABSOLUTE),
1142b7e1c893Smrg                             R7xx_ALT_CONST(0));
1143b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1144b7e1c893Smrg                             DST_REL(ABSOLUTE),
1145b7e1c893Smrg                             DST_SEL_X(SQ_SEL_MASK),
1146b7e1c893Smrg                             DST_SEL_Y(SQ_SEL_X),
1147b7e1c893Smrg                             DST_SEL_Z(SQ_SEL_MASK),
1148b7e1c893Smrg                             DST_SEL_W(SQ_SEL_MASK),
1149b7e1c893Smrg                             LOD_BIAS(0),
1150b7e1c893Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1151b7e1c893Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1152b7e1c893Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1153b7e1c893Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1154b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1155b7e1c893Smrg                             OFFSET_Y(0),
1156b7e1c893Smrg                             OFFSET_Z(0),
1157b7e1c893Smrg                             SAMPLER_ID(2),
1158b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
1159b7e1c893Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1160b7e1c893Smrg                             SRC_SEL_Z(SQ_SEL_0),
1161b7e1c893Smrg                             SRC_SEL_W(SQ_SEL_1));
1162b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
1163b7e1c893Smrg    /* 28 */
1164b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(30));
1165b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1166b7e1c893Smrg                            CF_CONST(0),
1167b7e1c893Smrg                            COND(SQ_CF_COND_ACTIVE),
1168b7e1c893Smrg                            I_COUNT(2),
1169b7e1c893Smrg                            CALL_COUNT(0),
1170b7e1c893Smrg                            END_OF_PROGRAM(0),
1171b7e1c893Smrg                            VALID_PIXEL_MODE(0),
1172b7e1c893Smrg                            CF_INST(SQ_CF_INST_TEX),
1173b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
1174b7e1c893Smrg                            BARRIER(1));
1175b7e1c893Smrg    /* 29 */
1176b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(0));
1177b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1178b7e1c893Smrg			    CF_CONST(0),
1179b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
1180b7e1c893Smrg			    I_COUNT(0),
1181b7e1c893Smrg			    CALL_COUNT(0),
1182b7e1c893Smrg			    END_OF_PROGRAM(0),
1183b7e1c893Smrg			    VALID_PIXEL_MODE(0),
1184b7e1c893Smrg			    CF_INST(SQ_CF_INST_RETURN),
1185b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
1186b7e1c893Smrg			    BARRIER(1));
1187b7e1c893Smrg    /* 30/31 */
1188b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1189b7e1c893Smrg                             BC_FRAC_MODE(0),
1190b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
1191b7e1c893Smrg                             RESOURCE_ID(0),
1192b7e1c893Smrg                             SRC_GPR(0),
1193b7e1c893Smrg                             SRC_REL(ABSOLUTE),
1194b7e1c893Smrg                             R7xx_ALT_CONST(0));
1195b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1196b7e1c893Smrg                             DST_REL(ABSOLUTE),
1197b7e1c893Smrg                             DST_SEL_X(SQ_SEL_X),
1198b7e1c893Smrg                             DST_SEL_Y(SQ_SEL_MASK),
1199b7e1c893Smrg                             DST_SEL_Z(SQ_SEL_MASK),
1200b7e1c893Smrg                             DST_SEL_W(SQ_SEL_1),
1201b7e1c893Smrg                             LOD_BIAS(0),
1202b7e1c893Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1203b7e1c893Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1204b7e1c893Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1205b7e1c893Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1206b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1207b7e1c893Smrg                             OFFSET_Y(0),
1208b7e1c893Smrg                             OFFSET_Z(0),
1209b7e1c893Smrg                             SAMPLER_ID(0),
1210b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
1211b7e1c893Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1212b7e1c893Smrg                             SRC_SEL_Z(SQ_SEL_0),
1213b7e1c893Smrg                             SRC_SEL_W(SQ_SEL_1));
1214b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
1215b7e1c893Smrg    /* 32/33 */
1216b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1217b7e1c893Smrg                             BC_FRAC_MODE(0),
1218b7e1c893Smrg                             FETCH_WHOLE_QUAD(0),
1219b7e1c893Smrg                             RESOURCE_ID(1),
1220b7e1c893Smrg                             SRC_GPR(0),
1221b7e1c893Smrg                             SRC_REL(ABSOLUTE),
1222b7e1c893Smrg                             R7xx_ALT_CONST(0));
1223b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1224b7e1c893Smrg                             DST_REL(ABSOLUTE),
1225b7e1c893Smrg                             DST_SEL_X(SQ_SEL_MASK),
1226b7e1c893Smrg                             DST_SEL_Y(SQ_SEL_X),
1227b7e1c893Smrg                             DST_SEL_Z(SQ_SEL_Y),
1228b7e1c893Smrg                             DST_SEL_W(SQ_SEL_MASK),
1229b7e1c893Smrg                             LOD_BIAS(0),
1230b7e1c893Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1231b7e1c893Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1232b7e1c893Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1233b7e1c893Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1234b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1235b7e1c893Smrg                             OFFSET_Y(0),
1236b7e1c893Smrg                             OFFSET_Z(0),
1237b7e1c893Smrg                             SAMPLER_ID(1),
1238b7e1c893Smrg                             SRC_SEL_X(SQ_SEL_X),
1239b7e1c893Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1240b7e1c893Smrg                             SRC_SEL_Z(SQ_SEL_0),
1241b7e1c893Smrg                             SRC_SEL_W(SQ_SEL_1));
1242b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
1243b7e1c893Smrg
1244b7e1c893Smrg    return i;
1245b7e1c893Smrg}
1246b7e1c893Smrg
1247b7e1c893Smrg/* comp mask ps --------------------------------------- */
/*
 * R600_comp_mask_ps - write the "comp mask" pixel shader into a caller buffer.
 *
 * ChipSet: only forwarded to ALU_DWORD1_OP2() for the four ALU instructions.
 * shader:  destination buffer; 24 32-bit words (shader[0]..shader[23]) are
 *          stored unconditionally, with no bounds check.
 * Returns: the number of 32-bit words written (24).
 *
 * The numbers in the inline comments count pairs of 32-bit words ("slots");
 * each TEX instruction is emitted as four words and so spans two slots.
 * Those slot numbers line up with the ADDR() values used by the CF
 * instructions, so statements must not be reordered, added or removed
 * without updating the ADDR() values.
 *
 * Layout as emitted below:
 *   slot 0      CF  SQ_CF_INST_TEX, ADDR(8), I_COUNT(2)
 *   slot 1      CF  SQ_CF_INST_ALU, ADDR(3), I_COUNT(4)
 *   slot 2      CF  SQ_CF_INST_EXPORT_DONE of gpr 2 to CF_PIXEL_MRT0,
 *                   END_OF_PROGRAM(1)
 *   slots 3-6   four SQ_OP2_INST_MUL ops, one per component, CLAMP(1):
 *                   gpr[2].c = gpr[1].c * gpr[0].c
 *   slot 7      two zero words
 *   slots 8-9   SAMPLE, resource 0 / sampler 0, coords gpr 0 -> gpr 0 (src)
 *   slots 10-11 SAMPLE, resource 1 / sampler 1, coords gpr 1 -> gpr 1 (mask)
 */
1248b7e1c893Smrgint R600_comp_mask_ps(RADEONChipFamily ChipSet, uint32_t* shader)
1249b7e1c893Smrg{
1250b7e1c893Smrg    int i = 0;
1251b7e1c893Smrg
1252b7e1c893Smrg    /* 0 - CF: TEX clause at slot 8 (the two fetches emitted last) */
1253b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(8));
1254b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1255b7e1c893Smrg			    CF_CONST(0),
1256b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
1257b7e1c893Smrg			    I_COUNT(2),
1258b7e1c893Smrg			    CALL_COUNT(0),
1259b7e1c893Smrg			    END_OF_PROGRAM(0),
1260b7e1c893Smrg			    VALID_PIXEL_MODE(0),
1261b7e1c893Smrg			    CF_INST(SQ_CF_INST_TEX),
1262b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
1263b7e1c893Smrg			    BARRIER(1));
1264b7e1c893Smrg
1265b7e1c893Smrg    /* 1 - CF: ALU clause at slot 3 (the four MULs below) */
1266b7e1c893Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(3),
1267b7e1c893Smrg				KCACHE_BANK0(0),
1268b7e1c893Smrg				KCACHE_BANK1(0),
1269b7e1c893Smrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
1270b7e1c893Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1271b7e1c893Smrg				KCACHE_ADDR0(0),
1272b7e1c893Smrg				KCACHE_ADDR1(0),
1273b7e1c893Smrg				I_COUNT(4),
1274b7e1c893Smrg				USES_WATERFALL(0),
1275b7e1c893Smrg				CF_INST(SQ_CF_INST_ALU),
1276b7e1c893Smrg				WHOLE_QUAD_MODE(0),
1277b7e1c893Smrg				BARRIER(1));
1278b7e1c893Smrg
1279b7e1c893Smrg    /* 2 - CF: export gpr 2 to CF_PIXEL_MRT0; END_OF_PROGRAM(1) is set here */
1280b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
1281b7e1c893Smrg					  TYPE(SQ_EXPORT_PIXEL),
1282b7e1c893Smrg					  RW_GPR(2),
1283b7e1c893Smrg					  RW_REL(ABSOLUTE),
1284b7e1c893Smrg					  INDEX_GPR(0),
1285b7e1c893Smrg					  ELEM_SIZE(1));
1286b7e1c893Smrg
1287b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1288b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1289b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
1290b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
1291b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
1292b7e1c893Smrg					       BURST_COUNT(1),
1293b7e1c893Smrg					       END_OF_PROGRAM(1),
1294b7e1c893Smrg					       VALID_PIXEL_MODE(0),
1295b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1296b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
1297b7e1c893Smrg					       BARRIER(1));
1298b7e1c893Smrg
1299b7e1c893Smrg    /* 3 - alu 0 */
1300b7e1c893Smrg    /* MUL gpr[2].x gpr[1].x gpr[0].x */
1301b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
1302b7e1c893Smrg			     SRC0_REL(ABSOLUTE),
1303b7e1c893Smrg			     SRC0_ELEM(ELEM_X),
1304b7e1c893Smrg			     SRC0_NEG(0),
1305b7e1c893Smrg			     SRC1_SEL(0),
1306b7e1c893Smrg			     SRC1_REL(ABSOLUTE),
1307b7e1c893Smrg			     SRC1_ELEM(ELEM_X),
1308b7e1c893Smrg			     SRC1_NEG(0),
1309b7e1c893Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
1310b7e1c893Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
1311b7e1c893Smrg			     LAST(0));
1312b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1313b7e1c893Smrg				 SRC0_ABS(0),
1314b7e1c893Smrg				 SRC1_ABS(0),
1315b7e1c893Smrg				 UPDATE_EXECUTE_MASK(0),
1316b7e1c893Smrg				 UPDATE_PRED(0),
1317b7e1c893Smrg				 WRITE_MASK(1),
1318b7e1c893Smrg				 FOG_MERGE(0),
1319b7e1c893Smrg				 OMOD(SQ_ALU_OMOD_OFF),
1320b7e1c893Smrg				 ALU_INST(SQ_OP2_INST_MUL),
1321b7e1c893Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
1322b7e1c893Smrg				 DST_GPR(2),
1323b7e1c893Smrg				 DST_REL(ABSOLUTE),
1324b7e1c893Smrg				 DST_ELEM(ELEM_X),
1325b7e1c893Smrg				 CLAMP(1));
1326b7e1c893Smrg    /* 4 - alu 1 */
1327b7e1c893Smrg    /* MUL gpr[2].y gpr[1].y gpr[0].y */
1328b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
1329b7e1c893Smrg			     SRC0_REL(ABSOLUTE),
1330b7e1c893Smrg			     SRC0_ELEM(ELEM_Y),
1331b7e1c893Smrg			     SRC0_NEG(0),
1332b7e1c893Smrg			     SRC1_SEL(0),
1333b7e1c893Smrg			     SRC1_REL(ABSOLUTE),
1334b7e1c893Smrg			     SRC1_ELEM(ELEM_Y),
1335b7e1c893Smrg			     SRC1_NEG(0),
1336b7e1c893Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
1337b7e1c893Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
1338b7e1c893Smrg			     LAST(0));
1339b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1340b7e1c893Smrg				 SRC0_ABS(0),
1341b7e1c893Smrg				 SRC1_ABS(0),
1342b7e1c893Smrg				 UPDATE_EXECUTE_MASK(0),
1343b7e1c893Smrg				 UPDATE_PRED(0),
1344b7e1c893Smrg				 WRITE_MASK(1),
1345b7e1c893Smrg				 FOG_MERGE(0),
1346b7e1c893Smrg				 OMOD(SQ_ALU_OMOD_OFF),
1347b7e1c893Smrg				 ALU_INST(SQ_OP2_INST_MUL),
1348b7e1c893Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
1349b7e1c893Smrg				 DST_GPR(2),
1350b7e1c893Smrg				 DST_REL(ABSOLUTE),
1351b7e1c893Smrg				 DST_ELEM(ELEM_Y),
1352b7e1c893Smrg				 CLAMP(1));
1353b7e1c893Smrg    /* 5 - alu 2 */
1354b7e1c893Smrg    /* MUL gpr[2].z gpr[1].z gpr[0].z */
1355b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
1356b7e1c893Smrg			     SRC0_REL(ABSOLUTE),
1357b7e1c893Smrg			     SRC0_ELEM(ELEM_Z),
1358b7e1c893Smrg			     SRC0_NEG(0),
1359b7e1c893Smrg			     SRC1_SEL(0),
1360b7e1c893Smrg			     SRC1_REL(ABSOLUTE),
1361b7e1c893Smrg			     SRC1_ELEM(ELEM_Z),
1362b7e1c893Smrg			     SRC1_NEG(0),
1363b7e1c893Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
1364b7e1c893Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
1365b7e1c893Smrg			     LAST(0));
1366b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1367b7e1c893Smrg				 SRC0_ABS(0),
1368b7e1c893Smrg				 SRC1_ABS(0),
1369b7e1c893Smrg				 UPDATE_EXECUTE_MASK(0),
1370b7e1c893Smrg				 UPDATE_PRED(0),
1371b7e1c893Smrg				 WRITE_MASK(1),
1372b7e1c893Smrg				 FOG_MERGE(0),
1373b7e1c893Smrg				 OMOD(SQ_ALU_OMOD_OFF),
1374b7e1c893Smrg				 ALU_INST(SQ_OP2_INST_MUL),
1375b7e1c893Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
1376b7e1c893Smrg				 DST_GPR(2),
1377b7e1c893Smrg				 DST_REL(ABSOLUTE),
1378b7e1c893Smrg				 DST_ELEM(ELEM_Z),
1379b7e1c893Smrg				 CLAMP(1));
1380b7e1c893Smrg    /* 6 - alu 3 */
1381b7e1c893Smrg    /* MUL gpr[2].w gpr[1].w gpr[0].w */
1382b7e1c893Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
1383b7e1c893Smrg			     SRC0_REL(ABSOLUTE),
1384b7e1c893Smrg			     SRC0_ELEM(ELEM_W),
1385b7e1c893Smrg			     SRC0_NEG(0),
1386b7e1c893Smrg			     SRC1_SEL(0),
1387b7e1c893Smrg			     SRC1_REL(ABSOLUTE),
1388b7e1c893Smrg			     SRC1_ELEM(ELEM_W),
1389b7e1c893Smrg			     SRC1_NEG(0),
1390b7e1c893Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
1391b7e1c893Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
1392b7e1c893Smrg			     LAST(1));
1393b7e1c893Smrg    shader[i++] = ALU_DWORD1_OP2(ChipSet,
1394b7e1c893Smrg				 SRC0_ABS(0),
1395b7e1c893Smrg				 SRC1_ABS(0),
1396b7e1c893Smrg				 UPDATE_EXECUTE_MASK(0),
1397b7e1c893Smrg				 UPDATE_PRED(0),
1398b7e1c893Smrg				 WRITE_MASK(1),
1399b7e1c893Smrg				 FOG_MERGE(0),
1400b7e1c893Smrg				 OMOD(SQ_ALU_OMOD_OFF),
1401b7e1c893Smrg				 ALU_INST(SQ_OP2_INST_MUL),
1402b7e1c893Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
1403b7e1c893Smrg				 DST_GPR(2),
1404b7e1c893Smrg				 DST_REL(ABSOLUTE),
1405b7e1c893Smrg				 DST_ELEM(ELEM_W),
1406b7e1c893Smrg				 CLAMP(1));
1407b7e1c893Smrg    /* 7 - two zero words between the ALU ops and the TEX instructions */
    /* NOTE(review): presumably padding so the TEX clause starts at slot 8 - confirm */
1408b7e1c893Smrg    shader[i++] = 0x00000000;
1409b7e1c893Smrg    shader[i++] = 0x00000000;
1410b7e1c893Smrg
1411b7e1c893Smrg    /* 8/9 - src */
1412b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1413b7e1c893Smrg			     BC_FRAC_MODE(0),
1414b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
1415b7e1c893Smrg			     RESOURCE_ID(0),
1416b7e1c893Smrg			     SRC_GPR(0),
1417b7e1c893Smrg			     SRC_REL(ABSOLUTE),
1418b7e1c893Smrg			     R7xx_ALT_CONST(0));
1419b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(0),
1420b7e1c893Smrg			     DST_REL(ABSOLUTE),
1421b7e1c893Smrg			     DST_SEL_X(SQ_SEL_X),
1422b7e1c893Smrg			     DST_SEL_Y(SQ_SEL_Y),
1423b7e1c893Smrg			     DST_SEL_Z(SQ_SEL_Z),
1424b7e1c893Smrg			     DST_SEL_W(SQ_SEL_W),
1425b7e1c893Smrg			     LOD_BIAS(0),
1426b7e1c893Smrg			     COORD_TYPE_X(TEX_NORMALIZED),
1427b7e1c893Smrg			     COORD_TYPE_Y(TEX_NORMALIZED),
1428b7e1c893Smrg			     COORD_TYPE_Z(TEX_NORMALIZED),
1429b7e1c893Smrg			     COORD_TYPE_W(TEX_NORMALIZED));
1430b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1431b7e1c893Smrg			     OFFSET_Y(0),
1432b7e1c893Smrg			     OFFSET_Z(0),
1433b7e1c893Smrg			     SAMPLER_ID(0),
1434b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
1435b7e1c893Smrg			     SRC_SEL_Y(SQ_SEL_Y),
1436b7e1c893Smrg			     SRC_SEL_Z(SQ_SEL_0),
1437b7e1c893Smrg			     SRC_SEL_W(SQ_SEL_1));
1438b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
1439b7e1c893Smrg    /* 10/11 - mask */
1440b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1441b7e1c893Smrg			     BC_FRAC_MODE(0),
1442b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
1443b7e1c893Smrg			     RESOURCE_ID(1),
1444b7e1c893Smrg			     SRC_GPR(1),
1445b7e1c893Smrg			     SRC_REL(ABSOLUTE),
1446b7e1c893Smrg			     R7xx_ALT_CONST(0));
1447b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1448b7e1c893Smrg			     DST_REL(ABSOLUTE),
1449b7e1c893Smrg			     DST_SEL_X(SQ_SEL_X),
1450b7e1c893Smrg			     DST_SEL_Y(SQ_SEL_Y),
1451b7e1c893Smrg			     DST_SEL_Z(SQ_SEL_Z),
1452b7e1c893Smrg			     DST_SEL_W(SQ_SEL_W),
1453b7e1c893Smrg			     LOD_BIAS(0),
1454b7e1c893Smrg			     COORD_TYPE_X(TEX_NORMALIZED),
1455b7e1c893Smrg			     COORD_TYPE_Y(TEX_NORMALIZED),
1456b7e1c893Smrg			     COORD_TYPE_Z(TEX_NORMALIZED),
1457b7e1c893Smrg			     COORD_TYPE_W(TEX_NORMALIZED));
1458b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1459b7e1c893Smrg			     OFFSET_Y(0),
1460b7e1c893Smrg			     OFFSET_Z(0),
1461b7e1c893Smrg			     SAMPLER_ID(1),
1462b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
1463b7e1c893Smrg			     SRC_SEL_Y(SQ_SEL_Y),
1464b7e1c893Smrg			     SRC_SEL_Z(SQ_SEL_0),
1465b7e1c893Smrg			     SRC_SEL_W(SQ_SEL_1));
1466b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
1467b7e1c893Smrg
1468b7e1c893Smrg    return i;
1469b7e1c893Smrg}
1470b7e1c893Smrg
1471b7e1c893Smrg/* comp vs --------------------------------------- */
/*
 * R600_comp_vs - write the "comp" vertex shader into a caller buffer.
 *
 * ChipSet: not referenced directly anywhere in this function body.
 * shader:  destination buffer; 44 32-bit words (shader[0]..shader[43]) are
 *          stored unconditionally, with no bounds check.
 * Returns: the number of 32-bit words written (44).
 *
 * The numbers in the inline comments count pairs of 32-bit words ("slots");
 * each VTX fetch is emitted as four words and so spans two slots.  Those slot
 * numbers line up with the ADDR() values used by the CF instructions, so
 * statements must not be reordered, added or removed without updating the
 * ADDR() values.
 *
 * Layout as emitted below:
 *   slot 0       CF  SQ_CF_INST_CALL ADDR(3),  COND(SQ_CF_COND_BOOL)
 *   slot 1       CF  SQ_CF_INST_CALL ADDR(14), COND(SQ_CF_COND_NOT_BOOL)
 *   slot 2       CF  SQ_CF_INST_NOP with END_OF_PROGRAM(1)
 *   slots 3-7    "mask sub": VTX clause (ADDR(8), I_COUNT(3)), export POS
 *                from gpr 2, PARAM 0 from gpr 1, PARAM 1 from gpr 0, RETURN
 *   slots 8-13   three fetches from buffer 0 at offsets 0 / 8 / 16 into
 *                gpr 2 (dst) / gpr 1 (src) / gpr 0 (mask)
 *   slots 14-17  "non-mask sub": VTX clause (ADDR(18), I_COUNT(2)), export
 *                POS from gpr 1, PARAM 0 from gpr 0, RETURN
 *   slots 18-21  two fetches from buffer 0 at offsets 0 / 8 into
 *                gpr 1 (dst) / gpr 0 (src)
 *
 * NOTE(review): both CALLs use CF_CONST(0); presumably the caller sets that
 * boolean constant to choose between the mask and non-mask path - confirm
 * against the code that programs the shader constants.
 */
1472b7e1c893Smrgint R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
1473b7e1c893Smrg{
1474b7e1c893Smrg    int i = 0;
1475b7e1c893Smrg
1476b7e1c893Smrg    /* 0 - CF: CALL slot 3 (mask sub), COND(SQ_CF_COND_BOOL) */
1477b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(3));
1478b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1479b7e1c893Smrg                            CF_CONST(0),
1480b7e1c893Smrg                            COND(SQ_CF_COND_BOOL),
1481b7e1c893Smrg                            I_COUNT(0),
1482b7e1c893Smrg                            CALL_COUNT(0),
1483b7e1c893Smrg                            END_OF_PROGRAM(0),
1484b7e1c893Smrg                            VALID_PIXEL_MODE(0),
1485b7e1c893Smrg                            CF_INST(SQ_CF_INST_CALL),
1486b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
1487b7e1c893Smrg                            BARRIER(0));
1488b7e1c893Smrg    /* 1 - CF: CALL slot 14 (non-mask sub), COND(SQ_CF_COND_NOT_BOOL) */
1489b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(14));
1490b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1491b7e1c893Smrg                            CF_CONST(0),
1492b7e1c893Smrg                            COND(SQ_CF_COND_NOT_BOOL),
1493b7e1c893Smrg                            I_COUNT(0),
1494b7e1c893Smrg                            CALL_COUNT(0),
1495b7e1c893Smrg                            END_OF_PROGRAM(0),
1496b7e1c893Smrg                            VALID_PIXEL_MODE(0),
1497b7e1c893Smrg                            CF_INST(SQ_CF_INST_CALL),
1498b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
1499b7e1c893Smrg                            BARRIER(0));
1500b7e1c893Smrg    /* 2 - CF: NOP carrying END_OF_PROGRAM(1) */
    /* NOTE(review): every other CF_DWORD0 here wraps its argument in ADDR();
     * this one passes a bare 0.  Equivalent only if ADDR() is an identity
     * macro - confirm in the header that defines it. */
1501b7e1c893Smrg    shader[i++] = CF_DWORD0(0);
1502b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1503b7e1c893Smrg                            CF_CONST(0),
1504b7e1c893Smrg                            COND(SQ_CF_COND_ACTIVE),
1505b7e1c893Smrg                            I_COUNT(0),
1506b7e1c893Smrg                            CALL_COUNT(0),
1507b7e1c893Smrg                            END_OF_PROGRAM(1),
1508b7e1c893Smrg                            VALID_PIXEL_MODE(0),
1509b7e1c893Smrg                            CF_INST(SQ_CF_INST_NOP),
1510b7e1c893Smrg                            WHOLE_QUAD_MODE(0),
1511b7e1c893Smrg                            BARRIER(1));
1512b7e1c893Smrg    /* 3 - mask sub */
1513b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(8));
1514b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1515b7e1c893Smrg			    CF_CONST(0),
1516b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
1517b7e1c893Smrg			    I_COUNT(3),
1518b7e1c893Smrg			    CALL_COUNT(0),
1519b7e1c893Smrg			    END_OF_PROGRAM(0),
1520b7e1c893Smrg			    VALID_PIXEL_MODE(0),
1521b7e1c893Smrg			    CF_INST(SQ_CF_INST_VTX),
1522b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
1523b7e1c893Smrg			    BARRIER(1));
1524b7e1c893Smrg    /* 4 - dst */
1525b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1526b7e1c893Smrg					  TYPE(SQ_EXPORT_POS),
1527b7e1c893Smrg					  RW_GPR(2),
1528b7e1c893Smrg					  RW_REL(ABSOLUTE),
1529b7e1c893Smrg					  INDEX_GPR(0),
1530b7e1c893Smrg					  ELEM_SIZE(0));
1531b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1532b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1533b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
1534b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
1535b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
1536b7e1c893Smrg					       BURST_COUNT(1),
1537b7e1c893Smrg					       END_OF_PROGRAM(0),
1538b7e1c893Smrg					       VALID_PIXEL_MODE(0),
1539b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1540b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
1541b7e1c893Smrg					       BARRIER(1));
1542b7e1c893Smrg    /* 5 - src */
1543b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1544b7e1c893Smrg					  TYPE(SQ_EXPORT_PARAM),
1545b7e1c893Smrg					  RW_GPR(1),
1546b7e1c893Smrg					  RW_REL(ABSOLUTE),
1547b7e1c893Smrg					  INDEX_GPR(0),
1548b7e1c893Smrg					  ELEM_SIZE(0));
1549b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1550b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1551b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
1552b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
1553b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
1554b7e1c893Smrg					       BURST_COUNT(1),
1555b7e1c893Smrg					       END_OF_PROGRAM(0),
1556b7e1c893Smrg					       VALID_PIXEL_MODE(0),
1557b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT),
1558b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
1559b7e1c893Smrg					       BARRIER(0));
1560b7e1c893Smrg    /* 6 - mask */
1561b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
1562b7e1c893Smrg					  TYPE(SQ_EXPORT_PARAM),
1563b7e1c893Smrg					  RW_GPR(0),
1564b7e1c893Smrg					  RW_REL(ABSOLUTE),
1565b7e1c893Smrg					  INDEX_GPR(0),
1566b7e1c893Smrg					  ELEM_SIZE(0));
1567b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1568b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1569b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
1570b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
1571b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
1572b7e1c893Smrg					       BURST_COUNT(1),
1573b7e1c893Smrg					       END_OF_PROGRAM(0),
1574b7e1c893Smrg					       VALID_PIXEL_MODE(0),
1575b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1576b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
1577b7e1c893Smrg					       BARRIER(0));
1578b7e1c893Smrg    /* 7 - CF: RETURN at the end of the mask sub */
1579b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(0));
1580b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1581b7e1c893Smrg			    CF_CONST(0),
1582b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
1583b7e1c893Smrg			    I_COUNT(0),
1584b7e1c893Smrg			    CALL_COUNT(0),
1585b7e1c893Smrg			    END_OF_PROGRAM(0),
1586b7e1c893Smrg			    VALID_PIXEL_MODE(0),
1587b7e1c893Smrg			    CF_INST(SQ_CF_INST_RETURN),
1588b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
1589b7e1c893Smrg			    BARRIER(1));
1590b7e1c893Smrg    /* 8/9 - dst */
    /* NOTE(review): MEGA_FETCH_COUNT(24) with MEGA_FETCH(1) presumably covers
     * all three 8-byte-spaced fetches of this sub (offsets 0, 8, 16) - confirm */
1591b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
1592b7e1c893Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
1593b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
1594b7e1c893Smrg			     BUFFER_ID(0),
1595b7e1c893Smrg			     SRC_GPR(0),
1596b7e1c893Smrg			     SRC_REL(ABSOLUTE),
1597b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
1598b7e1c893Smrg			     MEGA_FETCH_COUNT(24));
1599b7e1c893Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(2),
1600b7e1c893Smrg				 DST_REL(0),
1601b7e1c893Smrg				 DST_SEL_X(SQ_SEL_X),
1602b7e1c893Smrg				 DST_SEL_Y(SQ_SEL_Y),
1603b7e1c893Smrg				 DST_SEL_Z(SQ_SEL_0),
1604b7e1c893Smrg				 DST_SEL_W(SQ_SEL_1),
1605b7e1c893Smrg				 USE_CONST_FIELDS(0),
1606b7e1c893Smrg				 DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
1607b7e1c893Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */
1608b7e1c893Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */
1609b7e1c893Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
1610b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
1611b7e1c893Smrg			     ENDIAN_SWAP(ENDIAN_NONE),
1612b7e1c893Smrg			     CONST_BUF_NO_STRIDE(0),
1613b7e1c893Smrg			     MEGA_FETCH(1));
1614b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
1615b7e1c893Smrg    /* 10/11 - src */
1616b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
1617b7e1c893Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
1618b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
1619b7e1c893Smrg			     BUFFER_ID(0),
1620b7e1c893Smrg			     SRC_GPR(0),
1621b7e1c893Smrg			     SRC_REL(ABSOLUTE),
1622b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
1623b7e1c893Smrg			     MEGA_FETCH_COUNT(8));
1624b7e1c893Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
1625b7e1c893Smrg				 DST_REL(0),
1626b7e1c893Smrg				 DST_SEL_X(SQ_SEL_X),
1627b7e1c893Smrg				 DST_SEL_Y(SQ_SEL_Y),
1628b7e1c893Smrg				 DST_SEL_Z(SQ_SEL_0),
1629b7e1c893Smrg				 DST_SEL_W(SQ_SEL_1),
1630b7e1c893Smrg				 USE_CONST_FIELDS(0),
1631b7e1c893Smrg				 DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
1632b7e1c893Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */
1633b7e1c893Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */
1634b7e1c893Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
1635b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
1636b7e1c893Smrg			     ENDIAN_SWAP(ENDIAN_NONE),
1637b7e1c893Smrg			     CONST_BUF_NO_STRIDE(0),
1638b7e1c893Smrg			     MEGA_FETCH(0));
1639b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
1640b7e1c893Smrg    /* 12/13 - mask */
    /* This fetch overwrites gpr 0, which all three fetches also name as
     * SRC_GPR; it is the last fetch emitted in this sub. */
1641b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
1642b7e1c893Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
1643b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
1644b7e1c893Smrg			     BUFFER_ID(0),
1645b7e1c893Smrg			     SRC_GPR(0),
1646b7e1c893Smrg			     SRC_REL(ABSOLUTE),
1647b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
1648b7e1c893Smrg			     MEGA_FETCH_COUNT(8));
1649b7e1c893Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
1650b7e1c893Smrg				 DST_REL(0),
1651b7e1c893Smrg				 DST_SEL_X(SQ_SEL_X),
1652b7e1c893Smrg				 DST_SEL_Y(SQ_SEL_Y),
1653b7e1c893Smrg				 DST_SEL_Z(SQ_SEL_0),
1654b7e1c893Smrg				 DST_SEL_W(SQ_SEL_1),
1655b7e1c893Smrg				 USE_CONST_FIELDS(0),
1656b7e1c893Smrg				 DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
1657b7e1c893Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */
1658b7e1c893Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */
1659b7e1c893Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
1660b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(16),
1661b7e1c893Smrg			     ENDIAN_SWAP(ENDIAN_NONE),
1662b7e1c893Smrg			     CONST_BUF_NO_STRIDE(0),
1663b7e1c893Smrg			     MEGA_FETCH(0));
1664b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
1665b7e1c893Smrg
1666b7e1c893Smrg    /* 14 - non-mask sub */
1667b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(18));
1668b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1669b7e1c893Smrg			    CF_CONST(0),
1670b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
1671b7e1c893Smrg			    I_COUNT(2),
1672b7e1c893Smrg			    CALL_COUNT(0),
1673b7e1c893Smrg			    END_OF_PROGRAM(0),
1674b7e1c893Smrg			    VALID_PIXEL_MODE(0),
1675b7e1c893Smrg			    CF_INST(SQ_CF_INST_VTX),
1676b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
1677b7e1c893Smrg			    BARRIER(1));
1678b7e1c893Smrg    /* 15 - dst */
    /* NOTE(review): the exports in this sub pass BURST_COUNT(0) while the
     * mask sub passes BURST_COUNT(1); whether the encoded values differ
     * depends on the BURST_COUNT() macro definition - confirm. */
1679b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1680b7e1c893Smrg					  TYPE(SQ_EXPORT_POS),
1681b7e1c893Smrg					  RW_GPR(1),
1682b7e1c893Smrg					  RW_REL(ABSOLUTE),
1683b7e1c893Smrg					  INDEX_GPR(0),
1684b7e1c893Smrg					  ELEM_SIZE(0));
1685b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1686b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1687b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
1688b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
1689b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
1690b7e1c893Smrg					       BURST_COUNT(0),
1691b7e1c893Smrg					       END_OF_PROGRAM(0),
1692b7e1c893Smrg					       VALID_PIXEL_MODE(0),
1693b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1694b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
1695b7e1c893Smrg					       BARRIER(1));
1696b7e1c893Smrg    /* 16 - src */
1697b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1698b7e1c893Smrg					  TYPE(SQ_EXPORT_PARAM),
1699b7e1c893Smrg					  RW_GPR(0),
1700b7e1c893Smrg					  RW_REL(ABSOLUTE),
1701b7e1c893Smrg					  INDEX_GPR(0),
1702b7e1c893Smrg					  ELEM_SIZE(0));
1703b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1704b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1705b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
1706b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
1707b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
1708b7e1c893Smrg					       BURST_COUNT(0),
1709b7e1c893Smrg					       END_OF_PROGRAM(0),
1710b7e1c893Smrg					       VALID_PIXEL_MODE(0),
1711b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1712b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
1713b7e1c893Smrg					       BARRIER(0));
1714b7e1c893Smrg    /* 17 - CF: RETURN at the end of the non-mask sub */
1715b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(0));
1716b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1717b7e1c893Smrg			    CF_CONST(0),
1718b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
1719b7e1c893Smrg			    I_COUNT(0),
1720b7e1c893Smrg			    CALL_COUNT(0),
1721b7e1c893Smrg			    END_OF_PROGRAM(0),
1722b7e1c893Smrg			    VALID_PIXEL_MODE(0),
1723b7e1c893Smrg			    CF_INST(SQ_CF_INST_RETURN),
1724b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
1725b7e1c893Smrg			    BARRIER(1));
1726b7e1c893Smrg    /* 18/19 - dst */
1727b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
1728b7e1c893Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
1729b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
1730b7e1c893Smrg			     BUFFER_ID(0),
1731b7e1c893Smrg			     SRC_GPR(0),
1732b7e1c893Smrg			     SRC_REL(ABSOLUTE),
1733b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
1734b7e1c893Smrg			     MEGA_FETCH_COUNT(16));
1735b7e1c893Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
1736b7e1c893Smrg				 DST_REL(0),
1737b7e1c893Smrg				 DST_SEL_X(SQ_SEL_X),
1738b7e1c893Smrg				 DST_SEL_Y(SQ_SEL_Y),
1739b7e1c893Smrg				 DST_SEL_Z(SQ_SEL_0),
1740b7e1c893Smrg				 DST_SEL_W(SQ_SEL_1),
1741b7e1c893Smrg				 USE_CONST_FIELDS(0),
1742b7e1c893Smrg				 DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
1743b7e1c893Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */
1744b7e1c893Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */
1745b7e1c893Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
1746b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
1747b7e1c893Smrg			     ENDIAN_SWAP(ENDIAN_NONE),
1748b7e1c893Smrg			     CONST_BUF_NO_STRIDE(0),
1749b7e1c893Smrg			     MEGA_FETCH(1));
1750b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
1751b7e1c893Smrg    /* 20/21 - src */
1752b7e1c893Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
1753b7e1c893Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
1754b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
1755b7e1c893Smrg			     BUFFER_ID(0),
1756b7e1c893Smrg			     SRC_GPR(0),
1757b7e1c893Smrg			     SRC_REL(ABSOLUTE),
1758b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
1759b7e1c893Smrg			     MEGA_FETCH_COUNT(8));
1760b7e1c893Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
1761b7e1c893Smrg				 DST_REL(0),
1762b7e1c893Smrg				 DST_SEL_X(SQ_SEL_X),
1763b7e1c893Smrg				 DST_SEL_Y(SQ_SEL_Y),
1764b7e1c893Smrg				 DST_SEL_Z(SQ_SEL_0),
1765b7e1c893Smrg				 DST_SEL_W(SQ_SEL_1),
1766b7e1c893Smrg				 USE_CONST_FIELDS(0),
1767b7e1c893Smrg				 DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
1768b7e1c893Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */
1769b7e1c893Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */
1770b7e1c893Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
1771b7e1c893Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
1772b7e1c893Smrg			     ENDIAN_SWAP(ENDIAN_NONE),
1773b7e1c893Smrg			     CONST_BUF_NO_STRIDE(0),
1774b7e1c893Smrg			     MEGA_FETCH(0));
1775b7e1c893Smrg    shader[i++] = VTX_DWORD_PAD;
1776b7e1c893Smrg
1777b7e1c893Smrg    return i;
1778b7e1c893Smrg}
1779b7e1c893Smrg
1780b7e1c893Smrg/* comp ps --------------------------------------- */
/*
 * R600_comp_ps - write the "comp" pixel shader into a caller buffer.
 *
 * ChipSet: not referenced directly anywhere in this function body.
 * shader:  destination buffer; 8 32-bit words (shader[0]..shader[7]) are
 *          stored unconditionally, with no bounds check.
 * Returns: the number of 32-bit words written (8).
 *
 * The numbers in the inline comments count pairs of 32-bit words ("slots");
 * the TEX instruction is emitted as four words and so spans two slots.  The
 * TEX clause address ADDR(2) lines up with that slot numbering, so statements
 * must not be reordered, added or removed without updating it.
 *
 * Layout as emitted below:
 *   slot 0     CF  SQ_CF_INST_TEX, ADDR(2), I_COUNT(1)
 *   slot 1     CF  SQ_CF_INST_EXPORT_DONE of gpr 0 to CF_PIXEL_MRT0,
 *                  END_OF_PROGRAM(1)
 *   slots 2-3  SAMPLE, resource 0 / sampler 0, coords gpr 0 -> gpr 0 (src)
 */
1781b7e1c893Smrgint R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
1782b7e1c893Smrg{
1783b7e1c893Smrg    int i = 0;
1784b7e1c893Smrg
1785b7e1c893Smrg    /* 0 - CF: TEX clause at slot 2 (the single fetch emitted last) */
1786b7e1c893Smrg    shader[i++] = CF_DWORD0(ADDR(2));
1787b7e1c893Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1788b7e1c893Smrg			    CF_CONST(0),
1789b7e1c893Smrg			    COND(SQ_CF_COND_ACTIVE),
1790b7e1c893Smrg			    I_COUNT(1),
1791b7e1c893Smrg			    CALL_COUNT(0),
1792b7e1c893Smrg			    END_OF_PROGRAM(0),
1793b7e1c893Smrg			    VALID_PIXEL_MODE(0),
1794b7e1c893Smrg			    CF_INST(SQ_CF_INST_TEX),
1795b7e1c893Smrg			    WHOLE_QUAD_MODE(0),
1796b7e1c893Smrg			    BARRIER(1));
1797b7e1c893Smrg    /* 1 - CF: export gpr 0 to CF_PIXEL_MRT0; END_OF_PROGRAM(1) is set here */
1798b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
1799b7e1c893Smrg					  TYPE(SQ_EXPORT_PIXEL),
1800b7e1c893Smrg					  RW_GPR(0),
1801b7e1c893Smrg					  RW_REL(ABSOLUTE),
1802b7e1c893Smrg					  INDEX_GPR(0),
1803b7e1c893Smrg					  ELEM_SIZE(1));
1804b7e1c893Smrg
1805b7e1c893Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1806b7e1c893Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1807b7e1c893Smrg					       SRC_SEL_Z(SQ_SEL_Z),
1808b7e1c893Smrg					       SRC_SEL_W(SQ_SEL_W),
1809b7e1c893Smrg					       R6xx_ELEM_LOOP(0),
1810b7e1c893Smrg					       BURST_COUNT(1),
1811b7e1c893Smrg					       END_OF_PROGRAM(1),
1812b7e1c893Smrg					       VALID_PIXEL_MODE(0),
1813b7e1c893Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1814b7e1c893Smrg					       WHOLE_QUAD_MODE(0),
1815b7e1c893Smrg					       BARRIER(1));
1816b7e1c893Smrg
1817b7e1c893Smrg
1818b7e1c893Smrg    /* 2/3 - src */
1819b7e1c893Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1820b7e1c893Smrg			     BC_FRAC_MODE(0),
1821b7e1c893Smrg			     FETCH_WHOLE_QUAD(0),
1822b7e1c893Smrg			     RESOURCE_ID(0),
1823b7e1c893Smrg			     SRC_GPR(0),
1824b7e1c893Smrg			     SRC_REL(ABSOLUTE),
1825b7e1c893Smrg			     R7xx_ALT_CONST(0));
1826b7e1c893Smrg    shader[i++] = TEX_DWORD1(DST_GPR(0),
1827b7e1c893Smrg			     DST_REL(ABSOLUTE),
1828b7e1c893Smrg			     DST_SEL_X(SQ_SEL_X),
1829b7e1c893Smrg			     DST_SEL_Y(SQ_SEL_Y),
1830b7e1c893Smrg			     DST_SEL_Z(SQ_SEL_Z),
1831b7e1c893Smrg			     DST_SEL_W(SQ_SEL_W),
1832b7e1c893Smrg			     LOD_BIAS(0),
1833b7e1c893Smrg			     COORD_TYPE_X(TEX_NORMALIZED),
1834b7e1c893Smrg			     COORD_TYPE_Y(TEX_NORMALIZED),
1835b7e1c893Smrg			     COORD_TYPE_Z(TEX_NORMALIZED),
1836b7e1c893Smrg			     COORD_TYPE_W(TEX_NORMALIZED));
1837b7e1c893Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1838b7e1c893Smrg			     OFFSET_Y(0),
1839b7e1c893Smrg			     OFFSET_Z(0),
1840b7e1c893Smrg			     SAMPLER_ID(0),
1841b7e1c893Smrg			     SRC_SEL_X(SQ_SEL_X),
1842b7e1c893Smrg			     SRC_SEL_Y(SQ_SEL_Y),
1843b7e1c893Smrg			     SRC_SEL_Z(SQ_SEL_0),
1844b7e1c893Smrg			     SRC_SEL_W(SQ_SEL_1));
1845b7e1c893Smrg    shader[i++] = TEX_DWORD_PAD;
1846b7e1c893Smrg
1847b7e1c893Smrg    return i;
1848b7e1c893Smrg}
1849