1de2362d3Smrg/*
2de2362d3Smrg * Copyright 2010 Advanced Micro Devices, Inc.
3de2362d3Smrg *
4de2362d3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5de2362d3Smrg * copy of this software and associated documentation files (the "Software"),
6de2362d3Smrg * to deal in the Software without restriction, including without limitation
7de2362d3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8de2362d3Smrg * and/or sell copies of the Software, and to permit persons to whom the
9de2362d3Smrg * Software is furnished to do so, subject to the following conditions:
10de2362d3Smrg *
11de2362d3Smrg * The above copyright notice and this permission notice (including the next
12de2362d3Smrg * paragraph) shall be included in all copies or substantial portions of the
13de2362d3Smrg * Software.
14de2362d3Smrg *
15de2362d3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16de2362d3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17de2362d3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18de2362d3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19de2362d3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20de2362d3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21de2362d3Smrg * SOFTWARE.
22de2362d3Smrg *
23de2362d3Smrg * Author: Alex Deucher <alexander.deucher@amd.com>
24de2362d3Smrg *
25de2362d3Smrg */
26de2362d3Smrg
27de2362d3Smrg#ifdef HAVE_CONFIG_H
28de2362d3Smrg#include "config.h"
29de2362d3Smrg#endif
30de2362d3Smrg
31de2362d3Smrg#include "xf86.h"
32de2362d3Smrg
33de2362d3Smrg#include "evergreen_shader.h"
34de2362d3Smrg#include "evergreen_reg.h"
35de2362d3Smrg
36de2362d3Smrg/* solid vs --------------------------------------- */
37de2362d3Smrgint evergreen_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader)
38de2362d3Smrg{
39de2362d3Smrg    int i = 0;
40de2362d3Smrg
41de2362d3Smrg    /* 0 */
42de2362d3Smrg    shader[i++] = CF_DWORD0(ADDR(4),
43de2362d3Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
44de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
45de2362d3Smrg			    CF_CONST(0),
46de2362d3Smrg			    COND(SQ_CF_COND_ACTIVE),
47de2362d3Smrg			    I_COUNT(1),
48de2362d3Smrg			    VALID_PIXEL_MODE(0),
49de2362d3Smrg			    END_OF_PROGRAM(0),
50de2362d3Smrg			    CF_INST(SQ_CF_INST_VC),
51de2362d3Smrg			    WHOLE_QUAD_MODE(0),
52de2362d3Smrg			    BARRIER(1));
53de2362d3Smrg    /* 1 */
54de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
55de2362d3Smrg					  TYPE(SQ_EXPORT_POS),
56de2362d3Smrg					  RW_GPR(1),
57de2362d3Smrg					  RW_REL(ABSOLUTE),
58de2362d3Smrg					  INDEX_GPR(0),
59de2362d3Smrg					  ELEM_SIZE(0));
60de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
61de2362d3Smrg					       SRC_SEL_Y(SQ_SEL_Y),
62de2362d3Smrg					       SRC_SEL_Z(SQ_SEL_Z),
63de2362d3Smrg					       SRC_SEL_W(SQ_SEL_W),
64de2362d3Smrg					       BURST_COUNT(1),
65de2362d3Smrg					       VALID_PIXEL_MODE(0),
66de2362d3Smrg					       END_OF_PROGRAM(0),
67de2362d3Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
68de2362d3Smrg					       MARK(0),
69de2362d3Smrg					       BARRIER(1));
70de2362d3Smrg    /* 2 - always export a param whether it's used or not */
71de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
72de2362d3Smrg					  TYPE(SQ_EXPORT_PARAM),
73de2362d3Smrg					  RW_GPR(0),
74de2362d3Smrg					  RW_REL(ABSOLUTE),
75de2362d3Smrg					  INDEX_GPR(0),
76de2362d3Smrg					  ELEM_SIZE(0));
77de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
78de2362d3Smrg					       SRC_SEL_Y(SQ_SEL_Y),
79de2362d3Smrg					       SRC_SEL_Z(SQ_SEL_Z),
80de2362d3Smrg					       SRC_SEL_W(SQ_SEL_W),
81de2362d3Smrg					       BURST_COUNT(0),
82de2362d3Smrg					       VALID_PIXEL_MODE(0),
83de2362d3Smrg					       END_OF_PROGRAM(1),
84de2362d3Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
85de2362d3Smrg					       MARK(0),
86de2362d3Smrg					       BARRIER(0));
87de2362d3Smrg    /* 3 - padding */
88de2362d3Smrg    shader[i++] = 0x00000000;
89de2362d3Smrg    shader[i++] = 0x00000000;
90de2362d3Smrg    /* 4/5 */
91de2362d3Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
92de2362d3Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
93de2362d3Smrg			     FETCH_WHOLE_QUAD(0),
94de2362d3Smrg			     BUFFER_ID(0),
95de2362d3Smrg			     SRC_GPR(0),
96de2362d3Smrg			     SRC_REL(ABSOLUTE),
97de2362d3Smrg			     SRC_SEL_X(SQ_SEL_X),
98de2362d3Smrg			     MEGA_FETCH_COUNT(8));
99de2362d3Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
100de2362d3Smrg				 DST_REL(0),
101de2362d3Smrg				 DST_SEL_X(SQ_SEL_X),
102de2362d3Smrg				 DST_SEL_Y(SQ_SEL_Y),
103de2362d3Smrg				 DST_SEL_Z(SQ_SEL_0),
104de2362d3Smrg				 DST_SEL_W(SQ_SEL_1),
105de2362d3Smrg				 USE_CONST_FIELDS(0),
106de2362d3Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
107de2362d3Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
108de2362d3Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
109de2362d3Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
110de2362d3Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
111de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
112de2362d3Smrg			     ENDIAN_SWAP(SQ_ENDIAN_8IN32),
113de2362d3Smrg#else
114de2362d3Smrg			     ENDIAN_SWAP(SQ_ENDIAN_NONE),
115de2362d3Smrg#endif
116de2362d3Smrg			     CONST_BUF_NO_STRIDE(0),
117de2362d3Smrg			     MEGA_FETCH(1),
118de2362d3Smrg			     ALT_CONST(0),
119de2362d3Smrg			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
120de2362d3Smrg    shader[i++] = VTX_DWORD_PAD;
121de2362d3Smrg
122de2362d3Smrg    return i;
123de2362d3Smrg}
124de2362d3Smrg
125de2362d3Smrg/* solid ps --------------------------------------- */
126de2362d3Smrgint evergreen_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader)
127de2362d3Smrg{
128de2362d3Smrg    int i = 0;
129de2362d3Smrg
130de2362d3Smrg    /* 0 */
131de2362d3Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(2),
132de2362d3Smrg				KCACHE_BANK0(0),
133de2362d3Smrg				KCACHE_BANK1(0),
134de2362d3Smrg				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
135de2362d3Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
136de2362d3Smrg				KCACHE_ADDR0(0),
137de2362d3Smrg				KCACHE_ADDR1(0),
138de2362d3Smrg				I_COUNT(4),
139de2362d3Smrg				ALT_CONST(0),
140de2362d3Smrg				CF_INST(SQ_CF_INST_ALU),
141de2362d3Smrg				WHOLE_QUAD_MODE(0),
142de2362d3Smrg				BARRIER(1));
143de2362d3Smrg    /* 1 */
144de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
145de2362d3Smrg					  TYPE(SQ_EXPORT_PIXEL),
146de2362d3Smrg					  RW_GPR(0),
147de2362d3Smrg					  RW_REL(ABSOLUTE),
148de2362d3Smrg					  INDEX_GPR(0),
149de2362d3Smrg					  ELEM_SIZE(1));
150de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
151de2362d3Smrg					       SRC_SEL_Y(SQ_SEL_Y),
152de2362d3Smrg					       SRC_SEL_Z(SQ_SEL_Z),
153de2362d3Smrg					       SRC_SEL_W(SQ_SEL_W),
154de2362d3Smrg					       BURST_COUNT(1),
155de2362d3Smrg					       VALID_PIXEL_MODE(0),
156de2362d3Smrg					       END_OF_PROGRAM(1),
157de2362d3Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
158de2362d3Smrg					       MARK(0),
159de2362d3Smrg					       BARRIER(1));
160de2362d3Smrg
161de2362d3Smrg    /* 2 */
162de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
163de2362d3Smrg			     SRC0_REL(ABSOLUTE),
164de2362d3Smrg			     SRC0_ELEM(ELEM_X),
165de2362d3Smrg			     SRC0_NEG(0),
166de2362d3Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
167de2362d3Smrg			     SRC1_REL(ABSOLUTE),
168de2362d3Smrg			     SRC1_ELEM(ELEM_X),
169de2362d3Smrg			     SRC1_NEG(0),
170de2362d3Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
171de2362d3Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
172de2362d3Smrg			     LAST(0));
173de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
174de2362d3Smrg				 SRC1_ABS(0),
175de2362d3Smrg				 UPDATE_EXECUTE_MASK(0),
176de2362d3Smrg				 UPDATE_PRED(0),
177de2362d3Smrg				 WRITE_MASK(1),
178de2362d3Smrg				 OMOD(SQ_ALU_OMOD_OFF),
179de2362d3Smrg				 ALU_INST(SQ_OP2_INST_MOV),
180de2362d3Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
181de2362d3Smrg				 DST_GPR(0),
182de2362d3Smrg				 DST_REL(ABSOLUTE),
183de2362d3Smrg				 DST_ELEM(ELEM_X),
184de2362d3Smrg				 CLAMP(1));
185de2362d3Smrg    /* 3 */
186de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
187de2362d3Smrg			     SRC0_REL(ABSOLUTE),
188de2362d3Smrg			     SRC0_ELEM(ELEM_Y),
189de2362d3Smrg			     SRC0_NEG(0),
190de2362d3Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
191de2362d3Smrg			     SRC1_REL(ABSOLUTE),
192de2362d3Smrg			     SRC1_ELEM(ELEM_Y),
193de2362d3Smrg			     SRC1_NEG(0),
194de2362d3Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
195de2362d3Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
196de2362d3Smrg			     LAST(0));
197de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
198de2362d3Smrg				 SRC1_ABS(0),
199de2362d3Smrg				 UPDATE_EXECUTE_MASK(0),
200de2362d3Smrg				 UPDATE_PRED(0),
201de2362d3Smrg				 WRITE_MASK(1),
202de2362d3Smrg				 OMOD(SQ_ALU_OMOD_OFF),
203de2362d3Smrg				 ALU_INST(SQ_OP2_INST_MOV),
204de2362d3Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
205de2362d3Smrg				 DST_GPR(0),
206de2362d3Smrg				 DST_REL(ABSOLUTE),
207de2362d3Smrg				 DST_ELEM(ELEM_Y),
208de2362d3Smrg				 CLAMP(1));
209de2362d3Smrg    /* 4 */
210de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
211de2362d3Smrg			     SRC0_REL(ABSOLUTE),
212de2362d3Smrg			     SRC0_ELEM(ELEM_Z),
213de2362d3Smrg			     SRC0_NEG(0),
214de2362d3Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
215de2362d3Smrg			     SRC1_REL(ABSOLUTE),
216de2362d3Smrg			     SRC1_ELEM(ELEM_Z),
217de2362d3Smrg			     SRC1_NEG(0),
218de2362d3Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
219de2362d3Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
220de2362d3Smrg			     LAST(0));
221de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
222de2362d3Smrg				 SRC1_ABS(0),
223de2362d3Smrg				 UPDATE_EXECUTE_MASK(0),
224de2362d3Smrg				 UPDATE_PRED(0),
225de2362d3Smrg				 WRITE_MASK(1),
226de2362d3Smrg				 OMOD(SQ_ALU_OMOD_OFF),
227de2362d3Smrg				 ALU_INST(SQ_OP2_INST_MOV),
228de2362d3Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
229de2362d3Smrg				 DST_GPR(0),
230de2362d3Smrg				 DST_REL(ABSOLUTE),
231de2362d3Smrg				 DST_ELEM(ELEM_Z),
232de2362d3Smrg				 CLAMP(1));
233de2362d3Smrg    /* 5 */
234de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
235de2362d3Smrg			     SRC0_REL(ABSOLUTE),
236de2362d3Smrg			     SRC0_ELEM(ELEM_W),
237de2362d3Smrg			     SRC0_NEG(0),
238de2362d3Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
239de2362d3Smrg			     SRC1_REL(ABSOLUTE),
240de2362d3Smrg			     SRC1_ELEM(ELEM_W),
241de2362d3Smrg			     SRC1_NEG(0),
242de2362d3Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
243de2362d3Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
244de2362d3Smrg			     LAST(1));
245de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
246de2362d3Smrg				 SRC1_ABS(0),
247de2362d3Smrg				 UPDATE_EXECUTE_MASK(0),
248de2362d3Smrg				 UPDATE_PRED(0),
249de2362d3Smrg				 WRITE_MASK(1),
250de2362d3Smrg				 OMOD(SQ_ALU_OMOD_OFF),
251de2362d3Smrg				 ALU_INST(SQ_OP2_INST_MOV),
252de2362d3Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
253de2362d3Smrg				 DST_GPR(0),
254de2362d3Smrg				 DST_REL(ABSOLUTE),
255de2362d3Smrg				 DST_ELEM(ELEM_W),
256de2362d3Smrg				 CLAMP(1));
257de2362d3Smrg
258de2362d3Smrg    return i;
259de2362d3Smrg}
260de2362d3Smrg
261de2362d3Smrg/* copy vs --------------------------------------- */
262de2362d3Smrgint evergreen_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader)
263de2362d3Smrg{
264de2362d3Smrg    int i = 0;
265de2362d3Smrg
266de2362d3Smrg    /* 0 */
267de2362d3Smrg    shader[i++] = CF_DWORD0(ADDR(4),
268de2362d3Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
269de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
270de2362d3Smrg			    CF_CONST(0),
271de2362d3Smrg			    COND(SQ_CF_COND_ACTIVE),
272de2362d3Smrg			    I_COUNT(2),
273de2362d3Smrg			    VALID_PIXEL_MODE(0),
274de2362d3Smrg			    END_OF_PROGRAM(0),
275de2362d3Smrg			    CF_INST(SQ_CF_INST_VC),
276de2362d3Smrg			    WHOLE_QUAD_MODE(0),
277de2362d3Smrg			    BARRIER(1));
278de2362d3Smrg    /* 1 */
279de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
280de2362d3Smrg					  TYPE(SQ_EXPORT_POS),
281de2362d3Smrg					  RW_GPR(1),
282de2362d3Smrg					  RW_REL(ABSOLUTE),
283de2362d3Smrg					  INDEX_GPR(0),
284de2362d3Smrg					  ELEM_SIZE(0));
285de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
286de2362d3Smrg					       SRC_SEL_Y(SQ_SEL_Y),
287de2362d3Smrg					       SRC_SEL_Z(SQ_SEL_Z),
288de2362d3Smrg					       SRC_SEL_W(SQ_SEL_W),
289de2362d3Smrg					       BURST_COUNT(0),
290de2362d3Smrg					       VALID_PIXEL_MODE(0),
291de2362d3Smrg					       END_OF_PROGRAM(0),
292de2362d3Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
293de2362d3Smrg					       MARK(0),
294de2362d3Smrg					       BARRIER(1));
295de2362d3Smrg    /* 2 */
296de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
297de2362d3Smrg					  TYPE(SQ_EXPORT_PARAM),
298de2362d3Smrg					  RW_GPR(0),
299de2362d3Smrg					  RW_REL(ABSOLUTE),
300de2362d3Smrg					  INDEX_GPR(0),
301de2362d3Smrg					  ELEM_SIZE(0));
302de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
303de2362d3Smrg					       SRC_SEL_Y(SQ_SEL_Y),
304de2362d3Smrg					       SRC_SEL_Z(SQ_SEL_Z),
305de2362d3Smrg					       SRC_SEL_W(SQ_SEL_W),
306de2362d3Smrg					       BURST_COUNT(0),
307de2362d3Smrg					       VALID_PIXEL_MODE(0),
308de2362d3Smrg					       END_OF_PROGRAM(1),
309de2362d3Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
310de2362d3Smrg					       MARK(0),
311de2362d3Smrg					       BARRIER(0));
312de2362d3Smrg    /* 3 */
313de2362d3Smrg    shader[i++] = 0x00000000;
314de2362d3Smrg    shader[i++] = 0x00000000;
315de2362d3Smrg    /* 4/5 */
316de2362d3Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
317de2362d3Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
318de2362d3Smrg			     FETCH_WHOLE_QUAD(0),
319de2362d3Smrg			     BUFFER_ID(0),
320de2362d3Smrg			     SRC_GPR(0),
321de2362d3Smrg			     SRC_REL(ABSOLUTE),
322de2362d3Smrg			     SRC_SEL_X(SQ_SEL_X),
323de2362d3Smrg			     MEGA_FETCH_COUNT(16));
324de2362d3Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
325de2362d3Smrg				 DST_REL(0),
326de2362d3Smrg				 DST_SEL_X(SQ_SEL_X),
327de2362d3Smrg				 DST_SEL_Y(SQ_SEL_Y),
328de2362d3Smrg				 DST_SEL_Z(SQ_SEL_0),
329de2362d3Smrg				 DST_SEL_W(SQ_SEL_1),
330de2362d3Smrg				 USE_CONST_FIELDS(0),
331de2362d3Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
332de2362d3Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
333de2362d3Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
334de2362d3Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
335de2362d3Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
336de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
337de2362d3Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
338de2362d3Smrg#else
339de2362d3Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
340de2362d3Smrg#endif
341de2362d3Smrg			     CONST_BUF_NO_STRIDE(0),
342de2362d3Smrg			     MEGA_FETCH(1),
343de2362d3Smrg			     ALT_CONST(0),
344de2362d3Smrg			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
345de2362d3Smrg    shader[i++] = VTX_DWORD_PAD;
346de2362d3Smrg    /* 6/7 */
347de2362d3Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
348de2362d3Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
349de2362d3Smrg			     FETCH_WHOLE_QUAD(0),
350de2362d3Smrg			     BUFFER_ID(0),
351de2362d3Smrg			     SRC_GPR(0),
352de2362d3Smrg			     SRC_REL(ABSOLUTE),
353de2362d3Smrg			     SRC_SEL_X(SQ_SEL_X),
354de2362d3Smrg			     MEGA_FETCH_COUNT(8));
355de2362d3Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
356de2362d3Smrg				 DST_REL(0),
357de2362d3Smrg				 DST_SEL_X(SQ_SEL_X),
358de2362d3Smrg				 DST_SEL_Y(SQ_SEL_Y),
359de2362d3Smrg				 DST_SEL_Z(SQ_SEL_0),
360de2362d3Smrg				 DST_SEL_W(SQ_SEL_1),
361de2362d3Smrg				 USE_CONST_FIELDS(0),
362de2362d3Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
363de2362d3Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
364de2362d3Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
365de2362d3Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
366de2362d3Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
367de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
368de2362d3Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
369de2362d3Smrg#else
370de2362d3Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
371de2362d3Smrg#endif
372de2362d3Smrg			     CONST_BUF_NO_STRIDE(0),
373de2362d3Smrg			     MEGA_FETCH(0),
374de2362d3Smrg			     ALT_CONST(0),
375de2362d3Smrg			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
376de2362d3Smrg    shader[i++] = VTX_DWORD_PAD;
377de2362d3Smrg
378de2362d3Smrg    return i;
379de2362d3Smrg}
380de2362d3Smrg
381de2362d3Smrg/* copy ps --------------------------------------- */
382de2362d3Smrgint evergreen_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader)
383de2362d3Smrg{
384de2362d3Smrg    int i = 0;
385de2362d3Smrg
386de2362d3Smrg    /* CF INST 0 */
387de2362d3Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(3),
388de2362d3Smrg				KCACHE_BANK0(0),
389de2362d3Smrg				KCACHE_BANK1(0),
390de2362d3Smrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
391de2362d3Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
392de2362d3Smrg				KCACHE_ADDR0(0),
393de2362d3Smrg				KCACHE_ADDR1(0),
394de2362d3Smrg				I_COUNT(4),
395de2362d3Smrg				ALT_CONST(0),
396de2362d3Smrg				CF_INST(SQ_CF_INST_ALU),
397de2362d3Smrg				WHOLE_QUAD_MODE(0),
398de2362d3Smrg				BARRIER(1));
399de2362d3Smrg    /* CF INST 1 */
400de2362d3Smrg    shader[i++] = CF_DWORD0(ADDR(8),
401de2362d3Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
402de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
403de2362d3Smrg			    CF_CONST(0),
404de2362d3Smrg			    COND(SQ_CF_COND_ACTIVE),
405de2362d3Smrg			    I_COUNT(1),
406de2362d3Smrg			    VALID_PIXEL_MODE(0),
407de2362d3Smrg			    END_OF_PROGRAM(0),
408de2362d3Smrg			    CF_INST(SQ_CF_INST_TC),
409de2362d3Smrg			    WHOLE_QUAD_MODE(0),
410de2362d3Smrg			    BARRIER(1));
411de2362d3Smrg    /* CF INST 2 */
412de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
413de2362d3Smrg					  TYPE(SQ_EXPORT_PIXEL),
414de2362d3Smrg					  RW_GPR(0),
415de2362d3Smrg					  RW_REL(ABSOLUTE),
416de2362d3Smrg					  INDEX_GPR(0),
417de2362d3Smrg					  ELEM_SIZE(1));
418de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
419de2362d3Smrg					       SRC_SEL_Y(SQ_SEL_Y),
420de2362d3Smrg					       SRC_SEL_Z(SQ_SEL_Z),
421de2362d3Smrg					       SRC_SEL_W(SQ_SEL_W),
422de2362d3Smrg					       BURST_COUNT(1),
423de2362d3Smrg					       VALID_PIXEL_MODE(0),
424de2362d3Smrg					       END_OF_PROGRAM(1),
425de2362d3Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
426de2362d3Smrg					       MARK(0),
427de2362d3Smrg					       BARRIER(1));
428de2362d3Smrg
429de2362d3Smrg    /* 3 interpolate tex coords */
430de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
431de2362d3Smrg			     SRC0_REL(ABSOLUTE),
432de2362d3Smrg			     SRC0_ELEM(ELEM_Y),
433de2362d3Smrg			     SRC0_NEG(0),
434de2362d3Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
435de2362d3Smrg			     SRC1_REL(ABSOLUTE),
436de2362d3Smrg			     SRC1_ELEM(ELEM_X),
437de2362d3Smrg			     SRC1_NEG(0),
438de2362d3Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
439de2362d3Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
440de2362d3Smrg			     LAST(0));
441de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
442de2362d3Smrg				 SRC1_ABS(0),
443de2362d3Smrg				 UPDATE_EXECUTE_MASK(0),
444de2362d3Smrg				 UPDATE_PRED(0),
445de2362d3Smrg				 WRITE_MASK(1),
446de2362d3Smrg				 OMOD(SQ_ALU_OMOD_OFF),
447de2362d3Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
448de2362d3Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
449de2362d3Smrg				 DST_GPR(0),
450de2362d3Smrg				 DST_REL(ABSOLUTE),
451de2362d3Smrg				 DST_ELEM(ELEM_X),
452de2362d3Smrg				 CLAMP(0));
453de2362d3Smrg    /* 4 */
454de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
455de2362d3Smrg			     SRC0_REL(ABSOLUTE),
456de2362d3Smrg			     SRC0_ELEM(ELEM_X),
457de2362d3Smrg			     SRC0_NEG(0),
458de2362d3Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
459de2362d3Smrg			     SRC1_REL(ABSOLUTE),
460de2362d3Smrg			     SRC1_ELEM(ELEM_X),
461de2362d3Smrg			     SRC1_NEG(0),
462de2362d3Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
463de2362d3Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
464de2362d3Smrg			     LAST(0));
465de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
466de2362d3Smrg				 SRC1_ABS(0),
467de2362d3Smrg				 UPDATE_EXECUTE_MASK(0),
468de2362d3Smrg				 UPDATE_PRED(0),
469de2362d3Smrg				 WRITE_MASK(1),
470de2362d3Smrg				 OMOD(SQ_ALU_OMOD_OFF),
471de2362d3Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
472de2362d3Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
473de2362d3Smrg				 DST_GPR(0),
474de2362d3Smrg				 DST_REL(ABSOLUTE),
475de2362d3Smrg				 DST_ELEM(ELEM_Y),
476de2362d3Smrg				 CLAMP(0));
477de2362d3Smrg    /* 5 */
478de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
479de2362d3Smrg			     SRC0_REL(ABSOLUTE),
480de2362d3Smrg			     SRC0_ELEM(ELEM_Y),
481de2362d3Smrg			     SRC0_NEG(0),
482de2362d3Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
483de2362d3Smrg			     SRC1_REL(ABSOLUTE),
484de2362d3Smrg			     SRC1_ELEM(ELEM_X),
485de2362d3Smrg			     SRC1_NEG(0),
486de2362d3Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
487de2362d3Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
488de2362d3Smrg			     LAST(0));
489de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
490de2362d3Smrg				 SRC1_ABS(0),
491de2362d3Smrg				 UPDATE_EXECUTE_MASK(0),
492de2362d3Smrg				 UPDATE_PRED(0),
493de2362d3Smrg				 WRITE_MASK(0),
494de2362d3Smrg				 OMOD(SQ_ALU_OMOD_OFF),
495de2362d3Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
496de2362d3Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
497de2362d3Smrg				 DST_GPR(0),
498de2362d3Smrg				 DST_REL(ABSOLUTE),
499de2362d3Smrg				 DST_ELEM(ELEM_Z),
500de2362d3Smrg				 CLAMP(0));
501de2362d3Smrg    /* 6 */
502de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
503de2362d3Smrg			     SRC0_REL(ABSOLUTE),
504de2362d3Smrg			     SRC0_ELEM(ELEM_X),
505de2362d3Smrg			     SRC0_NEG(0),
506de2362d3Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
507de2362d3Smrg			     SRC1_REL(ABSOLUTE),
508de2362d3Smrg			     SRC1_ELEM(ELEM_X),
509de2362d3Smrg			     SRC1_NEG(0),
510de2362d3Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
511de2362d3Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
512de2362d3Smrg			     LAST(1));
513de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
514de2362d3Smrg				 SRC1_ABS(0),
515de2362d3Smrg				 UPDATE_EXECUTE_MASK(0),
516de2362d3Smrg				 UPDATE_PRED(0),
517de2362d3Smrg				 WRITE_MASK(0),
518de2362d3Smrg				 OMOD(SQ_ALU_OMOD_OFF),
519de2362d3Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
520de2362d3Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
521de2362d3Smrg				 DST_GPR(0),
522de2362d3Smrg				 DST_REL(ABSOLUTE),
523de2362d3Smrg				 DST_ELEM(ELEM_W),
524de2362d3Smrg				 CLAMP(0));
525de2362d3Smrg
526de2362d3Smrg    /* 7 */
527de2362d3Smrg    shader[i++] = 0x00000000;
528de2362d3Smrg    shader[i++] = 0x00000000;
529de2362d3Smrg
530de2362d3Smrg    /* 8/9 TEX INST 0 */
531de2362d3Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
532de2362d3Smrg			     INST_MOD(0),
533de2362d3Smrg			     FETCH_WHOLE_QUAD(0),
534de2362d3Smrg			     RESOURCE_ID(0),
535de2362d3Smrg			     SRC_GPR(0),
536de2362d3Smrg			     SRC_REL(ABSOLUTE),
537de2362d3Smrg			     ALT_CONST(0),
538de2362d3Smrg			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
539de2362d3Smrg			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
540de2362d3Smrg    shader[i++] = TEX_DWORD1(DST_GPR(0),
541de2362d3Smrg			     DST_REL(ABSOLUTE),
542de2362d3Smrg			     DST_SEL_X(SQ_SEL_X), /* R */
543de2362d3Smrg			     DST_SEL_Y(SQ_SEL_Y), /* G */
544de2362d3Smrg			     DST_SEL_Z(SQ_SEL_Z), /* B */
545de2362d3Smrg			     DST_SEL_W(SQ_SEL_W), /* A */
546de2362d3Smrg			     LOD_BIAS(0),
547de2362d3Smrg			     COORD_TYPE_X(TEX_UNNORMALIZED),
548de2362d3Smrg			     COORD_TYPE_Y(TEX_UNNORMALIZED),
549de2362d3Smrg			     COORD_TYPE_Z(TEX_UNNORMALIZED),
550de2362d3Smrg			     COORD_TYPE_W(TEX_UNNORMALIZED));
551de2362d3Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
552de2362d3Smrg			     OFFSET_Y(0),
553de2362d3Smrg			     OFFSET_Z(0),
554de2362d3Smrg			     SAMPLER_ID(0),
555de2362d3Smrg			     SRC_SEL_X(SQ_SEL_X),
556de2362d3Smrg			     SRC_SEL_Y(SQ_SEL_Y),
557de2362d3Smrg			     SRC_SEL_Z(SQ_SEL_0),
558de2362d3Smrg			     SRC_SEL_W(SQ_SEL_1));
559de2362d3Smrg    shader[i++] = TEX_DWORD_PAD;
560de2362d3Smrg
561de2362d3Smrg    return i;
562de2362d3Smrg}
563de2362d3Smrg
564de2362d3Smrgint evergreen_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
565de2362d3Smrg{
566de2362d3Smrg    int i = 0;
567de2362d3Smrg
568de2362d3Smrg    /* 0 */
569de2362d3Smrg    shader[i++] = CF_DWORD0(ADDR(6),
570de2362d3Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
571de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
572de2362d3Smrg                            CF_CONST(0),
573de2362d3Smrg                            COND(SQ_CF_COND_ACTIVE),
574de2362d3Smrg                            I_COUNT(2),
575de2362d3Smrg                            VALID_PIXEL_MODE(0),
576de2362d3Smrg                            END_OF_PROGRAM(0),
577de2362d3Smrg                            CF_INST(SQ_CF_INST_VC),
578de2362d3Smrg                            WHOLE_QUAD_MODE(0),
579de2362d3Smrg                            BARRIER(1));
580de2362d3Smrg
581de2362d3Smrg    /* 1 - ALU */
582de2362d3Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(4),
583de2362d3Smrg				KCACHE_BANK0(0),
584de2362d3Smrg				KCACHE_BANK1(0),
585de2362d3Smrg				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
586de2362d3Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
587de2362d3Smrg				KCACHE_ADDR0(0),
588de2362d3Smrg				KCACHE_ADDR1(0),
589de2362d3Smrg				I_COUNT(2),
590de2362d3Smrg				ALT_CONST(0),
591de2362d3Smrg				CF_INST(SQ_CF_INST_ALU),
592de2362d3Smrg				WHOLE_QUAD_MODE(0),
593de2362d3Smrg				BARRIER(1));
594de2362d3Smrg
595de2362d3Smrg    /* 2 */
596de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
597de2362d3Smrg                                          TYPE(SQ_EXPORT_POS),
598de2362d3Smrg                                          RW_GPR(1),
599de2362d3Smrg                                          RW_REL(ABSOLUTE),
600de2362d3Smrg                                          INDEX_GPR(0),
601de2362d3Smrg                                          ELEM_SIZE(3));
602de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
603de2362d3Smrg                                               SRC_SEL_Y(SQ_SEL_Y),
604de2362d3Smrg                                               SRC_SEL_Z(SQ_SEL_Z),
605de2362d3Smrg                                               SRC_SEL_W(SQ_SEL_W),
606de2362d3Smrg                                               BURST_COUNT(1),
607de2362d3Smrg                                               VALID_PIXEL_MODE(0),
608de2362d3Smrg                                               END_OF_PROGRAM(0),
609de2362d3Smrg                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
610de2362d3Smrg                                               MARK(0),
611de2362d3Smrg                                               BARRIER(1));
612de2362d3Smrg    /* 3 */
613de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
614de2362d3Smrg                                          TYPE(SQ_EXPORT_PARAM),
615de2362d3Smrg                                          RW_GPR(0),
616de2362d3Smrg                                          RW_REL(ABSOLUTE),
617de2362d3Smrg                                          INDEX_GPR(0),
618de2362d3Smrg                                          ELEM_SIZE(3));
619de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
620de2362d3Smrg                                               SRC_SEL_Y(SQ_SEL_Y),
621de2362d3Smrg                                               SRC_SEL_Z(SQ_SEL_Z),
622de2362d3Smrg                                               SRC_SEL_W(SQ_SEL_W),
623de2362d3Smrg                                               BURST_COUNT(1),
624de2362d3Smrg                                               VALID_PIXEL_MODE(0),
625de2362d3Smrg                                               END_OF_PROGRAM(1),
626de2362d3Smrg                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
627de2362d3Smrg                                               MARK(0),
628de2362d3Smrg                                               BARRIER(0));
629de2362d3Smrg
630de2362d3Smrg
631de2362d3Smrg    /* 4 texX / w */
632de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
633de2362d3Smrg                             SRC0_REL(ABSOLUTE),
634de2362d3Smrg                             SRC0_ELEM(ELEM_X),
635de2362d3Smrg                             SRC0_NEG(0),
636de2362d3Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
637de2362d3Smrg                             SRC1_REL(ABSOLUTE),
638de2362d3Smrg                             SRC1_ELEM(ELEM_X),
639de2362d3Smrg                             SRC1_NEG(0),
640de2362d3Smrg                             INDEX_MODE(SQ_INDEX_AR_X),
641de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
642de2362d3Smrg                             LAST(0));
643de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
644de2362d3Smrg                                 SRC1_ABS(0),
645de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
646de2362d3Smrg                                 UPDATE_PRED(0),
647de2362d3Smrg                                 WRITE_MASK(1),
648de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
649de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_MUL),
650de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
651de2362d3Smrg                                 DST_GPR(0),
652de2362d3Smrg                                 DST_REL(ABSOLUTE),
653de2362d3Smrg                                 DST_ELEM(ELEM_X),
654de2362d3Smrg                                 CLAMP(0));
655de2362d3Smrg
656de2362d3Smrg    /* 5 texY / h */
657de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
658de2362d3Smrg                             SRC0_REL(ABSOLUTE),
659de2362d3Smrg                             SRC0_ELEM(ELEM_Y),
660de2362d3Smrg                             SRC0_NEG(0),
661de2362d3Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
662de2362d3Smrg                             SRC1_REL(ABSOLUTE),
663de2362d3Smrg                             SRC1_ELEM(ELEM_Y),
664de2362d3Smrg                             SRC1_NEG(0),
665de2362d3Smrg                             INDEX_MODE(SQ_INDEX_AR_X),
666de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
667de2362d3Smrg                             LAST(1));
668de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
669de2362d3Smrg                                 SRC1_ABS(0),
670de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
671de2362d3Smrg                                 UPDATE_PRED(0),
672de2362d3Smrg                                 WRITE_MASK(1),
673de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
674de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_MUL),
675de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
676de2362d3Smrg                                 DST_GPR(0),
677de2362d3Smrg                                 DST_REL(ABSOLUTE),
678de2362d3Smrg                                 DST_ELEM(ELEM_Y),
679de2362d3Smrg                                 CLAMP(0));
680de2362d3Smrg
681de2362d3Smrg    /* 6/7 */
682de2362d3Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
683de2362d3Smrg                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
684de2362d3Smrg                             FETCH_WHOLE_QUAD(0),
685de2362d3Smrg                             BUFFER_ID(0),
686de2362d3Smrg                             SRC_GPR(0),
687de2362d3Smrg                             SRC_REL(ABSOLUTE),
688de2362d3Smrg                             SRC_SEL_X(SQ_SEL_X),
689de2362d3Smrg                             MEGA_FETCH_COUNT(16));
690de2362d3Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
691de2362d3Smrg                                 DST_REL(ABSOLUTE),
692de2362d3Smrg                                 DST_SEL_X(SQ_SEL_X),
693de2362d3Smrg                                 DST_SEL_Y(SQ_SEL_Y),
694de2362d3Smrg                                 DST_SEL_Z(SQ_SEL_0),
695de2362d3Smrg                                 DST_SEL_W(SQ_SEL_1),
696de2362d3Smrg                                 USE_CONST_FIELDS(0),
697de2362d3Smrg                                 DATA_FORMAT(FMT_32_32_FLOAT),
698de2362d3Smrg                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
699de2362d3Smrg                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
700de2362d3Smrg                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
701de2362d3Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
702de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
703de2362d3Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
704de2362d3Smrg#else
705de2362d3Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
706de2362d3Smrg#endif
707de2362d3Smrg                             CONST_BUF_NO_STRIDE(0),
708de2362d3Smrg                             MEGA_FETCH(1),
709de2362d3Smrg			     ALT_CONST(0),
710de2362d3Smrg			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
711de2362d3Smrg    shader[i++] = VTX_DWORD_PAD;
712de2362d3Smrg    /* 8/9 */
713de2362d3Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
714de2362d3Smrg                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
715de2362d3Smrg                             FETCH_WHOLE_QUAD(0),
716de2362d3Smrg                             BUFFER_ID(0),
717de2362d3Smrg                             SRC_GPR(0),
718de2362d3Smrg                             SRC_REL(ABSOLUTE),
719de2362d3Smrg                             SRC_SEL_X(SQ_SEL_X),
720de2362d3Smrg                             MEGA_FETCH_COUNT(8));
721de2362d3Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
722de2362d3Smrg                                 DST_REL(ABSOLUTE),
723de2362d3Smrg                                 DST_SEL_X(SQ_SEL_X),
724de2362d3Smrg                                 DST_SEL_Y(SQ_SEL_Y),
725de2362d3Smrg                                 DST_SEL_Z(SQ_SEL_0),
726de2362d3Smrg                                 DST_SEL_W(SQ_SEL_1),
727de2362d3Smrg                                 USE_CONST_FIELDS(0),
728de2362d3Smrg                                 DATA_FORMAT(FMT_32_32_FLOAT),
729de2362d3Smrg                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
730de2362d3Smrg                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
731de2362d3Smrg                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
732de2362d3Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
733de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
734de2362d3Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
735de2362d3Smrg#else
736de2362d3Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
737de2362d3Smrg#endif
738de2362d3Smrg                             CONST_BUF_NO_STRIDE(0),
739de2362d3Smrg                             MEGA_FETCH(0),
740de2362d3Smrg			     ALT_CONST(0),
741de2362d3Smrg			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
742de2362d3Smrg    shader[i++] = VTX_DWORD_PAD;
743de2362d3Smrg
744de2362d3Smrg    return i;
745de2362d3Smrg}
746de2362d3Smrg
747de2362d3Smrgint evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
748de2362d3Smrg{
749de2362d3Smrg    int i = 0;
750de2362d3Smrg
751de2362d3Smrg    /* 0 */
752de2362d3Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(5),
753de2362d3Smrg				KCACHE_BANK0(0),
754de2362d3Smrg				KCACHE_BANK1(0),
755de2362d3Smrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
756de2362d3Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
757de2362d3Smrg				KCACHE_ADDR0(0),
758de2362d3Smrg				KCACHE_ADDR1(0),
759de2362d3Smrg				I_COUNT(4),
760de2362d3Smrg				ALT_CONST(0),
761de2362d3Smrg				CF_INST(SQ_CF_INST_ALU),
762de2362d3Smrg				WHOLE_QUAD_MODE(0),
763de2362d3Smrg				BARRIER(1));
764de2362d3Smrg    /* 1 */
765de2362d3Smrg    shader[i++] = CF_DWORD0(ADDR(21),
766de2362d3Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
767de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
768de2362d3Smrg                            CF_CONST(0),
769de2362d3Smrg                            COND(SQ_CF_COND_BOOL),
770de2362d3Smrg                            I_COUNT(0),
771de2362d3Smrg                            VALID_PIXEL_MODE(0),
772de2362d3Smrg                            END_OF_PROGRAM(0),
773de2362d3Smrg                            CF_INST(SQ_CF_INST_CALL),
774de2362d3Smrg                            WHOLE_QUAD_MODE(0),
775de2362d3Smrg                            BARRIER(0));
776de2362d3Smrg    /* 2 */
777de2362d3Smrg    shader[i++] = CF_DWORD0(ADDR(30),
778de2362d3Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
779de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
780de2362d3Smrg                            CF_CONST(0),
781de2362d3Smrg                            COND(SQ_CF_COND_NOT_BOOL),
782de2362d3Smrg                            I_COUNT(0),
783de2362d3Smrg                            VALID_PIXEL_MODE(0),
784de2362d3Smrg                            END_OF_PROGRAM(0),
785de2362d3Smrg                            CF_INST(SQ_CF_INST_CALL),
786de2362d3Smrg                            WHOLE_QUAD_MODE(0),
787de2362d3Smrg                            BARRIER(0));
788de2362d3Smrg    /* 3 */
789de2362d3Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(9),
790de2362d3Smrg                                KCACHE_BANK0(0),
791de2362d3Smrg                                KCACHE_BANK1(0),
792de2362d3Smrg                                KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
793de2362d3Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
794de2362d3Smrg                                KCACHE_ADDR0(0),
795de2362d3Smrg                                KCACHE_ADDR1(0),
796de2362d3Smrg                                I_COUNT(12),
797de2362d3Smrg                                ALT_CONST(0),
798de2362d3Smrg                                CF_INST(SQ_CF_INST_ALU),
799de2362d3Smrg                                WHOLE_QUAD_MODE(0),
800de2362d3Smrg                                BARRIER(1));
801de2362d3Smrg    /* 4 */
802de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
803de2362d3Smrg                                          TYPE(SQ_EXPORT_PIXEL),
804de2362d3Smrg                                          RW_GPR(2),
805de2362d3Smrg                                          RW_REL(ABSOLUTE),
806de2362d3Smrg                                          INDEX_GPR(0),
807de2362d3Smrg                                          ELEM_SIZE(3));
808de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
809de2362d3Smrg                                               SRC_SEL_Y(SQ_SEL_Y),
810de2362d3Smrg                                               SRC_SEL_Z(SQ_SEL_Z),
811de2362d3Smrg                                               SRC_SEL_W(SQ_SEL_W),
812de2362d3Smrg                                               BURST_COUNT(1),
813de2362d3Smrg                                               VALID_PIXEL_MODE(0),
814de2362d3Smrg                                               END_OF_PROGRAM(1),
815de2362d3Smrg                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
816de2362d3Smrg                                               MARK(0),
817de2362d3Smrg                                               BARRIER(1));
818de2362d3Smrg    /* 5 interpolate tex coords */
819de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
820de2362d3Smrg			     SRC0_REL(ABSOLUTE),
821de2362d3Smrg			     SRC0_ELEM(ELEM_Y),
822de2362d3Smrg			     SRC0_NEG(0),
823de2362d3Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
824de2362d3Smrg			     SRC1_REL(ABSOLUTE),
825de2362d3Smrg			     SRC1_ELEM(ELEM_X),
826de2362d3Smrg			     SRC1_NEG(0),
827de2362d3Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
828de2362d3Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
829de2362d3Smrg			     LAST(0));
830de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
831de2362d3Smrg				 SRC1_ABS(0),
832de2362d3Smrg				 UPDATE_EXECUTE_MASK(0),
833de2362d3Smrg				 UPDATE_PRED(0),
834de2362d3Smrg				 WRITE_MASK(1),
835de2362d3Smrg				 OMOD(SQ_ALU_OMOD_OFF),
836de2362d3Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
837de2362d3Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
838de2362d3Smrg				 DST_GPR(0),
839de2362d3Smrg				 DST_REL(ABSOLUTE),
840de2362d3Smrg				 DST_ELEM(ELEM_X),
841de2362d3Smrg				 CLAMP(0));
842de2362d3Smrg    /* 6 */
843de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
844de2362d3Smrg			     SRC0_REL(ABSOLUTE),
845de2362d3Smrg			     SRC0_ELEM(ELEM_X),
846de2362d3Smrg			     SRC0_NEG(0),
847de2362d3Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
848de2362d3Smrg			     SRC1_REL(ABSOLUTE),
849de2362d3Smrg			     SRC1_ELEM(ELEM_X),
850de2362d3Smrg			     SRC1_NEG(0),
851de2362d3Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
852de2362d3Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
853de2362d3Smrg			     LAST(0));
854de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
855de2362d3Smrg				 SRC1_ABS(0),
856de2362d3Smrg				 UPDATE_EXECUTE_MASK(0),
857de2362d3Smrg				 UPDATE_PRED(0),
858de2362d3Smrg				 WRITE_MASK(1),
859de2362d3Smrg				 OMOD(SQ_ALU_OMOD_OFF),
860de2362d3Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
861de2362d3Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
862de2362d3Smrg				 DST_GPR(0),
863de2362d3Smrg				 DST_REL(ABSOLUTE),
864de2362d3Smrg				 DST_ELEM(ELEM_Y),
865de2362d3Smrg				 CLAMP(0));
866de2362d3Smrg    /* 7 */
867de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
868de2362d3Smrg			     SRC0_REL(ABSOLUTE),
869de2362d3Smrg			     SRC0_ELEM(ELEM_Y),
870de2362d3Smrg			     SRC0_NEG(0),
871de2362d3Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
872de2362d3Smrg			     SRC1_REL(ABSOLUTE),
873de2362d3Smrg			     SRC1_ELEM(ELEM_X),
874de2362d3Smrg			     SRC1_NEG(0),
875de2362d3Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
876de2362d3Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
877de2362d3Smrg			     LAST(0));
878de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
879de2362d3Smrg				 SRC1_ABS(0),
880de2362d3Smrg				 UPDATE_EXECUTE_MASK(0),
881de2362d3Smrg				 UPDATE_PRED(0),
882de2362d3Smrg				 WRITE_MASK(0),
883de2362d3Smrg				 OMOD(SQ_ALU_OMOD_OFF),
884de2362d3Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
885de2362d3Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
886de2362d3Smrg				 DST_GPR(0),
887de2362d3Smrg				 DST_REL(ABSOLUTE),
888de2362d3Smrg				 DST_ELEM(ELEM_Z),
889de2362d3Smrg				 CLAMP(0));
890de2362d3Smrg    /* 8 */
891de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
892de2362d3Smrg			     SRC0_REL(ABSOLUTE),
893de2362d3Smrg			     SRC0_ELEM(ELEM_X),
894de2362d3Smrg			     SRC0_NEG(0),
895de2362d3Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
896de2362d3Smrg			     SRC1_REL(ABSOLUTE),
897de2362d3Smrg			     SRC1_ELEM(ELEM_X),
898de2362d3Smrg			     SRC1_NEG(0),
899de2362d3Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
900de2362d3Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
901de2362d3Smrg			     LAST(1));
902de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
903de2362d3Smrg				 SRC1_ABS(0),
904de2362d3Smrg				 UPDATE_EXECUTE_MASK(0),
905de2362d3Smrg				 UPDATE_PRED(0),
906de2362d3Smrg				 WRITE_MASK(0),
907de2362d3Smrg				 OMOD(SQ_ALU_OMOD_OFF),
908de2362d3Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
909de2362d3Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
910de2362d3Smrg				 DST_GPR(0),
911de2362d3Smrg				 DST_REL(ABSOLUTE),
912de2362d3Smrg				 DST_ELEM(ELEM_W),
913de2362d3Smrg				 CLAMP(0));
914de2362d3Smrg
915de2362d3Smrg    /* 9,10,11,12 */
916de2362d3Smrg    /* r2.x = MAD(c0.w, r1.x, c0.x) */
917de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
918de2362d3Smrg                             SRC0_REL(ABSOLUTE),
919de2362d3Smrg                             SRC0_ELEM(ELEM_W),
920de2362d3Smrg                             SRC0_NEG(0),
921de2362d3Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
922de2362d3Smrg                             SRC1_REL(ABSOLUTE),
923de2362d3Smrg                             SRC1_ELEM(ELEM_X),
924de2362d3Smrg                             SRC1_NEG(0),
925de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
926de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
927de2362d3Smrg                             LAST(0));
928de2362d3Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
929de2362d3Smrg                                 SRC2_REL(ABSOLUTE),
930de2362d3Smrg                                 SRC2_ELEM(ELEM_X),
931de2362d3Smrg                                 SRC2_NEG(0),
932de2362d3Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
933de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
934de2362d3Smrg                                 DST_GPR(2),
935de2362d3Smrg                                 DST_REL(ABSOLUTE),
936de2362d3Smrg                                 DST_ELEM(ELEM_X),
937de2362d3Smrg                                 CLAMP(0));
938de2362d3Smrg    /* r2.y = MAD(c0.w, r1.x, c0.y) */
939de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
940de2362d3Smrg                             SRC0_REL(ABSOLUTE),
941de2362d3Smrg                             SRC0_ELEM(ELEM_W),
942de2362d3Smrg                             SRC0_NEG(0),
943de2362d3Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
944de2362d3Smrg                             SRC1_REL(ABSOLUTE),
945de2362d3Smrg                             SRC1_ELEM(ELEM_X),
946de2362d3Smrg                             SRC1_NEG(0),
947de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
948de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
949de2362d3Smrg                             LAST(0));
950de2362d3Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
951de2362d3Smrg                                 SRC2_REL(ABSOLUTE),
952de2362d3Smrg                                 SRC2_ELEM(ELEM_Y),
953de2362d3Smrg                                 SRC2_NEG(0),
954de2362d3Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
955de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
956de2362d3Smrg                                 DST_GPR(2),
957de2362d3Smrg                                 DST_REL(ABSOLUTE),
958de2362d3Smrg                                 DST_ELEM(ELEM_Y),
959de2362d3Smrg                                 CLAMP(0));
960de2362d3Smrg    /* r2.z = MAD(c0.w, r1.x, c0.z) */
961de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
962de2362d3Smrg                             SRC0_REL(ABSOLUTE),
963de2362d3Smrg                             SRC0_ELEM(ELEM_W),
964de2362d3Smrg                             SRC0_NEG(0),
965de2362d3Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
966de2362d3Smrg                             SRC1_REL(ABSOLUTE),
967de2362d3Smrg                             SRC1_ELEM(ELEM_X),
968de2362d3Smrg                             SRC1_NEG(0),
969de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
970de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
971de2362d3Smrg                             LAST(0));
972de2362d3Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
973de2362d3Smrg                                 SRC2_REL(ABSOLUTE),
974de2362d3Smrg                                 SRC2_ELEM(ELEM_Z),
975de2362d3Smrg                                 SRC2_NEG(0),
976de2362d3Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
977de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
978de2362d3Smrg                                 DST_GPR(2),
979de2362d3Smrg                                 DST_REL(ABSOLUTE),
980de2362d3Smrg                                 DST_ELEM(ELEM_Z),
981de2362d3Smrg                                 CLAMP(0));
982de2362d3Smrg    /* r2.w = MAD(0, 0, 1) */
983de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
984de2362d3Smrg                             SRC0_REL(ABSOLUTE),
985de2362d3Smrg                             SRC0_ELEM(ELEM_X),
986de2362d3Smrg                             SRC0_NEG(0),
987de2362d3Smrg                             SRC1_SEL(SQ_ALU_SRC_0),
988de2362d3Smrg                             SRC1_REL(ABSOLUTE),
989de2362d3Smrg                             SRC1_ELEM(ELEM_X),
990de2362d3Smrg                             SRC1_NEG(0),
991de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
992de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
993de2362d3Smrg                             LAST(1));
994de2362d3Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
995de2362d3Smrg                                 SRC2_REL(ABSOLUTE),
996de2362d3Smrg                                 SRC2_ELEM(ELEM_X),
997de2362d3Smrg                                 SRC2_NEG(0),
998de2362d3Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
999de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1000de2362d3Smrg                                 DST_GPR(2),
1001de2362d3Smrg                                 DST_REL(ABSOLUTE),
1002de2362d3Smrg                                 DST_ELEM(ELEM_W),
1003de2362d3Smrg                                 CLAMP(0));
1004de2362d3Smrg
1005de2362d3Smrg    /* 13,14,15,16 */
1006de2362d3Smrg    /* r2.x = MAD(c1.x, r1.y, pv.x) */
1007de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
1008de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1009de2362d3Smrg                             SRC0_ELEM(ELEM_X),
1010de2362d3Smrg                             SRC0_NEG(0),
1011de2362d3Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1012de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1013de2362d3Smrg                             SRC1_ELEM(ELEM_Y),
1014de2362d3Smrg                             SRC1_NEG(0),
1015de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1016de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1017de2362d3Smrg                             LAST(0));
1018de2362d3Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1019de2362d3Smrg                                 SRC2_REL(ABSOLUTE),
1020de2362d3Smrg                                 SRC2_ELEM(ELEM_X),
1021de2362d3Smrg                                 SRC2_NEG(0),
1022de2362d3Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1023de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1024de2362d3Smrg                                 DST_GPR(2),
1025de2362d3Smrg                                 DST_REL(ABSOLUTE),
1026de2362d3Smrg                                 DST_ELEM(ELEM_X),
1027de2362d3Smrg                                 CLAMP(0));
1028de2362d3Smrg    /* r2.y = MAD(c1.y, r1.y, pv.y) */
1029de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
1030de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1031de2362d3Smrg                             SRC0_ELEM(ELEM_Y),
1032de2362d3Smrg                             SRC0_NEG(0),
1033de2362d3Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1034de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1035de2362d3Smrg                             SRC1_ELEM(ELEM_Y),
1036de2362d3Smrg                             SRC1_NEG(0),
1037de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1038de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1039de2362d3Smrg                             LAST(0));
1040de2362d3Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1041de2362d3Smrg                                 SRC2_REL(ABSOLUTE),
1042de2362d3Smrg                                 SRC2_ELEM(ELEM_Y),
1043de2362d3Smrg                                 SRC2_NEG(0),
1044de2362d3Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1045de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1046de2362d3Smrg                                 DST_GPR(2),
1047de2362d3Smrg                                 DST_REL(ABSOLUTE),
1048de2362d3Smrg                                 DST_ELEM(ELEM_Y),
1049de2362d3Smrg                                 CLAMP(0));
1050de2362d3Smrg    /* r2.z = MAD(c1.z, r1.y, pv.z) */
1051de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
1052de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1053de2362d3Smrg                             SRC0_ELEM(ELEM_Z),
1054de2362d3Smrg                             SRC0_NEG(0),
1055de2362d3Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1056de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1057de2362d3Smrg                             SRC1_ELEM(ELEM_Y),
1058de2362d3Smrg                             SRC1_NEG(0),
1059de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1060de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1061de2362d3Smrg                             LAST(0));
1062de2362d3Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1063de2362d3Smrg                                 SRC2_REL(ABSOLUTE),
1064de2362d3Smrg                                 SRC2_ELEM(ELEM_Z),
1065de2362d3Smrg                                 SRC2_NEG(0),
1066de2362d3Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1067de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1068de2362d3Smrg                                 DST_GPR(2),
1069de2362d3Smrg                                 DST_REL(ABSOLUTE),
1070de2362d3Smrg                                 DST_ELEM(ELEM_Z),
1071de2362d3Smrg                                 CLAMP(0));
1072de2362d3Smrg    /* r2.w = MAD(0, 0, 1) */
1073de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
1074de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1075de2362d3Smrg                             SRC0_ELEM(ELEM_X),
1076de2362d3Smrg                             SRC0_NEG(0),
1077de2362d3Smrg                             SRC1_SEL(SQ_ALU_SRC_0),
1078de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1079de2362d3Smrg                             SRC1_ELEM(ELEM_X),
1080de2362d3Smrg                             SRC1_NEG(0),
1081de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1082de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1083de2362d3Smrg                             LAST(1));
1084de2362d3Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
1085de2362d3Smrg                                 SRC2_REL(ABSOLUTE),
1086de2362d3Smrg                                 SRC2_ELEM(ELEM_W),
1087de2362d3Smrg                                 SRC2_NEG(0),
1088de2362d3Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1089de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1090de2362d3Smrg                                 DST_GPR(2),
1091de2362d3Smrg                                 DST_REL(ABSOLUTE),
1092de2362d3Smrg                                 DST_ELEM(ELEM_W),
1093de2362d3Smrg                                 CLAMP(0));
1094de2362d3Smrg    /* 17,18,19,20 */
1095de2362d3Smrg    /* r2.x = MAD(c2.x, r1.z, pv.x) */
1096de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
1097de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1098de2362d3Smrg                             SRC0_ELEM(ELEM_X),
1099de2362d3Smrg                             SRC0_NEG(0),
1100de2362d3Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1101de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1102de2362d3Smrg                             SRC1_ELEM(ELEM_Z),
1103de2362d3Smrg                             SRC1_NEG(0),
1104de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1105de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1106de2362d3Smrg                             LAST(0));
1107de2362d3Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1108de2362d3Smrg                                 SRC2_REL(ABSOLUTE),
1109de2362d3Smrg                                 SRC2_ELEM(ELEM_X),
1110de2362d3Smrg                                 SRC2_NEG(0),
1111de2362d3Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1112de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1113de2362d3Smrg                                 DST_GPR(2),
1114de2362d3Smrg                                 DST_REL(ABSOLUTE),
1115de2362d3Smrg                                 DST_ELEM(ELEM_X),
1116de2362d3Smrg                                 CLAMP(1));
1117de2362d3Smrg    /* r2.y = MAD(c2.y, r1.z, pv.y) */
1118de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
1119de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1120de2362d3Smrg                             SRC0_ELEM(ELEM_Y),
1121de2362d3Smrg                             SRC0_NEG(0),
1122de2362d3Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1123de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1124de2362d3Smrg                             SRC1_ELEM(ELEM_Z),
1125de2362d3Smrg                             SRC1_NEG(0),
1126de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1127de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1128de2362d3Smrg                             LAST(0));
1129de2362d3Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1130de2362d3Smrg                                 SRC2_REL(ABSOLUTE),
1131de2362d3Smrg                                 SRC2_ELEM(ELEM_Y),
1132de2362d3Smrg                                 SRC2_NEG(0),
1133de2362d3Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1134de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1135de2362d3Smrg                                 DST_GPR(2),
1136de2362d3Smrg                                 DST_REL(ABSOLUTE),
1137de2362d3Smrg                                 DST_ELEM(ELEM_Y),
1138de2362d3Smrg                                 CLAMP(1));
1139de2362d3Smrg    /* r2.z = MAD(c2.z, r1.z, pv.z) */
1140de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
1141de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1142de2362d3Smrg                             SRC0_ELEM(ELEM_Z),
1143de2362d3Smrg                             SRC0_NEG(0),
1144de2362d3Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1145de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1146de2362d3Smrg                             SRC1_ELEM(ELEM_Z),
1147de2362d3Smrg                             SRC1_NEG(0),
1148de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1149de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1150de2362d3Smrg                             LAST(0));
1151de2362d3Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1152de2362d3Smrg                                 SRC2_REL(ABSOLUTE),
1153de2362d3Smrg                                 SRC2_ELEM(ELEM_Z),
1154de2362d3Smrg                                 SRC2_NEG(0),
1155de2362d3Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1156de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1157de2362d3Smrg                                 DST_GPR(2),
1158de2362d3Smrg                                 DST_REL(ABSOLUTE),
1159de2362d3Smrg                                 DST_ELEM(ELEM_Z),
1160de2362d3Smrg                                 CLAMP(1));
1161de2362d3Smrg    /* r2.w = MAD(0, 0, 1) */
1162de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
1163de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1164de2362d3Smrg                             SRC0_ELEM(ELEM_X),
1165de2362d3Smrg                             SRC0_NEG(0),
1166de2362d3Smrg                             SRC1_SEL(SQ_ALU_SRC_0),
1167de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1168de2362d3Smrg                             SRC1_ELEM(ELEM_X),
1169de2362d3Smrg                             SRC1_NEG(0),
1170de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1171de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1172de2362d3Smrg                             LAST(1));
1173de2362d3Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
1174de2362d3Smrg                                 SRC2_REL(ABSOLUTE),
1175de2362d3Smrg                                 SRC2_ELEM(ELEM_X),
1176de2362d3Smrg                                 SRC2_NEG(0),
1177de2362d3Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1178de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1179de2362d3Smrg                                 DST_GPR(2),
1180de2362d3Smrg                                 DST_REL(ABSOLUTE),
1181de2362d3Smrg                                 DST_ELEM(ELEM_W),
1182de2362d3Smrg                                 CLAMP(1));
1183de2362d3Smrg
1184de2362d3Smrg    /* 21 */
1185de2362d3Smrg    shader[i++] = CF_DWORD0(ADDR(24),
1186de2362d3Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1187de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1188de2362d3Smrg                            CF_CONST(0),
1189de2362d3Smrg                            COND(SQ_CF_COND_ACTIVE),
1190de2362d3Smrg                            I_COUNT(3),
1191de2362d3Smrg                            VALID_PIXEL_MODE(0),
1192de2362d3Smrg                            END_OF_PROGRAM(0),
1193de2362d3Smrg                            CF_INST(SQ_CF_INST_TC),
1194de2362d3Smrg                            WHOLE_QUAD_MODE(0),
1195de2362d3Smrg                            BARRIER(1));
1196de2362d3Smrg    /* 22 */
1197de2362d3Smrg    shader[i++] = CF_DWORD0(ADDR(0),
1198de2362d3Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1199de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1200de2362d3Smrg			    CF_CONST(0),
1201de2362d3Smrg			    COND(SQ_CF_COND_ACTIVE),
1202de2362d3Smrg			    I_COUNT(0),
1203de2362d3Smrg			    VALID_PIXEL_MODE(0),
1204de2362d3Smrg			    END_OF_PROGRAM(0),
1205de2362d3Smrg			    CF_INST(SQ_CF_INST_RETURN),
1206de2362d3Smrg			    WHOLE_QUAD_MODE(0),
1207de2362d3Smrg			    BARRIER(1));
1208de2362d3Smrg    /* 23 */
1209de2362d3Smrg    shader[i++] = 0x00000000;
1210de2362d3Smrg    shader[i++] = 0x00000000;
1211de2362d3Smrg    /* 24/25 */
1212de2362d3Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1213de2362d3Smrg                             INST_MOD(0),
1214de2362d3Smrg                             FETCH_WHOLE_QUAD(0),
1215de2362d3Smrg                             RESOURCE_ID(0),
1216de2362d3Smrg                             SRC_GPR(0),
1217de2362d3Smrg                             SRC_REL(ABSOLUTE),
1218de2362d3Smrg                             ALT_CONST(0),
1219de2362d3Smrg			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1220de2362d3Smrg			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1221de2362d3Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1222de2362d3Smrg                             DST_REL(ABSOLUTE),
1223de2362d3Smrg                             DST_SEL_X(SQ_SEL_X),
1224de2362d3Smrg                             DST_SEL_Y(SQ_SEL_MASK),
1225de2362d3Smrg                             DST_SEL_Z(SQ_SEL_MASK),
1226de2362d3Smrg                             DST_SEL_W(SQ_SEL_1),
1227de2362d3Smrg                             LOD_BIAS(0),
1228de2362d3Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1229de2362d3Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1230de2362d3Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1231de2362d3Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1232de2362d3Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1233de2362d3Smrg                             OFFSET_Y(0),
1234de2362d3Smrg                             OFFSET_Z(0),
1235de2362d3Smrg                             SAMPLER_ID(0),
1236de2362d3Smrg                             SRC_SEL_X(SQ_SEL_X),
1237de2362d3Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1238de2362d3Smrg                             SRC_SEL_Z(SQ_SEL_0),
1239de2362d3Smrg                             SRC_SEL_W(SQ_SEL_1));
1240de2362d3Smrg    shader[i++] = TEX_DWORD_PAD;
1241de2362d3Smrg    /* 26/27 */
1242de2362d3Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1243de2362d3Smrg                             INST_MOD(0),
1244de2362d3Smrg                             FETCH_WHOLE_QUAD(0),
1245de2362d3Smrg                             RESOURCE_ID(1),
1246de2362d3Smrg                             SRC_GPR(0),
1247de2362d3Smrg                             SRC_REL(ABSOLUTE),
1248de2362d3Smrg                             ALT_CONST(0),
1249de2362d3Smrg			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1250de2362d3Smrg			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1251de2362d3Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1252de2362d3Smrg                             DST_REL(ABSOLUTE),
1253de2362d3Smrg                             DST_SEL_X(SQ_SEL_MASK),
1254de2362d3Smrg                             DST_SEL_Y(SQ_SEL_MASK),
1255de2362d3Smrg                             DST_SEL_Z(SQ_SEL_X),
1256de2362d3Smrg                             DST_SEL_W(SQ_SEL_MASK),
1257de2362d3Smrg                             LOD_BIAS(0),
1258de2362d3Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1259de2362d3Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1260de2362d3Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1261de2362d3Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1262de2362d3Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1263de2362d3Smrg                             OFFSET_Y(0),
1264de2362d3Smrg                             OFFSET_Z(0),
1265de2362d3Smrg                             SAMPLER_ID(1),
1266de2362d3Smrg                             SRC_SEL_X(SQ_SEL_X),
1267de2362d3Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1268de2362d3Smrg                             SRC_SEL_Z(SQ_SEL_0),
1269de2362d3Smrg                             SRC_SEL_W(SQ_SEL_1));
1270de2362d3Smrg    shader[i++] = TEX_DWORD_PAD;
1271de2362d3Smrg    /* 28/29 */
1272de2362d3Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1273de2362d3Smrg                             INST_MOD(0),
1274de2362d3Smrg                             FETCH_WHOLE_QUAD(0),
1275de2362d3Smrg                             RESOURCE_ID(2),
1276de2362d3Smrg                             SRC_GPR(0),
1277de2362d3Smrg                             SRC_REL(ABSOLUTE),
1278de2362d3Smrg                             ALT_CONST(0),
1279de2362d3Smrg			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1280de2362d3Smrg			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1281de2362d3Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1282de2362d3Smrg                             DST_REL(ABSOLUTE),
1283de2362d3Smrg                             DST_SEL_X(SQ_SEL_MASK),
1284de2362d3Smrg                             DST_SEL_Y(SQ_SEL_X),
1285de2362d3Smrg                             DST_SEL_Z(SQ_SEL_MASK),
1286de2362d3Smrg                             DST_SEL_W(SQ_SEL_MASK),
1287de2362d3Smrg                             LOD_BIAS(0),
1288de2362d3Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1289de2362d3Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1290de2362d3Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1291de2362d3Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1292de2362d3Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1293de2362d3Smrg                             OFFSET_Y(0),
1294de2362d3Smrg                             OFFSET_Z(0),
1295de2362d3Smrg                             SAMPLER_ID(2),
1296de2362d3Smrg                             SRC_SEL_X(SQ_SEL_X),
1297de2362d3Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1298de2362d3Smrg                             SRC_SEL_Z(SQ_SEL_0),
1299de2362d3Smrg                             SRC_SEL_W(SQ_SEL_1));
1300de2362d3Smrg    shader[i++] = TEX_DWORD_PAD;
1301de2362d3Smrg    /* 30 */
1302de2362d3Smrg    shader[i++] = CF_DWORD0(ADDR(32),
1303de2362d3Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1304de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1305de2362d3Smrg                            CF_CONST(0),
1306de2362d3Smrg                            COND(SQ_CF_COND_ACTIVE),
1307de2362d3Smrg                            I_COUNT(1),
1308de2362d3Smrg                            VALID_PIXEL_MODE(0),
1309de2362d3Smrg                            END_OF_PROGRAM(0),
1310de2362d3Smrg                            CF_INST(SQ_CF_INST_TC),
1311de2362d3Smrg                            WHOLE_QUAD_MODE(0),
1312de2362d3Smrg                            BARRIER(1));
1313de2362d3Smrg    /* 31 */
1314de2362d3Smrg    shader[i++] = CF_DWORD0(ADDR(0),
1315de2362d3Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1316de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1317de2362d3Smrg			    CF_CONST(0),
1318de2362d3Smrg			    COND(SQ_CF_COND_ACTIVE),
1319de2362d3Smrg			    I_COUNT(0),
1320de2362d3Smrg			    VALID_PIXEL_MODE(0),
1321de2362d3Smrg			    END_OF_PROGRAM(0),
1322de2362d3Smrg			    CF_INST(SQ_CF_INST_RETURN),
1323de2362d3Smrg			    WHOLE_QUAD_MODE(0),
1324de2362d3Smrg			    BARRIER(1));
1325de2362d3Smrg    /* 32/33 */
1326de2362d3Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1327de2362d3Smrg                             INST_MOD(0),
1328de2362d3Smrg                             FETCH_WHOLE_QUAD(0),
1329de2362d3Smrg                             RESOURCE_ID(0),
1330de2362d3Smrg                             SRC_GPR(0),
1331de2362d3Smrg                             SRC_REL(ABSOLUTE),
1332de2362d3Smrg                             ALT_CONST(0),
1333de2362d3Smrg                             RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1334de2362d3Smrg                             SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1335de2362d3Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1336de2362d3Smrg                             DST_REL(ABSOLUTE),
1337de2362d3Smrg                             DST_SEL_X(SQ_SEL_X),
1338de2362d3Smrg                             DST_SEL_Y(SQ_SEL_Y),
1339de2362d3Smrg                             DST_SEL_Z(SQ_SEL_Z),
1340de2362d3Smrg                             DST_SEL_W(SQ_SEL_1),
1341de2362d3Smrg                             LOD_BIAS(0),
1342de2362d3Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1343de2362d3Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1344de2362d3Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1345de2362d3Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1346de2362d3Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1347de2362d3Smrg                             OFFSET_Y(0),
1348de2362d3Smrg                             OFFSET_Z(0),
1349de2362d3Smrg                             SAMPLER_ID(0),
1350de2362d3Smrg                             SRC_SEL_X(SQ_SEL_X),
1351de2362d3Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1352de2362d3Smrg                             SRC_SEL_Z(SQ_SEL_0),
1353de2362d3Smrg                             SRC_SEL_W(SQ_SEL_1));
1354de2362d3Smrg    shader[i++] = TEX_DWORD_PAD;
1355de2362d3Smrg
1356de2362d3Smrg    return i;
1357de2362d3Smrg}
1358de2362d3Smrg
1359de2362d3Smrg/* comp vs --------------------------------------- */
1360de2362d3Smrgint evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
1361de2362d3Smrg{
1362de2362d3Smrg    int i = 0;
1363de2362d3Smrg
1364de2362d3Smrg    /* 0 */
1365de2362d3Smrg    shader[i++] = CF_DWORD0(ADDR(3),
1366de2362d3Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1367de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1368de2362d3Smrg                            CF_CONST(0),
1369de2362d3Smrg                            COND(SQ_CF_COND_BOOL),
1370de2362d3Smrg                            I_COUNT(0),
1371de2362d3Smrg                            VALID_PIXEL_MODE(0),
1372de2362d3Smrg                            END_OF_PROGRAM(0),
1373de2362d3Smrg                            CF_INST(SQ_CF_INST_CALL),
1374de2362d3Smrg                            WHOLE_QUAD_MODE(0),
1375de2362d3Smrg                            BARRIER(0));
1376de2362d3Smrg    /* 1 */
1377de2362d3Smrg    shader[i++] = CF_DWORD0(ADDR(9),
1378de2362d3Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1379de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1380de2362d3Smrg                            CF_CONST(0),
1381de2362d3Smrg                            COND(SQ_CF_COND_NOT_BOOL),
1382de2362d3Smrg                            I_COUNT(0),
1383de2362d3Smrg                            VALID_PIXEL_MODE(0),
1384de2362d3Smrg                            END_OF_PROGRAM(0),
1385de2362d3Smrg                            CF_INST(SQ_CF_INST_CALL),
1386de2362d3Smrg                            WHOLE_QUAD_MODE(0),
1387de2362d3Smrg                            BARRIER(0));
1388de2362d3Smrg    /* 2 */
1389de2362d3Smrg    shader[i++] = CF_DWORD0(ADDR(0),
1390de2362d3Smrg                            JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1391de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1392de2362d3Smrg                            CF_CONST(0),
1393de2362d3Smrg                            COND(SQ_CF_COND_ACTIVE),
1394de2362d3Smrg                            I_COUNT(0),
1395de2362d3Smrg                            VALID_PIXEL_MODE(0),
1396de2362d3Smrg                            END_OF_PROGRAM(1),
1397de2362d3Smrg                            CF_INST(SQ_CF_INST_NOP),
1398de2362d3Smrg                            WHOLE_QUAD_MODE(0),
1399de2362d3Smrg                            BARRIER(1));
1400de2362d3Smrg    /* 3 - mask sub */
1401de2362d3Smrg    shader[i++] = CF_DWORD0(ADDR(44),
1402de2362d3Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1403de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1404de2362d3Smrg			    CF_CONST(0),
1405de2362d3Smrg			    COND(SQ_CF_COND_ACTIVE),
1406de2362d3Smrg			    I_COUNT(3),
1407de2362d3Smrg			    VALID_PIXEL_MODE(0),
1408de2362d3Smrg			    END_OF_PROGRAM(0),
1409de2362d3Smrg			    CF_INST(SQ_CF_INST_VC),
1410de2362d3Smrg			    WHOLE_QUAD_MODE(0),
1411de2362d3Smrg			    BARRIER(1));
1412de2362d3Smrg
1413de2362d3Smrg    /* 4 - ALU */
1414de2362d3Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(14),
1415de2362d3Smrg				KCACHE_BANK0(0),
1416de2362d3Smrg				KCACHE_BANK1(0),
1417de2362d3Smrg				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
1418de2362d3Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1419de2362d3Smrg				KCACHE_ADDR0(0),
1420de2362d3Smrg				KCACHE_ADDR1(0),
1421de2362d3Smrg				I_COUNT(20),
1422de2362d3Smrg				ALT_CONST(0),
1423de2362d3Smrg				CF_INST(SQ_CF_INST_ALU),
1424de2362d3Smrg				WHOLE_QUAD_MODE(0),
1425de2362d3Smrg				BARRIER(1));
1426de2362d3Smrg
1427de2362d3Smrg    /* 5 - dst */
1428de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1429de2362d3Smrg					  TYPE(SQ_EXPORT_POS),
1430de2362d3Smrg					  RW_GPR(2),
1431de2362d3Smrg					  RW_REL(ABSOLUTE),
1432de2362d3Smrg					  INDEX_GPR(0),
1433de2362d3Smrg					  ELEM_SIZE(0));
1434de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1435de2362d3Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1436de2362d3Smrg					       SRC_SEL_Z(SQ_SEL_0),
1437de2362d3Smrg					       SRC_SEL_W(SQ_SEL_1),
1438de2362d3Smrg					       BURST_COUNT(1),
1439de2362d3Smrg					       VALID_PIXEL_MODE(0),
1440de2362d3Smrg					       END_OF_PROGRAM(0),
1441de2362d3Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1442de2362d3Smrg					       MARK(0),
1443de2362d3Smrg					       BARRIER(1));
1444de2362d3Smrg    /* 6 - src */
1445de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1446de2362d3Smrg					  TYPE(SQ_EXPORT_PARAM),
1447de2362d3Smrg					  RW_GPR(1),
1448de2362d3Smrg					  RW_REL(ABSOLUTE),
1449de2362d3Smrg					  INDEX_GPR(0),
1450de2362d3Smrg					  ELEM_SIZE(0));
1451de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1452de2362d3Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1453de2362d3Smrg					       SRC_SEL_Z(SQ_SEL_0),
1454de2362d3Smrg					       SRC_SEL_W(SQ_SEL_1),
1455de2362d3Smrg					       BURST_COUNT(1),
1456de2362d3Smrg					       VALID_PIXEL_MODE(0),
1457de2362d3Smrg					       END_OF_PROGRAM(0),
1458de2362d3Smrg					       CF_INST(SQ_CF_INST_EXPORT),
1459de2362d3Smrg					       MARK(0),
1460de2362d3Smrg					       BARRIER(0));
1461de2362d3Smrg    /* 7 - mask */
1462de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
1463de2362d3Smrg					  TYPE(SQ_EXPORT_PARAM),
1464de2362d3Smrg					  RW_GPR(0),
1465de2362d3Smrg					  RW_REL(ABSOLUTE),
1466de2362d3Smrg					  INDEX_GPR(0),
1467de2362d3Smrg					  ELEM_SIZE(0));
1468de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1469de2362d3Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1470de2362d3Smrg					       SRC_SEL_Z(SQ_SEL_0),
1471de2362d3Smrg					       SRC_SEL_W(SQ_SEL_1),
1472de2362d3Smrg					       BURST_COUNT(1),
1473de2362d3Smrg					       VALID_PIXEL_MODE(0),
1474de2362d3Smrg					       END_OF_PROGRAM(0),
1475de2362d3Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1476de2362d3Smrg					       WHOLE_QUAD_MODE(0),
1477de2362d3Smrg					       BARRIER(0));
1478de2362d3Smrg    /* 8 */
1479de2362d3Smrg    shader[i++] = CF_DWORD0(ADDR(0),
1480de2362d3Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1481de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1482de2362d3Smrg			    CF_CONST(0),
1483de2362d3Smrg			    COND(SQ_CF_COND_ACTIVE),
1484de2362d3Smrg			    I_COUNT(0),
1485de2362d3Smrg			    VALID_PIXEL_MODE(0),
1486de2362d3Smrg			    END_OF_PROGRAM(0),
1487de2362d3Smrg			    CF_INST(SQ_CF_INST_RETURN),
1488de2362d3Smrg			    WHOLE_QUAD_MODE(0),
1489de2362d3Smrg			    BARRIER(1));
1490de2362d3Smrg    /* 9 - non-mask sub */
1491de2362d3Smrg    shader[i++] = CF_DWORD0(ADDR(50),
1492de2362d3Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1493de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1494de2362d3Smrg			    CF_CONST(0),
1495de2362d3Smrg			    COND(SQ_CF_COND_ACTIVE),
1496de2362d3Smrg			    I_COUNT(2),
1497de2362d3Smrg			    VALID_PIXEL_MODE(0),
1498de2362d3Smrg			    END_OF_PROGRAM(0),
1499de2362d3Smrg			    CF_INST(SQ_CF_INST_VC),
1500de2362d3Smrg			    WHOLE_QUAD_MODE(0),
1501de2362d3Smrg			    BARRIER(1));
1502de2362d3Smrg
1503de2362d3Smrg    /* 10 - ALU */
1504de2362d3Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(34),
1505de2362d3Smrg				KCACHE_BANK0(0),
1506de2362d3Smrg				KCACHE_BANK1(0),
1507de2362d3Smrg				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
1508de2362d3Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1509de2362d3Smrg				KCACHE_ADDR0(0),
1510de2362d3Smrg				KCACHE_ADDR1(0),
1511de2362d3Smrg				I_COUNT(10),
1512de2362d3Smrg				ALT_CONST(0),
1513de2362d3Smrg				CF_INST(SQ_CF_INST_ALU),
1514de2362d3Smrg				WHOLE_QUAD_MODE(0),
1515de2362d3Smrg				BARRIER(1));
1516de2362d3Smrg
1517de2362d3Smrg    /* 11 - dst */
1518de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1519de2362d3Smrg					  TYPE(SQ_EXPORT_POS),
1520de2362d3Smrg					  RW_GPR(1),
1521de2362d3Smrg					  RW_REL(ABSOLUTE),
1522de2362d3Smrg					  INDEX_GPR(0),
1523de2362d3Smrg					  ELEM_SIZE(0));
1524de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1525de2362d3Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1526de2362d3Smrg					       SRC_SEL_Z(SQ_SEL_0),
1527de2362d3Smrg					       SRC_SEL_W(SQ_SEL_1),
1528de2362d3Smrg					       BURST_COUNT(0),
1529de2362d3Smrg					       VALID_PIXEL_MODE(0),
1530de2362d3Smrg					       END_OF_PROGRAM(0),
1531de2362d3Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1532de2362d3Smrg					       MARK(0),
1533de2362d3Smrg					       BARRIER(1));
1534de2362d3Smrg    /* 12 - src */
1535de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1536de2362d3Smrg					  TYPE(SQ_EXPORT_PARAM),
1537de2362d3Smrg					  RW_GPR(0),
1538de2362d3Smrg					  RW_REL(ABSOLUTE),
1539de2362d3Smrg					  INDEX_GPR(0),
1540de2362d3Smrg					  ELEM_SIZE(0));
1541de2362d3Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1542de2362d3Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1543de2362d3Smrg					       SRC_SEL_Z(SQ_SEL_0),
1544de2362d3Smrg					       SRC_SEL_W(SQ_SEL_1),
1545de2362d3Smrg					       BURST_COUNT(0),
1546de2362d3Smrg					       VALID_PIXEL_MODE(0),
1547de2362d3Smrg					       END_OF_PROGRAM(0),
1548de2362d3Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1549de2362d3Smrg					       MARK(0),
1550de2362d3Smrg					       BARRIER(0));
1551de2362d3Smrg    /* 13 */
1552de2362d3Smrg    shader[i++] = CF_DWORD0(ADDR(0),
1553de2362d3Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1554de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1555de2362d3Smrg			    CF_CONST(0),
1556de2362d3Smrg			    COND(SQ_CF_COND_ACTIVE),
1557de2362d3Smrg			    I_COUNT(0),
1558de2362d3Smrg			    VALID_PIXEL_MODE(0),
1559de2362d3Smrg			    END_OF_PROGRAM(0),
1560de2362d3Smrg			    CF_INST(SQ_CF_INST_RETURN),
1561de2362d3Smrg			    WHOLE_QUAD_MODE(0),
1562de2362d3Smrg			    BARRIER(1));
1563de2362d3Smrg
1564de2362d3Smrg    /* 14 srcX.x DOT4 - mask */
1565de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1566de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1567de2362d3Smrg                             SRC0_ELEM(ELEM_X),
1568de2362d3Smrg                             SRC0_NEG(0),
1569de2362d3Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1570de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1571de2362d3Smrg                             SRC1_ELEM(ELEM_X),
1572de2362d3Smrg                             SRC1_NEG(0),
1573de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1574de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1575de2362d3Smrg                             LAST(0));
1576de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1577de2362d3Smrg                                 SRC1_ABS(0),
1578de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1579de2362d3Smrg                                 UPDATE_PRED(0),
1580de2362d3Smrg                                 WRITE_MASK(1),
1581de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1582de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1583de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1584de2362d3Smrg                                 DST_GPR(3),
1585de2362d3Smrg                                 DST_REL(ABSOLUTE),
1586de2362d3Smrg                                 DST_ELEM(ELEM_X),
1587de2362d3Smrg                                 CLAMP(0));
1588de2362d3Smrg
1589de2362d3Smrg    /* 15 srcX.y DOT4 - mask */
1590de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1591de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1592de2362d3Smrg                             SRC0_ELEM(ELEM_Y),
1593de2362d3Smrg                             SRC0_NEG(0),
1594de2362d3Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1595de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1596de2362d3Smrg                             SRC1_ELEM(ELEM_Y),
1597de2362d3Smrg                             SRC1_NEG(0),
1598de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1599de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1600de2362d3Smrg                             LAST(0));
1601de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1602de2362d3Smrg                                 SRC1_ABS(0),
1603de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1604de2362d3Smrg                                 UPDATE_PRED(0),
1605de2362d3Smrg                                 WRITE_MASK(0),
1606de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1607de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1608de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1609de2362d3Smrg                                 DST_GPR(3),
1610de2362d3Smrg                                 DST_REL(ABSOLUTE),
1611de2362d3Smrg                                 DST_ELEM(ELEM_Y),
1612de2362d3Smrg                                 CLAMP(0));
1613de2362d3Smrg
1614de2362d3Smrg    /* 16 srcX.z DOT4 - mask */
1615de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1616de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1617de2362d3Smrg                             SRC0_ELEM(ELEM_Z),
1618de2362d3Smrg                             SRC0_NEG(0),
1619de2362d3Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1620de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1621de2362d3Smrg                             SRC1_ELEM(ELEM_Z),
1622de2362d3Smrg                             SRC1_NEG(0),
1623de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1624de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1625de2362d3Smrg                             LAST(0));
1626de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1627de2362d3Smrg                                 SRC1_ABS(0),
1628de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1629de2362d3Smrg                                 UPDATE_PRED(0),
1630de2362d3Smrg                                 WRITE_MASK(0),
1631de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1632de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1633de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1634de2362d3Smrg                                 DST_GPR(3),
1635de2362d3Smrg                                 DST_REL(ABSOLUTE),
1636de2362d3Smrg                                 DST_ELEM(ELEM_Z),
1637de2362d3Smrg                                 CLAMP(0));
1638de2362d3Smrg
1639de2362d3Smrg    /* 17 srcX.w DOT4 - mask */
1640de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1641de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1642de2362d3Smrg                             SRC0_ELEM(ELEM_W),
1643de2362d3Smrg                             SRC0_NEG(0),
1644de2362d3Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1645de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1646de2362d3Smrg                             SRC1_ELEM(ELEM_W),
1647de2362d3Smrg                             SRC1_NEG(0),
1648de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1649de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1650de2362d3Smrg                             LAST(1));
1651de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1652de2362d3Smrg                                 SRC1_ABS(0),
1653de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1654de2362d3Smrg                                 UPDATE_PRED(0),
1655de2362d3Smrg                                 WRITE_MASK(0),
1656de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1657de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1658de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1659de2362d3Smrg                                 DST_GPR(3),
1660de2362d3Smrg                                 DST_REL(ABSOLUTE),
1661de2362d3Smrg                                 DST_ELEM(ELEM_W),
1662de2362d3Smrg                                 CLAMP(0));
1663de2362d3Smrg
1664de2362d3Smrg    /* 18 srcY.x DOT4 - mask */
1665de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1666de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1667de2362d3Smrg                             SRC0_ELEM(ELEM_X),
1668de2362d3Smrg                             SRC0_NEG(0),
1669de2362d3Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1670de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1671de2362d3Smrg                             SRC1_ELEM(ELEM_X),
1672de2362d3Smrg                             SRC1_NEG(0),
1673de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1674de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1675de2362d3Smrg                             LAST(0));
1676de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1677de2362d3Smrg                                 SRC1_ABS(0),
1678de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1679de2362d3Smrg                                 UPDATE_PRED(0),
1680de2362d3Smrg                                 WRITE_MASK(0),
1681de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1682de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1683de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1684de2362d3Smrg                                 DST_GPR(3),
1685de2362d3Smrg                                 DST_REL(ABSOLUTE),
1686de2362d3Smrg                                 DST_ELEM(ELEM_X),
1687de2362d3Smrg                                 CLAMP(0));
1688de2362d3Smrg
1689de2362d3Smrg    /* 19 srcY.y DOT4 - mask */
1690de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1691de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1692de2362d3Smrg                             SRC0_ELEM(ELEM_Y),
1693de2362d3Smrg                             SRC0_NEG(0),
1694de2362d3Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1695de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1696de2362d3Smrg                             SRC1_ELEM(ELEM_Y),
1697de2362d3Smrg                             SRC1_NEG(0),
1698de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1699de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1700de2362d3Smrg                             LAST(0));
1701de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1702de2362d3Smrg                                 SRC1_ABS(0),
1703de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1704de2362d3Smrg                                 UPDATE_PRED(0),
1705de2362d3Smrg                                 WRITE_MASK(1),
1706de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1707de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1708de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1709de2362d3Smrg                                 DST_GPR(3),
1710de2362d3Smrg                                 DST_REL(ABSOLUTE),
1711de2362d3Smrg                                 DST_ELEM(ELEM_Y),
1712de2362d3Smrg                                 CLAMP(0));
1713de2362d3Smrg
1714de2362d3Smrg    /* 20 srcY.z DOT4 - mask */
1715de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1716de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1717de2362d3Smrg                             SRC0_ELEM(ELEM_Z),
1718de2362d3Smrg                             SRC0_NEG(0),
1719de2362d3Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1720de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1721de2362d3Smrg                             SRC1_ELEM(ELEM_Z),
1722de2362d3Smrg                             SRC1_NEG(0),
1723de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1724de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1725de2362d3Smrg                             LAST(0));
1726de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1727de2362d3Smrg                                 SRC1_ABS(0),
1728de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1729de2362d3Smrg                                 UPDATE_PRED(0),
1730de2362d3Smrg                                 WRITE_MASK(0),
1731de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1732de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1733de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1734de2362d3Smrg                                 DST_GPR(3),
1735de2362d3Smrg                                 DST_REL(ABSOLUTE),
1736de2362d3Smrg                                 DST_ELEM(ELEM_Z),
1737de2362d3Smrg                                 CLAMP(0));
1738de2362d3Smrg
1739de2362d3Smrg    /* 21 srcY.w DOT4 - mask */
1740de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1741de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1742de2362d3Smrg                             SRC0_ELEM(ELEM_W),
1743de2362d3Smrg                             SRC0_NEG(0),
1744de2362d3Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1745de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1746de2362d3Smrg                             SRC1_ELEM(ELEM_W),
1747de2362d3Smrg                             SRC1_NEG(0),
1748de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1749de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1750de2362d3Smrg                             LAST(1));
1751de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1752de2362d3Smrg                                 SRC1_ABS(0),
1753de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1754de2362d3Smrg                                 UPDATE_PRED(0),
1755de2362d3Smrg                                 WRITE_MASK(0),
1756de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1757de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1758de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1759de2362d3Smrg                                 DST_GPR(3),
1760de2362d3Smrg                                 DST_REL(ABSOLUTE),
1761de2362d3Smrg                                 DST_ELEM(ELEM_W),
1762de2362d3Smrg                                 CLAMP(0));
1763de2362d3Smrg
1764de2362d3Smrg    /* 22 maskX.x DOT4 - mask */
1765de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1766de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1767de2362d3Smrg                             SRC0_ELEM(ELEM_X),
1768de2362d3Smrg                             SRC0_NEG(0),
1769de2362d3Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1770de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1771de2362d3Smrg                             SRC1_ELEM(ELEM_X),
1772de2362d3Smrg                             SRC1_NEG(0),
1773de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1774de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1775de2362d3Smrg                             LAST(0));
1776de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1777de2362d3Smrg                                 SRC1_ABS(0),
1778de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1779de2362d3Smrg                                 UPDATE_PRED(0),
1780de2362d3Smrg                                 WRITE_MASK(1),
1781de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1782de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1783de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1784de2362d3Smrg                                 DST_GPR(4),
1785de2362d3Smrg                                 DST_REL(ABSOLUTE),
1786de2362d3Smrg                                 DST_ELEM(ELEM_X),
1787de2362d3Smrg                                 CLAMP(0));
1788de2362d3Smrg
1789de2362d3Smrg    /* 23 maskX.y DOT4 - mask */
1790de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1791de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1792de2362d3Smrg                             SRC0_ELEM(ELEM_Y),
1793de2362d3Smrg                             SRC0_NEG(0),
1794de2362d3Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1795de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1796de2362d3Smrg                             SRC1_ELEM(ELEM_Y),
1797de2362d3Smrg                             SRC1_NEG(0),
1798de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1799de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1800de2362d3Smrg                             LAST(0));
1801de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1802de2362d3Smrg                                 SRC1_ABS(0),
1803de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1804de2362d3Smrg                                 UPDATE_PRED(0),
1805de2362d3Smrg                                 WRITE_MASK(0),
1806de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1807de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1808de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1809de2362d3Smrg                                 DST_GPR(4),
1810de2362d3Smrg                                 DST_REL(ABSOLUTE),
1811de2362d3Smrg                                 DST_ELEM(ELEM_Y),
1812de2362d3Smrg                                 CLAMP(0));
1813de2362d3Smrg
1814de2362d3Smrg    /* 24 maskX.z DOT4 - mask */
1815de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1816de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1817de2362d3Smrg                             SRC0_ELEM(ELEM_Z),
1818de2362d3Smrg                             SRC0_NEG(0),
1819de2362d3Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1820de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1821de2362d3Smrg                             SRC1_ELEM(ELEM_Z),
1822de2362d3Smrg                             SRC1_NEG(0),
1823de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1824de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1825de2362d3Smrg                             LAST(0));
1826de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1827de2362d3Smrg                                 SRC1_ABS(0),
1828de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1829de2362d3Smrg                                 UPDATE_PRED(0),
1830de2362d3Smrg                                 WRITE_MASK(0),
1831de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1832de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1833de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1834de2362d3Smrg                                 DST_GPR(4),
1835de2362d3Smrg                                 DST_REL(ABSOLUTE),
1836de2362d3Smrg                                 DST_ELEM(ELEM_Z),
1837de2362d3Smrg                                 CLAMP(0));
1838de2362d3Smrg
1839de2362d3Smrg    /* 25 maskX.w DOT4 - mask */
1840de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1841de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1842de2362d3Smrg                             SRC0_ELEM(ELEM_W),
1843de2362d3Smrg                             SRC0_NEG(0),
1844de2362d3Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1845de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1846de2362d3Smrg                             SRC1_ELEM(ELEM_W),
1847de2362d3Smrg                             SRC1_NEG(0),
1848de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1849de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1850de2362d3Smrg                             LAST(1));
1851de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1852de2362d3Smrg                                 SRC1_ABS(0),
1853de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1854de2362d3Smrg                                 UPDATE_PRED(0),
1855de2362d3Smrg                                 WRITE_MASK(0),
1856de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1857de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1858de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1859de2362d3Smrg                                 DST_GPR(4),
1860de2362d3Smrg                                 DST_REL(ABSOLUTE),
1861de2362d3Smrg                                 DST_ELEM(ELEM_W),
1862de2362d3Smrg                                 CLAMP(0));
1863de2362d3Smrg
1864de2362d3Smrg    /* 26 maskY.x DOT4 - mask */
1865de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1866de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1867de2362d3Smrg                             SRC0_ELEM(ELEM_X),
1868de2362d3Smrg                             SRC0_NEG(0),
1869de2362d3Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1870de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1871de2362d3Smrg                             SRC1_ELEM(ELEM_X),
1872de2362d3Smrg                             SRC1_NEG(0),
1873de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1874de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1875de2362d3Smrg                             LAST(0));
1876de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1877de2362d3Smrg                                 SRC1_ABS(0),
1878de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1879de2362d3Smrg                                 UPDATE_PRED(0),
1880de2362d3Smrg                                 WRITE_MASK(0),
1881de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1882de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1883de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1884de2362d3Smrg                                 DST_GPR(4),
1885de2362d3Smrg                                 DST_REL(ABSOLUTE),
1886de2362d3Smrg                                 DST_ELEM(ELEM_X),
1887de2362d3Smrg                                 CLAMP(0));
1888de2362d3Smrg
1889de2362d3Smrg    /* 27 maskY.y DOT4 - mask */
1890de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1891de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1892de2362d3Smrg                             SRC0_ELEM(ELEM_Y),
1893de2362d3Smrg                             SRC0_NEG(0),
1894de2362d3Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1895de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1896de2362d3Smrg                             SRC1_ELEM(ELEM_Y),
1897de2362d3Smrg                             SRC1_NEG(0),
1898de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1899de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1900de2362d3Smrg                             LAST(0));
1901de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1902de2362d3Smrg                                 SRC1_ABS(0),
1903de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1904de2362d3Smrg                                 UPDATE_PRED(0),
1905de2362d3Smrg                                 WRITE_MASK(1),
1906de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1907de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1908de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1909de2362d3Smrg                                 DST_GPR(4),
1910de2362d3Smrg                                 DST_REL(ABSOLUTE),
1911de2362d3Smrg                                 DST_ELEM(ELEM_Y),
1912de2362d3Smrg                                 CLAMP(0));
1913de2362d3Smrg
1914de2362d3Smrg    /* 28 maskY.z DOT4 - mask */
1915de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1916de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1917de2362d3Smrg                             SRC0_ELEM(ELEM_Z),
1918de2362d3Smrg                             SRC0_NEG(0),
1919de2362d3Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1920de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1921de2362d3Smrg                             SRC1_ELEM(ELEM_Z),
1922de2362d3Smrg                             SRC1_NEG(0),
1923de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1924de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1925de2362d3Smrg                             LAST(0));
1926de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1927de2362d3Smrg                                 SRC1_ABS(0),
1928de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1929de2362d3Smrg                                 UPDATE_PRED(0),
1930de2362d3Smrg                                 WRITE_MASK(0),
1931de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1932de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1933de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1934de2362d3Smrg                                 DST_GPR(4),
1935de2362d3Smrg                                 DST_REL(ABSOLUTE),
1936de2362d3Smrg                                 DST_ELEM(ELEM_Z),
1937de2362d3Smrg                                 CLAMP(0));
1938de2362d3Smrg
1939de2362d3Smrg    /* 29 maskY.w DOT4 - mask */
1940de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1941de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1942de2362d3Smrg                             SRC0_ELEM(ELEM_W),
1943de2362d3Smrg                             SRC0_NEG(0),
1944de2362d3Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1945de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1946de2362d3Smrg                             SRC1_ELEM(ELEM_W),
1947de2362d3Smrg                             SRC1_NEG(0),
1948de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1949de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1950de2362d3Smrg                             LAST(1));
1951de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1952de2362d3Smrg                                 SRC1_ABS(0),
1953de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1954de2362d3Smrg                                 UPDATE_PRED(0),
1955de2362d3Smrg                                 WRITE_MASK(0),
1956de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1957de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1958de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1959de2362d3Smrg                                 DST_GPR(4),
1960de2362d3Smrg                                 DST_REL(ABSOLUTE),
1961de2362d3Smrg                                 DST_ELEM(ELEM_W),
1962de2362d3Smrg                                 CLAMP(0));
1963de2362d3Smrg
1964de2362d3Smrg    /* 30 srcX / w */
1965de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
1966de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1967de2362d3Smrg                             SRC0_ELEM(ELEM_X),
1968de2362d3Smrg                             SRC0_NEG(0),
1969de2362d3Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1970de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1971de2362d3Smrg                             SRC1_ELEM(ELEM_W),
1972de2362d3Smrg                             SRC1_NEG(0),
1973de2362d3Smrg                             INDEX_MODE(SQ_INDEX_AR_X),
1974de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1975de2362d3Smrg                             LAST(1));
1976de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1977de2362d3Smrg                                 SRC1_ABS(0),
1978de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
1979de2362d3Smrg                                 UPDATE_PRED(0),
1980de2362d3Smrg                                 WRITE_MASK(1),
1981de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1982de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_MUL),
1983de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1984de2362d3Smrg                                 DST_GPR(1),
1985de2362d3Smrg                                 DST_REL(ABSOLUTE),
1986de2362d3Smrg                                 DST_ELEM(ELEM_X),
1987de2362d3Smrg                                 CLAMP(0));
1988de2362d3Smrg
1989de2362d3Smrg    /* 31 srcY / h */
1990de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
1991de2362d3Smrg                             SRC0_REL(ABSOLUTE),
1992de2362d3Smrg                             SRC0_ELEM(ELEM_Y),
1993de2362d3Smrg                             SRC0_NEG(0),
1994de2362d3Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1995de2362d3Smrg                             SRC1_REL(ABSOLUTE),
1996de2362d3Smrg                             SRC1_ELEM(ELEM_W),
1997de2362d3Smrg                             SRC1_NEG(0),
1998de2362d3Smrg                             INDEX_MODE(SQ_INDEX_AR_X),
1999de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2000de2362d3Smrg                             LAST(1));
2001de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2002de2362d3Smrg                                 SRC1_ABS(0),
2003de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
2004de2362d3Smrg                                 UPDATE_PRED(0),
2005de2362d3Smrg                                 WRITE_MASK(1),
2006de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2007de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_MUL),
2008de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2009de2362d3Smrg                                 DST_GPR(1),
2010de2362d3Smrg                                 DST_REL(ABSOLUTE),
2011de2362d3Smrg                                 DST_ELEM(ELEM_Y),
2012de2362d3Smrg                                 CLAMP(0));
2013de2362d3Smrg
2014de2362d3Smrg    /* 32 maskX / w */
2015de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
2016de2362d3Smrg                             SRC0_REL(ABSOLUTE),
2017de2362d3Smrg                             SRC0_ELEM(ELEM_X),
2018de2362d3Smrg                             SRC0_NEG(0),
2019de2362d3Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
2020de2362d3Smrg                             SRC1_REL(ABSOLUTE),
2021de2362d3Smrg                             SRC1_ELEM(ELEM_W),
2022de2362d3Smrg                             SRC1_NEG(0),
2023de2362d3Smrg                             INDEX_MODE(SQ_INDEX_AR_X),
2024de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2025de2362d3Smrg                             LAST(1));
2026de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2027de2362d3Smrg                                 SRC1_ABS(0),
2028de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
2029de2362d3Smrg                                 UPDATE_PRED(0),
2030de2362d3Smrg                                 WRITE_MASK(1),
2031de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2032de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_MUL),
2033de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2034de2362d3Smrg                                 DST_GPR(0),
2035de2362d3Smrg                                 DST_REL(ABSOLUTE),
2036de2362d3Smrg                                 DST_ELEM(ELEM_X),
2037de2362d3Smrg                                 CLAMP(0));
2038de2362d3Smrg
2039de2362d3Smrg    /* 33 maskY / h */
2040de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
2041de2362d3Smrg                             SRC0_REL(ABSOLUTE),
2042de2362d3Smrg                             SRC0_ELEM(ELEM_Y),
2043de2362d3Smrg                             SRC0_NEG(0),
2044de2362d3Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
2045de2362d3Smrg                             SRC1_REL(ABSOLUTE),
2046de2362d3Smrg                             SRC1_ELEM(ELEM_W),
2047de2362d3Smrg                             SRC1_NEG(0),
2048de2362d3Smrg                             INDEX_MODE(SQ_INDEX_AR_X),
2049de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2050de2362d3Smrg                             LAST(1));
2051de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2052de2362d3Smrg                                 SRC1_ABS(0),
2053de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
2054de2362d3Smrg                                 UPDATE_PRED(0),
2055de2362d3Smrg                                 WRITE_MASK(1),
2056de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2057de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_MUL),
2058de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2059de2362d3Smrg                                 DST_GPR(0),
2060de2362d3Smrg                                 DST_REL(ABSOLUTE),
2061de2362d3Smrg                                 DST_ELEM(ELEM_Y),
2062de2362d3Smrg                                 CLAMP(0));
2063de2362d3Smrg
2064de2362d3Smrg    /* 34 srcX.x DOT4 - non-mask */
2065de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2066de2362d3Smrg                             SRC0_REL(ABSOLUTE),
2067de2362d3Smrg                             SRC0_ELEM(ELEM_X),
2068de2362d3Smrg                             SRC0_NEG(0),
2069de2362d3Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2070de2362d3Smrg                             SRC1_REL(ABSOLUTE),
2071de2362d3Smrg                             SRC1_ELEM(ELEM_X),
2072de2362d3Smrg                             SRC1_NEG(0),
2073de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
2074de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2075de2362d3Smrg                             LAST(0));
2076de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2077de2362d3Smrg                                 SRC1_ABS(0),
2078de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
2079de2362d3Smrg                                 UPDATE_PRED(0),
2080de2362d3Smrg                                 WRITE_MASK(1),
2081de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2082de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2083de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2084de2362d3Smrg                                 DST_GPR(2),
2085de2362d3Smrg                                 DST_REL(ABSOLUTE),
2086de2362d3Smrg                                 DST_ELEM(ELEM_X),
2087de2362d3Smrg                                 CLAMP(0));
2088de2362d3Smrg
2089de2362d3Smrg    /* 35 srcX.y DOT4 - non-mask */
2090de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2091de2362d3Smrg                             SRC0_REL(ABSOLUTE),
2092de2362d3Smrg                             SRC0_ELEM(ELEM_Y),
2093de2362d3Smrg                             SRC0_NEG(0),
2094de2362d3Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2095de2362d3Smrg                             SRC1_REL(ABSOLUTE),
2096de2362d3Smrg                             SRC1_ELEM(ELEM_Y),
2097de2362d3Smrg                             SRC1_NEG(0),
2098de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
2099de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2100de2362d3Smrg                             LAST(0));
2101de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2102de2362d3Smrg                                 SRC1_ABS(0),
2103de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
2104de2362d3Smrg                                 UPDATE_PRED(0),
2105de2362d3Smrg                                 WRITE_MASK(0),
2106de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2107de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2108de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2109de2362d3Smrg                                 DST_GPR(2),
2110de2362d3Smrg                                 DST_REL(ABSOLUTE),
2111de2362d3Smrg                                 DST_ELEM(ELEM_Y),
2112de2362d3Smrg                                 CLAMP(0));
2113de2362d3Smrg
2114de2362d3Smrg    /* 36 srcX.z DOT4 - non-mask */
2115de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2116de2362d3Smrg                             SRC0_REL(ABSOLUTE),
2117de2362d3Smrg                             SRC0_ELEM(ELEM_Z),
2118de2362d3Smrg                             SRC0_NEG(0),
2119de2362d3Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2120de2362d3Smrg                             SRC1_REL(ABSOLUTE),
2121de2362d3Smrg                             SRC1_ELEM(ELEM_Z),
2122de2362d3Smrg                             SRC1_NEG(0),
2123de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
2124de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2125de2362d3Smrg                             LAST(0));
2126de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2127de2362d3Smrg                                 SRC1_ABS(0),
2128de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
2129de2362d3Smrg                                 UPDATE_PRED(0),
2130de2362d3Smrg                                 WRITE_MASK(0),
2131de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2132de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2133de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2134de2362d3Smrg                                 DST_GPR(2),
2135de2362d3Smrg                                 DST_REL(ABSOLUTE),
2136de2362d3Smrg                                 DST_ELEM(ELEM_Z),
2137de2362d3Smrg                                 CLAMP(0));
2138de2362d3Smrg
2139de2362d3Smrg    /* 37 srcX.w DOT4 - non-mask */
2140de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2141de2362d3Smrg                             SRC0_REL(ABSOLUTE),
2142de2362d3Smrg                             SRC0_ELEM(ELEM_W),
2143de2362d3Smrg                             SRC0_NEG(0),
2144de2362d3Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2145de2362d3Smrg                             SRC1_REL(ABSOLUTE),
2146de2362d3Smrg                             SRC1_ELEM(ELEM_W),
2147de2362d3Smrg                             SRC1_NEG(0),
2148de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
2149de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2150de2362d3Smrg                             LAST(1));
2151de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2152de2362d3Smrg                                 SRC1_ABS(0),
2153de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
2154de2362d3Smrg                                 UPDATE_PRED(0),
2155de2362d3Smrg                                 WRITE_MASK(0),
2156de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2157de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2158de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2159de2362d3Smrg                                 DST_GPR(2),
2160de2362d3Smrg                                 DST_REL(ABSOLUTE),
2161de2362d3Smrg                                 DST_ELEM(ELEM_W),
2162de2362d3Smrg                                 CLAMP(0));
2163de2362d3Smrg
2164de2362d3Smrg    /* 38 srcY.x DOT4 - non-mask */
2165de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2166de2362d3Smrg                             SRC0_REL(ABSOLUTE),
2167de2362d3Smrg                             SRC0_ELEM(ELEM_X),
2168de2362d3Smrg                             SRC0_NEG(0),
2169de2362d3Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2170de2362d3Smrg                             SRC1_REL(ABSOLUTE),
2171de2362d3Smrg                             SRC1_ELEM(ELEM_X),
2172de2362d3Smrg                             SRC1_NEG(0),
2173de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
2174de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2175de2362d3Smrg                             LAST(0));
2176de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2177de2362d3Smrg                                 SRC1_ABS(0),
2178de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
2179de2362d3Smrg                                 UPDATE_PRED(0),
2180de2362d3Smrg                                 WRITE_MASK(0),
2181de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2182de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2183de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2184de2362d3Smrg                                 DST_GPR(2),
2185de2362d3Smrg                                 DST_REL(ABSOLUTE),
2186de2362d3Smrg                                 DST_ELEM(ELEM_X),
2187de2362d3Smrg                                 CLAMP(0));
2188de2362d3Smrg
2189de2362d3Smrg    /* 39 srcY.y DOT4 - non-mask */
2190de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2191de2362d3Smrg                             SRC0_REL(ABSOLUTE),
2192de2362d3Smrg                             SRC0_ELEM(ELEM_Y),
2193de2362d3Smrg                             SRC0_NEG(0),
2194de2362d3Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2195de2362d3Smrg                             SRC1_REL(ABSOLUTE),
2196de2362d3Smrg                             SRC1_ELEM(ELEM_Y),
2197de2362d3Smrg                             SRC1_NEG(0),
2198de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
2199de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2200de2362d3Smrg                             LAST(0));
2201de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2202de2362d3Smrg                                 SRC1_ABS(0),
2203de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
2204de2362d3Smrg                                 UPDATE_PRED(0),
2205de2362d3Smrg                                 WRITE_MASK(1),
2206de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2207de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2208de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2209de2362d3Smrg                                 DST_GPR(2),
2210de2362d3Smrg                                 DST_REL(ABSOLUTE),
2211de2362d3Smrg                                 DST_ELEM(ELEM_Y),
2212de2362d3Smrg                                 CLAMP(0));
2213de2362d3Smrg
2214de2362d3Smrg    /* 40 srcY.z DOT4 - non-mask */
2215de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2216de2362d3Smrg                             SRC0_REL(ABSOLUTE),
2217de2362d3Smrg                             SRC0_ELEM(ELEM_Z),
2218de2362d3Smrg                             SRC0_NEG(0),
2219de2362d3Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2220de2362d3Smrg                             SRC1_REL(ABSOLUTE),
2221de2362d3Smrg                             SRC1_ELEM(ELEM_Z),
2222de2362d3Smrg                             SRC1_NEG(0),
2223de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
2224de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2225de2362d3Smrg                             LAST(0));
2226de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2227de2362d3Smrg                                 SRC1_ABS(0),
2228de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
2229de2362d3Smrg                                 UPDATE_PRED(0),
2230de2362d3Smrg                                 WRITE_MASK(0),
2231de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2232de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2233de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2234de2362d3Smrg                                 DST_GPR(2),
2235de2362d3Smrg                                 DST_REL(ABSOLUTE),
2236de2362d3Smrg                                 DST_ELEM(ELEM_Z),
2237de2362d3Smrg                                 CLAMP(0));
2238de2362d3Smrg
2239de2362d3Smrg    /* 41 srcY.w DOT4 - non-mask */
2240de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2241de2362d3Smrg                             SRC0_REL(ABSOLUTE),
2242de2362d3Smrg                             SRC0_ELEM(ELEM_W),
2243de2362d3Smrg                             SRC0_NEG(0),
2244de2362d3Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2245de2362d3Smrg                             SRC1_REL(ABSOLUTE),
2246de2362d3Smrg                             SRC1_ELEM(ELEM_W),
2247de2362d3Smrg                             SRC1_NEG(0),
2248de2362d3Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
2249de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2250de2362d3Smrg                             LAST(1));
2251de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2252de2362d3Smrg                                 SRC1_ABS(0),
2253de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
2254de2362d3Smrg                                 UPDATE_PRED(0),
2255de2362d3Smrg                                 WRITE_MASK(0),
2256de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2257de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2258de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2259de2362d3Smrg                                 DST_GPR(2),
2260de2362d3Smrg                                 DST_REL(ABSOLUTE),
2261de2362d3Smrg                                 DST_ELEM(ELEM_W),
2262de2362d3Smrg                                 CLAMP(0));
2263de2362d3Smrg
2264de2362d3Smrg    /* 42 srcX / w */
2265de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
2266de2362d3Smrg                             SRC0_REL(ABSOLUTE),
2267de2362d3Smrg                             SRC0_ELEM(ELEM_X),
2268de2362d3Smrg                             SRC0_NEG(0),
2269de2362d3Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2270de2362d3Smrg                             SRC1_REL(ABSOLUTE),
2271de2362d3Smrg                             SRC1_ELEM(ELEM_W),
2272de2362d3Smrg                             SRC1_NEG(0),
2273de2362d3Smrg                             INDEX_MODE(SQ_INDEX_AR_X),
2274de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2275de2362d3Smrg                             LAST(1));
2276de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2277de2362d3Smrg                                 SRC1_ABS(0),
2278de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
2279de2362d3Smrg                                 UPDATE_PRED(0),
2280de2362d3Smrg                                 WRITE_MASK(1),
2281de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2282de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_MUL),
2283de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2284de2362d3Smrg                                 DST_GPR(0),
2285de2362d3Smrg                                 DST_REL(ABSOLUTE),
2286de2362d3Smrg                                 DST_ELEM(ELEM_X),
2287de2362d3Smrg                                 CLAMP(0));
2288de2362d3Smrg
2289de2362d3Smrg    /* 43 srcY / h */
2290de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
2291de2362d3Smrg                             SRC0_REL(ABSOLUTE),
2292de2362d3Smrg                             SRC0_ELEM(ELEM_Y),
2293de2362d3Smrg                             SRC0_NEG(0),
2294de2362d3Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2295de2362d3Smrg                             SRC1_REL(ABSOLUTE),
2296de2362d3Smrg                             SRC1_ELEM(ELEM_W),
2297de2362d3Smrg                             SRC1_NEG(0),
2298de2362d3Smrg                             INDEX_MODE(SQ_INDEX_AR_X),
2299de2362d3Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2300de2362d3Smrg                             LAST(1));
2301de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2302de2362d3Smrg                                 SRC1_ABS(0),
2303de2362d3Smrg                                 UPDATE_EXECUTE_MASK(0),
2304de2362d3Smrg                                 UPDATE_PRED(0),
2305de2362d3Smrg                                 WRITE_MASK(1),
2306de2362d3Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2307de2362d3Smrg                                 ALU_INST(SQ_OP2_INST_MUL),
2308de2362d3Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2309de2362d3Smrg                                 DST_GPR(0),
2310de2362d3Smrg                                 DST_REL(ABSOLUTE),
2311de2362d3Smrg                                 DST_ELEM(ELEM_Y),
2312de2362d3Smrg                                 CLAMP(0));
2313de2362d3Smrg
2314de2362d3Smrg    /* mask vfetch - 44/45 - dst */
2315de2362d3Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2316de2362d3Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2317de2362d3Smrg			     FETCH_WHOLE_QUAD(0),
2318de2362d3Smrg			     BUFFER_ID(0),
2319de2362d3Smrg			     SRC_GPR(0),
2320de2362d3Smrg			     SRC_REL(ABSOLUTE),
2321de2362d3Smrg			     SRC_SEL_X(SQ_SEL_X),
2322de2362d3Smrg			     MEGA_FETCH_COUNT(24));
2323de2362d3Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(2),
2324de2362d3Smrg				 DST_REL(0),
2325de2362d3Smrg				 DST_SEL_X(SQ_SEL_X),
2326de2362d3Smrg				 DST_SEL_Y(SQ_SEL_Y),
2327de2362d3Smrg				 DST_SEL_Z(SQ_SEL_0),
2328de2362d3Smrg				 DST_SEL_W(SQ_SEL_1),
2329de2362d3Smrg				 USE_CONST_FIELDS(0),
2330de2362d3Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
2331de2362d3Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2332de2362d3Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2333de2362d3Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2334de2362d3Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
2335de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
2336de2362d3Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2337de2362d3Smrg#else
2338de2362d3Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2339de2362d3Smrg#endif
2340de2362d3Smrg			     CONST_BUF_NO_STRIDE(0),
2341de2362d3Smrg			     MEGA_FETCH(1),
2342de2362d3Smrg			     ALT_CONST(0),
2343de2362d3Smrg			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2344de2362d3Smrg    shader[i++] = VTX_DWORD_PAD;
2345de2362d3Smrg    /* 46/47 - src */
2346de2362d3Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2347de2362d3Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2348de2362d3Smrg			     FETCH_WHOLE_QUAD(0),
2349de2362d3Smrg			     BUFFER_ID(0),
2350de2362d3Smrg			     SRC_GPR(0),
2351de2362d3Smrg			     SRC_REL(ABSOLUTE),
2352de2362d3Smrg			     SRC_SEL_X(SQ_SEL_X),
2353de2362d3Smrg			     MEGA_FETCH_COUNT(8));
2354de2362d3Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
2355de2362d3Smrg				 DST_REL(0),
2356de2362d3Smrg				 DST_SEL_X(SQ_SEL_X),
2357de2362d3Smrg				 DST_SEL_Y(SQ_SEL_Y),
2358de2362d3Smrg				 DST_SEL_Z(SQ_SEL_1),
2359de2362d3Smrg				 DST_SEL_W(SQ_SEL_0),
2360de2362d3Smrg				 USE_CONST_FIELDS(0),
2361de2362d3Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
2362de2362d3Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2363de2362d3Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2364de2362d3Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2365de2362d3Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
2366de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
2367de2362d3Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2368de2362d3Smrg#else
2369de2362d3Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2370de2362d3Smrg#endif
2371de2362d3Smrg			     CONST_BUF_NO_STRIDE(0),
2372de2362d3Smrg			     MEGA_FETCH(0),
2373de2362d3Smrg			     ALT_CONST(0),
2374de2362d3Smrg			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2375de2362d3Smrg    shader[i++] = VTX_DWORD_PAD;
2376de2362d3Smrg    /* 48/49 - mask */
2377de2362d3Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2378de2362d3Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2379de2362d3Smrg			     FETCH_WHOLE_QUAD(0),
2380de2362d3Smrg			     BUFFER_ID(0),
2381de2362d3Smrg			     SRC_GPR(0),
2382de2362d3Smrg			     SRC_REL(ABSOLUTE),
2383de2362d3Smrg			     SRC_SEL_X(SQ_SEL_X),
2384de2362d3Smrg			     MEGA_FETCH_COUNT(8));
2385de2362d3Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
2386de2362d3Smrg				 DST_REL(0),
2387de2362d3Smrg				 DST_SEL_X(SQ_SEL_X),
2388de2362d3Smrg				 DST_SEL_Y(SQ_SEL_Y),
2389de2362d3Smrg				 DST_SEL_Z(SQ_SEL_1),
2390de2362d3Smrg				 DST_SEL_W(SQ_SEL_0),
2391de2362d3Smrg				 USE_CONST_FIELDS(0),
2392de2362d3Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
2393de2362d3Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2394de2362d3Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2395de2362d3Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2396de2362d3Smrg    shader[i++] = VTX_DWORD2(OFFSET(16),
2397de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
2398de2362d3Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2399de2362d3Smrg#else
2400de2362d3Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2401de2362d3Smrg#endif
2402de2362d3Smrg			     CONST_BUF_NO_STRIDE(0),
2403de2362d3Smrg			     MEGA_FETCH(0),
2404de2362d3Smrg			     ALT_CONST(0),
2405de2362d3Smrg			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2406de2362d3Smrg    shader[i++] = VTX_DWORD_PAD;
2407de2362d3Smrg
2408de2362d3Smrg    /* no mask vfetch - 50/51 - dst */
2409de2362d3Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2410de2362d3Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2411de2362d3Smrg			     FETCH_WHOLE_QUAD(0),
2412de2362d3Smrg			     BUFFER_ID(0),
2413de2362d3Smrg			     SRC_GPR(0),
2414de2362d3Smrg			     SRC_REL(ABSOLUTE),
2415de2362d3Smrg			     SRC_SEL_X(SQ_SEL_X),
2416de2362d3Smrg			     MEGA_FETCH_COUNT(16));
2417de2362d3Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
2418de2362d3Smrg				 DST_REL(0),
2419de2362d3Smrg				 DST_SEL_X(SQ_SEL_X),
2420de2362d3Smrg				 DST_SEL_Y(SQ_SEL_Y),
2421de2362d3Smrg				 DST_SEL_Z(SQ_SEL_0),
2422de2362d3Smrg				 DST_SEL_W(SQ_SEL_1),
2423de2362d3Smrg				 USE_CONST_FIELDS(0),
2424de2362d3Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
2425de2362d3Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2426de2362d3Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2427de2362d3Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2428de2362d3Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
2429de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
2430de2362d3Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2431de2362d3Smrg#else
2432de2362d3Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2433de2362d3Smrg#endif
2434de2362d3Smrg			     CONST_BUF_NO_STRIDE(0),
2435de2362d3Smrg			     MEGA_FETCH(1),
2436de2362d3Smrg			     ALT_CONST(0),
2437de2362d3Smrg			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2438de2362d3Smrg    shader[i++] = VTX_DWORD_PAD;
2439de2362d3Smrg    /* 52/53 - src */
2440de2362d3Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2441de2362d3Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2442de2362d3Smrg			     FETCH_WHOLE_QUAD(0),
2443de2362d3Smrg			     BUFFER_ID(0),
2444de2362d3Smrg			     SRC_GPR(0),
2445de2362d3Smrg			     SRC_REL(ABSOLUTE),
2446de2362d3Smrg			     SRC_SEL_X(SQ_SEL_X),
2447de2362d3Smrg			     MEGA_FETCH_COUNT(8));
2448de2362d3Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
2449de2362d3Smrg				 DST_REL(0),
2450de2362d3Smrg				 DST_SEL_X(SQ_SEL_X),
2451de2362d3Smrg				 DST_SEL_Y(SQ_SEL_Y),
2452de2362d3Smrg				 DST_SEL_Z(SQ_SEL_1),
2453de2362d3Smrg				 DST_SEL_W(SQ_SEL_0),
2454de2362d3Smrg				 USE_CONST_FIELDS(0),
2455de2362d3Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
2456de2362d3Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2457de2362d3Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2458de2362d3Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2459de2362d3Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
2460de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
2461de2362d3Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2462de2362d3Smrg#else
2463de2362d3Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2464de2362d3Smrg#endif
2465de2362d3Smrg			     CONST_BUF_NO_STRIDE(0),
2466de2362d3Smrg			     MEGA_FETCH(0),
2467de2362d3Smrg                             ALT_CONST(0),
2468de2362d3Smrg                             BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2469de2362d3Smrg    shader[i++] = VTX_DWORD_PAD;
2470de2362d3Smrg
2471de2362d3Smrg    return i;
2472de2362d3Smrg}
2473de2362d3Smrg
2474de2362d3Smrg/* comp ps --------------------------------------- */
247518781e08Smrgint evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t *shader)
2476de2362d3Smrg{
2477de2362d3Smrg    int i = 0;
2478de2362d3Smrg
2479de2362d3Smrg    /* 0 */
248018781e08Smrg    /* call interp-fetch-mask if boolean1 == true */
248118781e08Smrg    shader[i++] = CF_DWORD0(ADDR(11),
2482de2362d3Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2483de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
248418781e08Smrg                            CF_CONST(1),
2485de2362d3Smrg                            COND(SQ_CF_COND_BOOL),
2486de2362d3Smrg                            I_COUNT(0),
2487de2362d3Smrg                            VALID_PIXEL_MODE(0),
2488de2362d3Smrg                            END_OF_PROGRAM(0),
2489de2362d3Smrg                            CF_INST(SQ_CF_INST_CALL),
2490de2362d3Smrg                            WHOLE_QUAD_MODE(0),
2491de2362d3Smrg                            BARRIER(0));
249218781e08Smrg
2493de2362d3Smrg    /* 1 */
249418781e08Smrg    /* call read-constant-mask if boolean1 == false */
249518781e08Smrg    shader[i++] = CF_DWORD0(ADDR(14),
2496de2362d3Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2497de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
249818781e08Smrg                            CF_CONST(1),
2499de2362d3Smrg                            COND(SQ_CF_COND_NOT_BOOL),
2500de2362d3Smrg                            I_COUNT(0),
2501de2362d3Smrg                            VALID_PIXEL_MODE(0),
2502de2362d3Smrg                            END_OF_PROGRAM(0),
2503de2362d3Smrg                            CF_INST(SQ_CF_INST_CALL),
2504de2362d3Smrg                            WHOLE_QUAD_MODE(0),
2505de2362d3Smrg                            BARRIER(0));
250618781e08Smrg
2507de2362d3Smrg    /* 2 */
250818781e08Smrg    /* call interp-fetch-src if boolean0 == true */
250918781e08Smrg    shader[i++] = CF_DWORD0(ADDR(6),
251018781e08Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2511de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
2512de2362d3Smrg                            CF_CONST(0),
251318781e08Smrg                            COND(SQ_CF_COND_BOOL),
2514de2362d3Smrg                            I_COUNT(0),
2515de2362d3Smrg                            VALID_PIXEL_MODE(0),
251618781e08Smrg                            END_OF_PROGRAM(0),
251718781e08Smrg                            CF_INST(SQ_CF_INST_CALL),
2518de2362d3Smrg                            WHOLE_QUAD_MODE(0),
251918781e08Smrg                            BARRIER(0));
2520de2362d3Smrg
252118781e08Smrg    /* 3 */
252218781e08Smrg    /* call read-constant-src if boolean0 == false */
252318781e08Smrg    shader[i++] = CF_DWORD0(ADDR(9),
252418781e08Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
252518781e08Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
252618781e08Smrg                            CF_CONST(0),
252718781e08Smrg                            COND(SQ_CF_COND_NOT_BOOL),
252818781e08Smrg                            I_COUNT(0),
252918781e08Smrg                            VALID_PIXEL_MODE(0),
253018781e08Smrg                            END_OF_PROGRAM(0),
253118781e08Smrg                            CF_INST(SQ_CF_INST_CALL),
253218781e08Smrg                            WHOLE_QUAD_MODE(0),
253318781e08Smrg                            BARRIER(0));
253418781e08Smrg    /* 4 */
253518781e08Smrg    /* src IN mask (GPR0 := GPR0 .* GPR1) */
253618781e08Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(16),
2537de2362d3Smrg				KCACHE_BANK0(0),
2538de2362d3Smrg				KCACHE_BANK1(0),
2539de2362d3Smrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
2540de2362d3Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2541de2362d3Smrg				KCACHE_ADDR0(0),
2542de2362d3Smrg				KCACHE_ADDR1(0),
254318781e08Smrg				I_COUNT(4),
2544de2362d3Smrg				ALT_CONST(0),
2545de2362d3Smrg				CF_INST(SQ_CF_INST_ALU),
2546de2362d3Smrg				WHOLE_QUAD_MODE(0),
2547de2362d3Smrg				BARRIER(1));
2548de2362d3Smrg
254918781e08Smrg    /* 5 */
255018781e08Smrg    /* export pixel data */
255118781e08Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
255218781e08Smrg					  TYPE(SQ_EXPORT_PIXEL),
255318781e08Smrg					  RW_GPR(0),
255418781e08Smrg					  RW_REL(ABSOLUTE),
255518781e08Smrg					  INDEX_GPR(0),
255618781e08Smrg					  ELEM_SIZE(1));
255718781e08Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
255818781e08Smrg					       SRC_SEL_Y(SQ_SEL_Y),
255918781e08Smrg					       SRC_SEL_Z(SQ_SEL_Z),
256018781e08Smrg					       SRC_SEL_W(SQ_SEL_W),
256118781e08Smrg					       BURST_COUNT(1),
256218781e08Smrg					       VALID_PIXEL_MODE(0),
256318781e08Smrg					       END_OF_PROGRAM(1),
256418781e08Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
256518781e08Smrg					       MARK(0),
256618781e08Smrg					       BARRIER(1));
256718781e08Smrg
256818781e08Smrg    /* subroutine interp-fetch-src */
256918781e08Smrg
257018781e08Smrg    /* 6 */
257118781e08Smrg    /* interpolate src */
257218781e08Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(20),
257318781e08Smrg				KCACHE_BANK0(0),
257418781e08Smrg				KCACHE_BANK1(0),
257518781e08Smrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
257618781e08Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
257718781e08Smrg				KCACHE_ADDR0(0),
257818781e08Smrg				KCACHE_ADDR1(0),
257918781e08Smrg				I_COUNT(4),
258018781e08Smrg				ALT_CONST(0),
258118781e08Smrg				CF_INST(SQ_CF_INST_ALU),
258218781e08Smrg				WHOLE_QUAD_MODE(0),
258318781e08Smrg				BARRIER(1));
258418781e08Smrg
258518781e08Smrg    /* 7 */
258618781e08Smrg    /* texture fetch src into GPR0 */
258718781e08Smrg    shader[i++] = CF_DWORD0(ADDR(24),
2588de2362d3Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2589de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
2590de2362d3Smrg			    CF_CONST(0),
2591de2362d3Smrg			    COND(SQ_CF_COND_ACTIVE),
259218781e08Smrg			    I_COUNT(1),
2593de2362d3Smrg			    VALID_PIXEL_MODE(0),
2594de2362d3Smrg			    END_OF_PROGRAM(0),
2595de2362d3Smrg			    CF_INST(SQ_CF_INST_TC),
2596de2362d3Smrg			    WHOLE_QUAD_MODE(0),
2597de2362d3Smrg			    BARRIER(1));
2598de2362d3Smrg
259918781e08Smrg    /* 8 */
260018781e08Smrg    /* return */
260118781e08Smrg    shader[i++] = CF_DWORD0(ADDR(0),
260218781e08Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
260318781e08Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
260418781e08Smrg			    CF_CONST(0),
260518781e08Smrg			    COND(SQ_CF_COND_ACTIVE),
260618781e08Smrg			    I_COUNT(0),
260718781e08Smrg			    VALID_PIXEL_MODE(0),
260818781e08Smrg			    END_OF_PROGRAM(0),
260918781e08Smrg			    CF_INST(SQ_CF_INST_RETURN),
261018781e08Smrg			    WHOLE_QUAD_MODE(0),
261118781e08Smrg			    BARRIER(0));
261218781e08Smrg
261318781e08Smrg    /* subroutine read-constant-src */
261418781e08Smrg
261518781e08Smrg    /* 9 */
261618781e08Smrg    /* read constants into GPR0 */
261718781e08Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(26),
2618de2362d3Smrg				KCACHE_BANK0(0),
2619de2362d3Smrg				KCACHE_BANK1(0),
2620de2362d3Smrg				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
2621de2362d3Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2622de2362d3Smrg				KCACHE_ADDR0(0),
2623de2362d3Smrg				KCACHE_ADDR1(0),
2624de2362d3Smrg				I_COUNT(4),
262518781e08Smrg				ALT_CONST(1),
2626de2362d3Smrg				CF_INST(SQ_CF_INST_ALU),
2627de2362d3Smrg				WHOLE_QUAD_MODE(0),
2628de2362d3Smrg				BARRIER(1));
2629de2362d3Smrg
263018781e08Smrg    /* 10 */
263118781e08Smrg    /* return */
2632de2362d3Smrg    shader[i++] = CF_DWORD0(ADDR(0),
2633de2362d3Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2634de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
2635de2362d3Smrg			    CF_CONST(0),
2636de2362d3Smrg			    COND(SQ_CF_COND_ACTIVE),
2637de2362d3Smrg			    I_COUNT(0),
2638de2362d3Smrg			    VALID_PIXEL_MODE(0),
2639de2362d3Smrg			    END_OF_PROGRAM(0),
2640de2362d3Smrg			    CF_INST(SQ_CF_INST_RETURN),
2641de2362d3Smrg			    WHOLE_QUAD_MODE(0),
264218781e08Smrg			    BARRIER(0));
264318781e08Smrg
264418781e08Smrg    /* subroutine interp-fetch-mask */
2645de2362d3Smrg
264618781e08Smrg    /* 11 */
264718781e08Smrg    /* interpolate mask */
264818781e08Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(30),
2649de2362d3Smrg				KCACHE_BANK0(0),
2650de2362d3Smrg				KCACHE_BANK1(0),
2651de2362d3Smrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
2652de2362d3Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2653de2362d3Smrg				KCACHE_ADDR0(0),
2654de2362d3Smrg				KCACHE_ADDR1(0),
2655de2362d3Smrg				I_COUNT(4),
2656de2362d3Smrg				ALT_CONST(0),
2657de2362d3Smrg				CF_INST(SQ_CF_INST_ALU),
2658de2362d3Smrg				WHOLE_QUAD_MODE(0),
2659de2362d3Smrg				BARRIER(1));
266018781e08Smrg
266118781e08Smrg    /* 12 */
266218781e08Smrg    /* texture fetch mask into GPR1 */
266318781e08Smrg    shader[i++] = CF_DWORD0(ADDR(34),
2664de2362d3Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2665de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
2666de2362d3Smrg			    CF_CONST(0),
2667de2362d3Smrg			    COND(SQ_CF_COND_ACTIVE),
2668de2362d3Smrg			    I_COUNT(1),
2669de2362d3Smrg			    VALID_PIXEL_MODE(0),
2670de2362d3Smrg			    END_OF_PROGRAM(0),
2671de2362d3Smrg			    CF_INST(SQ_CF_INST_TC),
2672de2362d3Smrg			    WHOLE_QUAD_MODE(0),
2673de2362d3Smrg			    BARRIER(1));
2674de2362d3Smrg
267518781e08Smrg    /* 13 */
267618781e08Smrg    /* return */
267718781e08Smrg    shader[i++] = CF_DWORD0(ADDR(0),
267818781e08Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
267918781e08Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
268018781e08Smrg			    CF_CONST(0),
268118781e08Smrg			    COND(SQ_CF_COND_ACTIVE),
268218781e08Smrg			    I_COUNT(0),
268318781e08Smrg			    VALID_PIXEL_MODE(0),
268418781e08Smrg			    END_OF_PROGRAM(0),
268518781e08Smrg			    CF_INST(SQ_CF_INST_RETURN),
268618781e08Smrg			    WHOLE_QUAD_MODE(0),
268718781e08Smrg			    BARRIER(0));
2688de2362d3Smrg
268918781e08Smrg    /* subroutine read-constant-mask */
269018781e08Smrg
269118781e08Smrg    /* 14 */
269218781e08Smrg    /* read constants into GPR1 */
269318781e08Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(36),
269418781e08Smrg				KCACHE_BANK0(0),
269518781e08Smrg				KCACHE_BANK1(0),
269618781e08Smrg				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
269718781e08Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
269818781e08Smrg				KCACHE_ADDR0(0),
269918781e08Smrg				KCACHE_ADDR1(0),
270018781e08Smrg				I_COUNT(4),
270118781e08Smrg				ALT_CONST(1),
270218781e08Smrg				CF_INST(SQ_CF_INST_ALU),
270318781e08Smrg				WHOLE_QUAD_MODE(0),
270418781e08Smrg				BARRIER(1));
270518781e08Smrg
270618781e08Smrg    /* 15 */
270718781e08Smrg    /* return */
2708de2362d3Smrg    shader[i++] = CF_DWORD0(ADDR(0),
2709de2362d3Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2710de2362d3Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
2711de2362d3Smrg			    CF_CONST(0),
2712de2362d3Smrg			    COND(SQ_CF_COND_ACTIVE),
2713de2362d3Smrg			    I_COUNT(0),
2714de2362d3Smrg			    VALID_PIXEL_MODE(0),
2715de2362d3Smrg			    END_OF_PROGRAM(0),
2716de2362d3Smrg			    CF_INST(SQ_CF_INST_RETURN),
2717de2362d3Smrg			    WHOLE_QUAD_MODE(0),
271818781e08Smrg			    BARRIER(0));
2719de2362d3Smrg
272018781e08Smrg    /* ALU clauses */
272118781e08Smrg
272218781e08Smrg    /* 16 */
272318781e08Smrg    /* MUL gpr[0].x gpr[0].x gpr[1].x */
2724de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2725de2362d3Smrg			     SRC0_REL(ABSOLUTE),
272618781e08Smrg			     SRC0_ELEM(ELEM_X),
2727de2362d3Smrg			     SRC0_NEG(0),
272818781e08Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
2729de2362d3Smrg			     SRC1_REL(ABSOLUTE),
2730de2362d3Smrg			     SRC1_ELEM(ELEM_X),
2731de2362d3Smrg			     SRC1_NEG(0),
273218781e08Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
2733de2362d3Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2734de2362d3Smrg			     LAST(0));
2735de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2736de2362d3Smrg				 SRC1_ABS(0),
2737de2362d3Smrg				 UPDATE_EXECUTE_MASK(0),
2738de2362d3Smrg				 UPDATE_PRED(0),
2739de2362d3Smrg				 WRITE_MASK(1),
2740de2362d3Smrg				 OMOD(SQ_ALU_OMOD_OFF),
274118781e08Smrg				 ALU_INST(SQ_OP2_INST_MUL),
274218781e08Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
274318781e08Smrg				 DST_GPR(0),
2744de2362d3Smrg				 DST_REL(ABSOLUTE),
2745de2362d3Smrg				 DST_ELEM(ELEM_X),
274618781e08Smrg				 CLAMP(1));
274718781e08Smrg
274818781e08Smrg    /* 17 */
274918781e08Smrg    /* MUL gpr[0].y gpr[0].y gpr[1].y */
2750de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2751de2362d3Smrg			     SRC0_REL(ABSOLUTE),
275218781e08Smrg			     SRC0_ELEM(ELEM_Y),
2753de2362d3Smrg			     SRC0_NEG(0),
275418781e08Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
2755de2362d3Smrg			     SRC1_REL(ABSOLUTE),
275618781e08Smrg			     SRC1_ELEM(ELEM_Y),
2757de2362d3Smrg			     SRC1_NEG(0),
275818781e08Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
2759de2362d3Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2760de2362d3Smrg			     LAST(0));
2761de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2762de2362d3Smrg				 SRC1_ABS(0),
2763de2362d3Smrg				 UPDATE_EXECUTE_MASK(0),
2764de2362d3Smrg				 UPDATE_PRED(0),
2765de2362d3Smrg				 WRITE_MASK(1),
2766de2362d3Smrg				 OMOD(SQ_ALU_OMOD_OFF),
276718781e08Smrg				 ALU_INST(SQ_OP2_INST_MUL),
276818781e08Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
276918781e08Smrg				 DST_GPR(0),
2770de2362d3Smrg				 DST_REL(ABSOLUTE),
2771de2362d3Smrg				 DST_ELEM(ELEM_Y),
277218781e08Smrg				 CLAMP(1));
277318781e08Smrg    /* 18 */
277418781e08Smrg    /* MUL gpr[0].z gpr[0].z gpr[1].z */
2775de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2776de2362d3Smrg			     SRC0_REL(ABSOLUTE),
277718781e08Smrg			     SRC0_ELEM(ELEM_Z),
2778de2362d3Smrg			     SRC0_NEG(0),
277918781e08Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
2780de2362d3Smrg			     SRC1_REL(ABSOLUTE),
278118781e08Smrg			     SRC1_ELEM(ELEM_Z),
2782de2362d3Smrg			     SRC1_NEG(0),
278318781e08Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
2784de2362d3Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2785de2362d3Smrg			     LAST(0));
2786de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2787de2362d3Smrg				 SRC1_ABS(0),
2788de2362d3Smrg				 UPDATE_EXECUTE_MASK(0),
2789de2362d3Smrg				 UPDATE_PRED(0),
279018781e08Smrg				 WRITE_MASK(1),
2791de2362d3Smrg				 OMOD(SQ_ALU_OMOD_OFF),
279218781e08Smrg				 ALU_INST(SQ_OP2_INST_MUL),
279318781e08Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
279418781e08Smrg				 DST_GPR(0),
2795de2362d3Smrg				 DST_REL(ABSOLUTE),
2796de2362d3Smrg				 DST_ELEM(ELEM_Z),
279718781e08Smrg				 CLAMP(1));
279818781e08Smrg    /* 19 */
279918781e08Smrg    /* MUL gpr[0].w gpr[0].w gpr[1].w */
2800de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2801de2362d3Smrg			     SRC0_REL(ABSOLUTE),
280218781e08Smrg			     SRC0_ELEM(ELEM_W),
2803de2362d3Smrg			     SRC0_NEG(0),
280418781e08Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
2805de2362d3Smrg			     SRC1_REL(ABSOLUTE),
280618781e08Smrg			     SRC1_ELEM(ELEM_W),
2807de2362d3Smrg			     SRC1_NEG(0),
280818781e08Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
2809de2362d3Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2810de2362d3Smrg			     LAST(1));
2811de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2812de2362d3Smrg				 SRC1_ABS(0),
2813de2362d3Smrg				 UPDATE_EXECUTE_MASK(0),
2814de2362d3Smrg				 UPDATE_PRED(0),
281518781e08Smrg				 WRITE_MASK(1),
2816de2362d3Smrg				 OMOD(SQ_ALU_OMOD_OFF),
281718781e08Smrg				 ALU_INST(SQ_OP2_INST_MUL),
281818781e08Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
281918781e08Smrg				 DST_GPR(0),
2820de2362d3Smrg				 DST_REL(ABSOLUTE),
2821de2362d3Smrg				 DST_ELEM(ELEM_W),
282218781e08Smrg				 CLAMP(1));
2823de2362d3Smrg
282418781e08Smrg    /* 20 */
282518781e08Smrg    /* INTERP_XY GPR0.x, GPR0.y PARAM0.x */
2826de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2827de2362d3Smrg			     SRC0_REL(ABSOLUTE),
2828de2362d3Smrg			     SRC0_ELEM(ELEM_Y),
2829de2362d3Smrg			     SRC0_NEG(0),
283018781e08Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2831de2362d3Smrg			     SRC1_REL(ABSOLUTE),
2832de2362d3Smrg			     SRC1_ELEM(ELEM_X),
2833de2362d3Smrg			     SRC1_NEG(0),
2834de2362d3Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
2835de2362d3Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2836de2362d3Smrg			     LAST(0));
2837de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2838de2362d3Smrg				 SRC1_ABS(0),
2839de2362d3Smrg				 UPDATE_EXECUTE_MASK(0),
2840de2362d3Smrg				 UPDATE_PRED(0),
2841de2362d3Smrg				 WRITE_MASK(1),
2842de2362d3Smrg				 OMOD(SQ_ALU_OMOD_OFF),
2843de2362d3Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2844de2362d3Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2845de2362d3Smrg				 DST_GPR(0),
2846de2362d3Smrg				 DST_REL(ABSOLUTE),
2847de2362d3Smrg				 DST_ELEM(ELEM_X),
2848de2362d3Smrg				 CLAMP(0));
284918781e08Smrg    /* 21 */
285018781e08Smrg    /* INTERP_XY GPR0.y, GPR0.x PARAM0.x */
2851de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2852de2362d3Smrg			     SRC0_REL(ABSOLUTE),
2853de2362d3Smrg			     SRC0_ELEM(ELEM_X),
2854de2362d3Smrg			     SRC0_NEG(0),
285518781e08Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2856de2362d3Smrg			     SRC1_REL(ABSOLUTE),
2857de2362d3Smrg			     SRC1_ELEM(ELEM_X),
2858de2362d3Smrg			     SRC1_NEG(0),
2859de2362d3Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
2860de2362d3Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2861de2362d3Smrg			     LAST(0));
2862de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2863de2362d3Smrg				 SRC1_ABS(0),
2864de2362d3Smrg				 UPDATE_EXECUTE_MASK(0),
2865de2362d3Smrg				 UPDATE_PRED(0),
2866de2362d3Smrg				 WRITE_MASK(1),
2867de2362d3Smrg				 OMOD(SQ_ALU_OMOD_OFF),
2868de2362d3Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2869de2362d3Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2870de2362d3Smrg				 DST_GPR(0),
2871de2362d3Smrg				 DST_REL(ABSOLUTE),
2872de2362d3Smrg				 DST_ELEM(ELEM_Y),
2873de2362d3Smrg				 CLAMP(0));
287418781e08Smrg    /* 22 */
287518781e08Smrg    /* INTERP_XY GPR0.z, GPR0.y PARAM0.x */
2876de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2877de2362d3Smrg			     SRC0_REL(ABSOLUTE),
2878de2362d3Smrg			     SRC0_ELEM(ELEM_Y),
2879de2362d3Smrg			     SRC0_NEG(0),
288018781e08Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2881de2362d3Smrg			     SRC1_REL(ABSOLUTE),
2882de2362d3Smrg			     SRC1_ELEM(ELEM_X),
2883de2362d3Smrg			     SRC1_NEG(0),
2884de2362d3Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
2885de2362d3Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2886de2362d3Smrg			     LAST(0));
2887de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2888de2362d3Smrg				 SRC1_ABS(0),
2889de2362d3Smrg				 UPDATE_EXECUTE_MASK(0),
2890de2362d3Smrg				 UPDATE_PRED(0),
2891de2362d3Smrg				 WRITE_MASK(0),
2892de2362d3Smrg				 OMOD(SQ_ALU_OMOD_OFF),
2893de2362d3Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2894de2362d3Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2895de2362d3Smrg				 DST_GPR(0),
2896de2362d3Smrg				 DST_REL(ABSOLUTE),
2897de2362d3Smrg				 DST_ELEM(ELEM_Z),
2898de2362d3Smrg				 CLAMP(0));
289918781e08Smrg
290018781e08Smrg    /* 23 */
290118781e08Smrg    /* INTERP_XY GPR0.w, GPR0.x PARAM0.x */
2902de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2903de2362d3Smrg			     SRC0_REL(ABSOLUTE),
2904de2362d3Smrg			     SRC0_ELEM(ELEM_X),
2905de2362d3Smrg			     SRC0_NEG(0),
290618781e08Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2907de2362d3Smrg			     SRC1_REL(ABSOLUTE),
2908de2362d3Smrg			     SRC1_ELEM(ELEM_X),
2909de2362d3Smrg			     SRC1_NEG(0),
2910de2362d3Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
2911de2362d3Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2912de2362d3Smrg			     LAST(1));
2913de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2914de2362d3Smrg				 SRC1_ABS(0),
2915de2362d3Smrg				 UPDATE_EXECUTE_MASK(0),
2916de2362d3Smrg				 UPDATE_PRED(0),
2917de2362d3Smrg				 WRITE_MASK(0),
2918de2362d3Smrg				 OMOD(SQ_ALU_OMOD_OFF),
2919de2362d3Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2920de2362d3Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2921de2362d3Smrg				 DST_GPR(0),
2922de2362d3Smrg				 DST_REL(ABSOLUTE),
2923de2362d3Smrg				 DST_ELEM(ELEM_W),
2924de2362d3Smrg				 CLAMP(0));
2925de2362d3Smrg
292618781e08Smrg    /* 24/25 */
292718781e08Smrg    /* SAMPLE RID=0 GPR0, GPR0 */
292818781e08Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
292918781e08Smrg			     INST_MOD(0),
293018781e08Smrg			     FETCH_WHOLE_QUAD(0),
293118781e08Smrg			     RESOURCE_ID(0),
293218781e08Smrg			     SRC_GPR(0),
293318781e08Smrg			     SRC_REL(ABSOLUTE),
293418781e08Smrg			     ALT_CONST(0),
293518781e08Smrg			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
293618781e08Smrg			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
293718781e08Smrg    shader[i++] = TEX_DWORD1(DST_GPR(0),
293818781e08Smrg			     DST_REL(ABSOLUTE),
293918781e08Smrg			     DST_SEL_X(SQ_SEL_X),
294018781e08Smrg			     DST_SEL_Y(SQ_SEL_Y),
294118781e08Smrg			     DST_SEL_Z(SQ_SEL_Z),
294218781e08Smrg			     DST_SEL_W(SQ_SEL_W),
294318781e08Smrg			     LOD_BIAS(0),
294418781e08Smrg			     COORD_TYPE_X(TEX_NORMALIZED),
294518781e08Smrg			     COORD_TYPE_Y(TEX_NORMALIZED),
294618781e08Smrg			     COORD_TYPE_Z(TEX_NORMALIZED),
294718781e08Smrg			     COORD_TYPE_W(TEX_NORMALIZED));
294818781e08Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
294918781e08Smrg			     OFFSET_Y(0),
295018781e08Smrg			     OFFSET_Z(0),
295118781e08Smrg			     SAMPLER_ID(0),
295218781e08Smrg			     SRC_SEL_X(SQ_SEL_X),
295318781e08Smrg			     SRC_SEL_Y(SQ_SEL_Y),
295418781e08Smrg			     SRC_SEL_Z(SQ_SEL_0),
295518781e08Smrg			     SRC_SEL_W(SQ_SEL_1));
295618781e08Smrg    shader[i++] = TEX_DWORD_PAD;
295718781e08Smrg
295818781e08Smrg    /* 26 */
295918781e08Smrg    /* MOV GPR0.x, KC4.x */
296018781e08Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4),
2961de2362d3Smrg			     SRC0_REL(ABSOLUTE),
2962de2362d3Smrg			     SRC0_ELEM(ELEM_X),
2963de2362d3Smrg			     SRC0_NEG(0),
296418781e08Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2965de2362d3Smrg			     SRC1_REL(ABSOLUTE),
2966de2362d3Smrg			     SRC1_ELEM(ELEM_X),
2967de2362d3Smrg			     SRC1_NEG(0),
296818781e08Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
2969de2362d3Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2970de2362d3Smrg			     LAST(0));
2971de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2972de2362d3Smrg				 SRC1_ABS(0),
2973de2362d3Smrg				 UPDATE_EXECUTE_MASK(0),
2974de2362d3Smrg				 UPDATE_PRED(0),
2975de2362d3Smrg				 WRITE_MASK(1),
2976de2362d3Smrg				 OMOD(SQ_ALU_OMOD_OFF),
297718781e08Smrg				 ALU_INST(SQ_OP2_INST_MOV),
2978de2362d3Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
297918781e08Smrg				 DST_GPR(0),
2980de2362d3Smrg				 DST_REL(ABSOLUTE),
2981de2362d3Smrg				 DST_ELEM(ELEM_X),
2982de2362d3Smrg				 CLAMP(1));
298318781e08Smrg
298418781e08Smrg    /* 27 */
298518781e08Smrg    /* MOV GPR0.y, KC4.y */
298618781e08Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4),
2987de2362d3Smrg			     SRC0_REL(ABSOLUTE),
2988de2362d3Smrg			     SRC0_ELEM(ELEM_Y),
2989de2362d3Smrg			     SRC0_NEG(0),
299018781e08Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2991de2362d3Smrg			     SRC1_REL(ABSOLUTE),
299218781e08Smrg			     SRC1_ELEM(ELEM_X),
2993de2362d3Smrg			     SRC1_NEG(0),
299418781e08Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
2995de2362d3Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2996de2362d3Smrg			     LAST(0));
2997de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2998de2362d3Smrg				 SRC1_ABS(0),
2999de2362d3Smrg				 UPDATE_EXECUTE_MASK(0),
3000de2362d3Smrg				 UPDATE_PRED(0),
3001de2362d3Smrg				 WRITE_MASK(1),
3002de2362d3Smrg				 OMOD(SQ_ALU_OMOD_OFF),
300318781e08Smrg				 ALU_INST(SQ_OP2_INST_MOV),
3004de2362d3Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
300518781e08Smrg				 DST_GPR(0),
3006de2362d3Smrg				 DST_REL(ABSOLUTE),
3007de2362d3Smrg				 DST_ELEM(ELEM_Y),
3008de2362d3Smrg				 CLAMP(1));
300918781e08Smrg
301018781e08Smrg    /* 28  */
301118781e08Smrg    /* MOV GPR0.z, KC4.z */
301218781e08Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4),
3013de2362d3Smrg			     SRC0_REL(ABSOLUTE),
3014de2362d3Smrg			     SRC0_ELEM(ELEM_Z),
3015de2362d3Smrg			     SRC0_NEG(0),
301618781e08Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
3017de2362d3Smrg			     SRC1_REL(ABSOLUTE),
301818781e08Smrg			     SRC1_ELEM(ELEM_X),
3019de2362d3Smrg			     SRC1_NEG(0),
302018781e08Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
3021de2362d3Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
3022de2362d3Smrg			     LAST(0));
3023de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3024de2362d3Smrg				 SRC1_ABS(0),
3025de2362d3Smrg				 UPDATE_EXECUTE_MASK(0),
3026de2362d3Smrg				 UPDATE_PRED(0),
3027de2362d3Smrg				 WRITE_MASK(1),
3028de2362d3Smrg				 OMOD(SQ_ALU_OMOD_OFF),
302918781e08Smrg				 ALU_INST(SQ_OP2_INST_MOV),
3030de2362d3Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
303118781e08Smrg				 DST_GPR(0),
3032de2362d3Smrg				 DST_REL(ABSOLUTE),
3033de2362d3Smrg				 DST_ELEM(ELEM_Z),
3034de2362d3Smrg				 CLAMP(1));
303518781e08Smrg
303618781e08Smrg    /* 29 */
303718781e08Smrg    /* MOV GPR0.w, KC4.w */
303818781e08Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4),
3039de2362d3Smrg			     SRC0_REL(ABSOLUTE),
3040de2362d3Smrg			     SRC0_ELEM(ELEM_W),
3041de2362d3Smrg			     SRC0_NEG(0),
304218781e08Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
3043de2362d3Smrg			     SRC1_REL(ABSOLUTE),
304418781e08Smrg			     SRC1_ELEM(ELEM_X),
3045de2362d3Smrg			     SRC1_NEG(0),
304618781e08Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
3047de2362d3Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
3048de2362d3Smrg			     LAST(1));
3049de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3050de2362d3Smrg				 SRC1_ABS(0),
3051de2362d3Smrg				 UPDATE_EXECUTE_MASK(0),
3052de2362d3Smrg				 UPDATE_PRED(0),
3053de2362d3Smrg				 WRITE_MASK(1),
3054de2362d3Smrg				 OMOD(SQ_ALU_OMOD_OFF),
305518781e08Smrg				 ALU_INST(SQ_OP2_INST_MOV),
3056de2362d3Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
305718781e08Smrg				 DST_GPR(0),
3058de2362d3Smrg				 DST_REL(ABSOLUTE),
3059de2362d3Smrg				 DST_ELEM(ELEM_W),
3060de2362d3Smrg				 CLAMP(1));
3061de2362d3Smrg
306218781e08Smrg    /* 30 */
306318781e08Smrg    /* INTERP_XY GPR1.x, PARAM1 */
3064de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
3065de2362d3Smrg			     SRC0_REL(ABSOLUTE),
3066de2362d3Smrg			     SRC0_ELEM(ELEM_Y),
3067de2362d3Smrg			     SRC0_NEG(0),
306818781e08Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
3069de2362d3Smrg			     SRC1_REL(ABSOLUTE),
3070de2362d3Smrg			     SRC1_ELEM(ELEM_X),
3071de2362d3Smrg			     SRC1_NEG(0),
3072de2362d3Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
3073de2362d3Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
3074de2362d3Smrg			     LAST(0));
3075de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3076de2362d3Smrg				 SRC1_ABS(0),
3077de2362d3Smrg				 UPDATE_EXECUTE_MASK(0),
3078de2362d3Smrg				 UPDATE_PRED(0),
3079de2362d3Smrg				 WRITE_MASK(1),
3080de2362d3Smrg				 OMOD(SQ_ALU_OMOD_OFF),
3081de2362d3Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
3082de2362d3Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
308318781e08Smrg				 DST_GPR(1),
3084de2362d3Smrg				 DST_REL(ABSOLUTE),
3085de2362d3Smrg				 DST_ELEM(ELEM_X),
3086de2362d3Smrg				 CLAMP(0));
308718781e08Smrg    /* 31 */
308818781e08Smrg    /* INTERP_XY GPR1.y, PARAM1 */
3089de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
3090de2362d3Smrg			     SRC0_REL(ABSOLUTE),
3091de2362d3Smrg			     SRC0_ELEM(ELEM_X),
3092de2362d3Smrg			     SRC0_NEG(0),
309318781e08Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
3094de2362d3Smrg			     SRC1_REL(ABSOLUTE),
3095de2362d3Smrg			     SRC1_ELEM(ELEM_X),
3096de2362d3Smrg			     SRC1_NEG(0),
3097de2362d3Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
3098de2362d3Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
3099de2362d3Smrg			     LAST(0));
3100de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3101de2362d3Smrg				 SRC1_ABS(0),
3102de2362d3Smrg				 UPDATE_EXECUTE_MASK(0),
3103de2362d3Smrg				 UPDATE_PRED(0),
3104de2362d3Smrg				 WRITE_MASK(1),
3105de2362d3Smrg				 OMOD(SQ_ALU_OMOD_OFF),
3106de2362d3Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
3107de2362d3Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
310818781e08Smrg				 DST_GPR(1),
3109de2362d3Smrg				 DST_REL(ABSOLUTE),
3110de2362d3Smrg				 DST_ELEM(ELEM_Y),
3111de2362d3Smrg				 CLAMP(0));
311218781e08Smrg    /* 32 */
311318781e08Smrg    /* INTERP_XY GPR1.z, PARAM1 */
3114de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
3115de2362d3Smrg			     SRC0_REL(ABSOLUTE),
3116de2362d3Smrg			     SRC0_ELEM(ELEM_Y),
3117de2362d3Smrg			     SRC0_NEG(0),
311818781e08Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
3119de2362d3Smrg			     SRC1_REL(ABSOLUTE),
3120de2362d3Smrg			     SRC1_ELEM(ELEM_X),
3121de2362d3Smrg			     SRC1_NEG(0),
3122de2362d3Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
3123de2362d3Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
3124de2362d3Smrg			     LAST(0));
3125de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3126de2362d3Smrg				 SRC1_ABS(0),
3127de2362d3Smrg				 UPDATE_EXECUTE_MASK(0),
3128de2362d3Smrg				 UPDATE_PRED(0),
3129de2362d3Smrg				 WRITE_MASK(0),
3130de2362d3Smrg				 OMOD(SQ_ALU_OMOD_OFF),
3131de2362d3Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
3132de2362d3Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
313318781e08Smrg				 DST_GPR(1),
3134de2362d3Smrg				 DST_REL(ABSOLUTE),
3135de2362d3Smrg				 DST_ELEM(ELEM_Z),
3136de2362d3Smrg				 CLAMP(0));
313718781e08Smrg    /* 33 */
313818781e08Smrg    /* INTERP_XY GPR1.w, PARAM1 */
3139de2362d3Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
3140de2362d3Smrg			     SRC0_REL(ABSOLUTE),
3141de2362d3Smrg			     SRC0_ELEM(ELEM_X),
3142de2362d3Smrg			     SRC0_NEG(0),
314318781e08Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
3144de2362d3Smrg			     SRC1_REL(ABSOLUTE),
3145de2362d3Smrg			     SRC1_ELEM(ELEM_X),
3146de2362d3Smrg			     SRC1_NEG(0),
3147de2362d3Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
3148de2362d3Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
3149de2362d3Smrg			     LAST(1));
3150de2362d3Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3151de2362d3Smrg				 SRC1_ABS(0),
3152de2362d3Smrg				 UPDATE_EXECUTE_MASK(0),
3153de2362d3Smrg				 UPDATE_PRED(0),
3154de2362d3Smrg				 WRITE_MASK(0),
3155de2362d3Smrg				 OMOD(SQ_ALU_OMOD_OFF),
3156de2362d3Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
3157de2362d3Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
315818781e08Smrg				 DST_GPR(1),
3159de2362d3Smrg				 DST_REL(ABSOLUTE),
3160de2362d3Smrg				 DST_ELEM(ELEM_W),
3161de2362d3Smrg				 CLAMP(0));
3162de2362d3Smrg
316318781e08Smrg    /* 34/35 */
316418781e08Smrg    /* SAMPLE RID=1 GPR1, GPR1 */
3165de2362d3Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
3166de2362d3Smrg			     INST_MOD(0),
3167de2362d3Smrg			     FETCH_WHOLE_QUAD(0),
316818781e08Smrg			     RESOURCE_ID(1),
3169de2362d3Smrg			     SRC_GPR(1),
3170de2362d3Smrg			     SRC_REL(ABSOLUTE),
3171de2362d3Smrg			     ALT_CONST(0),
3172de2362d3Smrg			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
3173de2362d3Smrg			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
3174de2362d3Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
3175de2362d3Smrg			     DST_REL(ABSOLUTE),
3176de2362d3Smrg			     DST_SEL_X(SQ_SEL_X),
3177de2362d3Smrg			     DST_SEL_Y(SQ_SEL_Y),
3178de2362d3Smrg			     DST_SEL_Z(SQ_SEL_Z),
3179de2362d3Smrg			     DST_SEL_W(SQ_SEL_W),
3180de2362d3Smrg			     LOD_BIAS(0),
3181de2362d3Smrg			     COORD_TYPE_X(TEX_NORMALIZED),
3182de2362d3Smrg			     COORD_TYPE_Y(TEX_NORMALIZED),
3183de2362d3Smrg			     COORD_TYPE_Z(TEX_NORMALIZED),
3184de2362d3Smrg			     COORD_TYPE_W(TEX_NORMALIZED));
3185de2362d3Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
3186de2362d3Smrg			     OFFSET_Y(0),
3187de2362d3Smrg			     OFFSET_Z(0),
3188de2362d3Smrg			     SAMPLER_ID(1),
3189de2362d3Smrg			     SRC_SEL_X(SQ_SEL_X),
3190de2362d3Smrg			     SRC_SEL_Y(SQ_SEL_Y),
3191de2362d3Smrg			     SRC_SEL_Z(SQ_SEL_0),
3192de2362d3Smrg			     SRC_SEL_W(SQ_SEL_1));
3193de2362d3Smrg    shader[i++] = TEX_DWORD_PAD;
3194de2362d3Smrg
319518781e08Smrg    /* 36 */
319618781e08Smrg    /* MOV GPR1.x, KC5.x */
319718781e08Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5),
319818781e08Smrg			     SRC0_REL(ABSOLUTE),
319918781e08Smrg			     SRC0_ELEM(ELEM_X),
320018781e08Smrg			     SRC0_NEG(0),
320118781e08Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
320218781e08Smrg			     SRC1_REL(ABSOLUTE),
320318781e08Smrg			     SRC1_ELEM(ELEM_X),
320418781e08Smrg			     SRC1_NEG(0),
320518781e08Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
320618781e08Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
320718781e08Smrg			     LAST(0));
320818781e08Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
320918781e08Smrg				 SRC1_ABS(0),
321018781e08Smrg				 UPDATE_EXECUTE_MASK(0),
321118781e08Smrg				 UPDATE_PRED(0),
321218781e08Smrg				 WRITE_MASK(1),
321318781e08Smrg				 OMOD(SQ_ALU_OMOD_OFF),
321418781e08Smrg				 ALU_INST(SQ_OP2_INST_MOV),
321518781e08Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
321618781e08Smrg				 DST_GPR(1),
321718781e08Smrg				 DST_REL(ABSOLUTE),
321818781e08Smrg				 DST_ELEM(ELEM_X),
321918781e08Smrg				 CLAMP(1));
322018781e08Smrg
322118781e08Smrg    /* 37 */
322218781e08Smrg    /* MOV GPR1.y, KC5.y */
322318781e08Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5),
322418781e08Smrg			     SRC0_REL(ABSOLUTE),
322518781e08Smrg			     SRC0_ELEM(ELEM_Y),
322618781e08Smrg			     SRC0_NEG(0),
322718781e08Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
322818781e08Smrg			     SRC1_REL(ABSOLUTE),
322918781e08Smrg			     SRC1_ELEM(ELEM_X),
323018781e08Smrg			     SRC1_NEG(0),
323118781e08Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
323218781e08Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
323318781e08Smrg			     LAST(0));
323418781e08Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
323518781e08Smrg				 SRC1_ABS(0),
323618781e08Smrg				 UPDATE_EXECUTE_MASK(0),
323718781e08Smrg				 UPDATE_PRED(0),
323818781e08Smrg				 WRITE_MASK(1),
323918781e08Smrg				 OMOD(SQ_ALU_OMOD_OFF),
324018781e08Smrg				 ALU_INST(SQ_OP2_INST_MOV),
324118781e08Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
324218781e08Smrg				 DST_GPR(1),
324318781e08Smrg				 DST_REL(ABSOLUTE),
324418781e08Smrg				 DST_ELEM(ELEM_Y),
324518781e08Smrg				 CLAMP(1));
324618781e08Smrg
324718781e08Smrg    /* 38 */
324818781e08Smrg    /* MOV GPR1.z, KC5.z */
324918781e08Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5),
325018781e08Smrg			     SRC0_REL(ABSOLUTE),
325118781e08Smrg			     SRC0_ELEM(ELEM_Z),
325218781e08Smrg			     SRC0_NEG(0),
325318781e08Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
325418781e08Smrg			     SRC1_REL(ABSOLUTE),
325518781e08Smrg			     SRC1_ELEM(ELEM_X),
325618781e08Smrg			     SRC1_NEG(0),
325718781e08Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
325818781e08Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
325918781e08Smrg			     LAST(0));
326018781e08Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
326118781e08Smrg				 SRC1_ABS(0),
326218781e08Smrg				 UPDATE_EXECUTE_MASK(0),
326318781e08Smrg				 UPDATE_PRED(0),
326418781e08Smrg				 WRITE_MASK(1),
326518781e08Smrg				 OMOD(SQ_ALU_OMOD_OFF),
326618781e08Smrg				 ALU_INST(SQ_OP2_INST_MOV),
326718781e08Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
326818781e08Smrg				 DST_GPR(1),
326918781e08Smrg				 DST_REL(ABSOLUTE),
327018781e08Smrg				 DST_ELEM(ELEM_Z),
327118781e08Smrg				 CLAMP(1));
327218781e08Smrg
327318781e08Smrg    /* 39 */
327418781e08Smrg    /* MOV GPR1.w, KC5.w */
327518781e08Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5),
327618781e08Smrg			     SRC0_REL(ABSOLUTE),
327718781e08Smrg			     SRC0_ELEM(ELEM_W),
327818781e08Smrg			     SRC0_NEG(0),
327918781e08Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
328018781e08Smrg			     SRC1_REL(ABSOLUTE),
328118781e08Smrg			     SRC1_ELEM(ELEM_X),
328218781e08Smrg			     SRC1_NEG(0),
328318781e08Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
328418781e08Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
328518781e08Smrg			     LAST(1));
328618781e08Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
328718781e08Smrg				 SRC1_ABS(0),
328818781e08Smrg				 UPDATE_EXECUTE_MASK(0),
328918781e08Smrg				 UPDATE_PRED(0),
329018781e08Smrg				 WRITE_MASK(1),
329118781e08Smrg				 OMOD(SQ_ALU_OMOD_OFF),
329218781e08Smrg				 ALU_INST(SQ_OP2_INST_MOV),
329318781e08Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
329418781e08Smrg				 DST_GPR(1),
329518781e08Smrg				 DST_REL(ABSOLUTE),
329618781e08Smrg				 DST_ELEM(ELEM_W),
329718781e08Smrg				 CLAMP(1));
3298de2362d3Smrg
3299de2362d3Smrg    return i;
3300de2362d3Smrg}
3301