evergreen_shader.c revision 921a55d8
1921a55d8Smrg/*
2921a55d8Smrg * Copyright 2010 Advanced Micro Devices, Inc.
3921a55d8Smrg *
4921a55d8Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5921a55d8Smrg * copy of this software and associated documentation files (the "Software"),
6921a55d8Smrg * to deal in the Software without restriction, including without limitation
7921a55d8Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8921a55d8Smrg * and/or sell copies of the Software, and to permit persons to whom the
9921a55d8Smrg * Software is furnished to do so, subject to the following conditions:
10921a55d8Smrg *
11921a55d8Smrg * The above copyright notice and this permission notice (including the next
12921a55d8Smrg * paragraph) shall be included in all copies or substantial portions of the
13921a55d8Smrg * Software.
14921a55d8Smrg *
15921a55d8Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16921a55d8Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17921a55d8Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18921a55d8Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19921a55d8Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20921a55d8Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21921a55d8Smrg * SOFTWARE.
22921a55d8Smrg *
23921a55d8Smrg * Author: Alex Deucher <alexander.deucher@amd.com>
24921a55d8Smrg *
25921a55d8Smrg */
26921a55d8Smrg
27921a55d8Smrg#ifdef HAVE_CONFIG_H
28921a55d8Smrg#include "config.h"
29921a55d8Smrg#endif
30921a55d8Smrg
31921a55d8Smrg#ifdef XF86DRM_MODE
32921a55d8Smrg
33921a55d8Smrg#include "xf86.h"
34921a55d8Smrg
35921a55d8Smrg#include "evergreen_shader.h"
36921a55d8Smrg#include "evergreen_reg.h"
37921a55d8Smrg
38921a55d8Smrg/* solid vs --------------------------------------- */
39921a55d8Smrgint evergreen_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader)
40921a55d8Smrg{
41921a55d8Smrg    int i = 0;
42921a55d8Smrg
43921a55d8Smrg    /* 0 */
44921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(4),
45921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
46921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
47921a55d8Smrg			    CF_CONST(0),
48921a55d8Smrg			    COND(SQ_CF_COND_ACTIVE),
49921a55d8Smrg			    I_COUNT(1),
50921a55d8Smrg			    VALID_PIXEL_MODE(0),
51921a55d8Smrg			    END_OF_PROGRAM(0),
52921a55d8Smrg			    CF_INST(SQ_CF_INST_VC),
53921a55d8Smrg			    WHOLE_QUAD_MODE(0),
54921a55d8Smrg			    BARRIER(1));
55921a55d8Smrg    /* 1 */
56921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
57921a55d8Smrg					  TYPE(SQ_EXPORT_POS),
58921a55d8Smrg					  RW_GPR(1),
59921a55d8Smrg					  RW_REL(ABSOLUTE),
60921a55d8Smrg					  INDEX_GPR(0),
61921a55d8Smrg					  ELEM_SIZE(0));
62921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
63921a55d8Smrg					       SRC_SEL_Y(SQ_SEL_Y),
64921a55d8Smrg					       SRC_SEL_Z(SQ_SEL_Z),
65921a55d8Smrg					       SRC_SEL_W(SQ_SEL_W),
66921a55d8Smrg					       BURST_COUNT(1),
67921a55d8Smrg					       VALID_PIXEL_MODE(0),
68921a55d8Smrg					       END_OF_PROGRAM(0),
69921a55d8Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
70921a55d8Smrg					       MARK(0),
71921a55d8Smrg					       BARRIER(1));
72921a55d8Smrg    /* 2 - always export a param whether it's used or not */
73921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
74921a55d8Smrg					  TYPE(SQ_EXPORT_PARAM),
75921a55d8Smrg					  RW_GPR(0),
76921a55d8Smrg					  RW_REL(ABSOLUTE),
77921a55d8Smrg					  INDEX_GPR(0),
78921a55d8Smrg					  ELEM_SIZE(0));
79921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
80921a55d8Smrg					       SRC_SEL_Y(SQ_SEL_Y),
81921a55d8Smrg					       SRC_SEL_Z(SQ_SEL_Z),
82921a55d8Smrg					       SRC_SEL_W(SQ_SEL_W),
83921a55d8Smrg					       BURST_COUNT(0),
84921a55d8Smrg					       VALID_PIXEL_MODE(0),
85921a55d8Smrg					       END_OF_PROGRAM(1),
86921a55d8Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
87921a55d8Smrg					       MARK(0),
88921a55d8Smrg					       BARRIER(0));
89921a55d8Smrg    /* 3 - padding */
90921a55d8Smrg    shader[i++] = 0x00000000;
91921a55d8Smrg    shader[i++] = 0x00000000;
92921a55d8Smrg    /* 4/5 */
93921a55d8Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
94921a55d8Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
95921a55d8Smrg			     FETCH_WHOLE_QUAD(0),
96921a55d8Smrg			     BUFFER_ID(0),
97921a55d8Smrg			     SRC_GPR(0),
98921a55d8Smrg			     SRC_REL(ABSOLUTE),
99921a55d8Smrg			     SRC_SEL_X(SQ_SEL_X),
100921a55d8Smrg			     MEGA_FETCH_COUNT(8));
101921a55d8Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
102921a55d8Smrg				 DST_REL(0),
103921a55d8Smrg				 DST_SEL_X(SQ_SEL_X),
104921a55d8Smrg				 DST_SEL_Y(SQ_SEL_Y),
105921a55d8Smrg				 DST_SEL_Z(SQ_SEL_0),
106921a55d8Smrg				 DST_SEL_W(SQ_SEL_1),
107921a55d8Smrg				 USE_CONST_FIELDS(0),
108921a55d8Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
109921a55d8Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
110921a55d8Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
111921a55d8Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
112921a55d8Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
113921a55d8Smrg			     ENDIAN_SWAP(ENDIAN_NONE),
114921a55d8Smrg			     CONST_BUF_NO_STRIDE(0),
115921a55d8Smrg			     MEGA_FETCH(1),
116921a55d8Smrg			     ALT_CONST(0),
117921a55d8Smrg			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
118921a55d8Smrg    shader[i++] = VTX_DWORD_PAD;
119921a55d8Smrg
120921a55d8Smrg    return i;
121921a55d8Smrg}
122921a55d8Smrg
123921a55d8Smrg/* solid ps --------------------------------------- */
124921a55d8Smrgint evergreen_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader)
125921a55d8Smrg{
126921a55d8Smrg    int i = 0;
127921a55d8Smrg
128921a55d8Smrg    /* 0 */
129921a55d8Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(2),
130921a55d8Smrg				KCACHE_BANK0(0),
131921a55d8Smrg				KCACHE_BANK1(0),
132921a55d8Smrg				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
133921a55d8Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
134921a55d8Smrg				KCACHE_ADDR0(0),
135921a55d8Smrg				KCACHE_ADDR1(0),
136921a55d8Smrg				I_COUNT(4),
137921a55d8Smrg				ALT_CONST(0),
138921a55d8Smrg				CF_INST(SQ_CF_INST_ALU),
139921a55d8Smrg				WHOLE_QUAD_MODE(0),
140921a55d8Smrg				BARRIER(1));
141921a55d8Smrg    /* 1 */
142921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
143921a55d8Smrg					  TYPE(SQ_EXPORT_PIXEL),
144921a55d8Smrg					  RW_GPR(0),
145921a55d8Smrg					  RW_REL(ABSOLUTE),
146921a55d8Smrg					  INDEX_GPR(0),
147921a55d8Smrg					  ELEM_SIZE(1));
148921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
149921a55d8Smrg					       SRC_SEL_Y(SQ_SEL_Y),
150921a55d8Smrg					       SRC_SEL_Z(SQ_SEL_Z),
151921a55d8Smrg					       SRC_SEL_W(SQ_SEL_W),
152921a55d8Smrg					       BURST_COUNT(1),
153921a55d8Smrg					       VALID_PIXEL_MODE(0),
154921a55d8Smrg					       END_OF_PROGRAM(1),
155921a55d8Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
156921a55d8Smrg					       MARK(0),
157921a55d8Smrg					       BARRIER(1));
158921a55d8Smrg
159921a55d8Smrg    /* 2 */
160921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
161921a55d8Smrg			     SRC0_REL(ABSOLUTE),
162921a55d8Smrg			     SRC0_ELEM(ELEM_X),
163921a55d8Smrg			     SRC0_NEG(0),
164921a55d8Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
165921a55d8Smrg			     SRC1_REL(ABSOLUTE),
166921a55d8Smrg			     SRC1_ELEM(ELEM_X),
167921a55d8Smrg			     SRC1_NEG(0),
168921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
169921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
170921a55d8Smrg			     LAST(0));
171921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
172921a55d8Smrg				 SRC1_ABS(0),
173921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
174921a55d8Smrg				 UPDATE_PRED(0),
175921a55d8Smrg				 WRITE_MASK(1),
176921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
177921a55d8Smrg				 ALU_INST(SQ_OP2_INST_MOV),
178921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
179921a55d8Smrg				 DST_GPR(0),
180921a55d8Smrg				 DST_REL(ABSOLUTE),
181921a55d8Smrg				 DST_ELEM(ELEM_X),
182921a55d8Smrg				 CLAMP(1));
183921a55d8Smrg    /* 3 */
184921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
185921a55d8Smrg			     SRC0_REL(ABSOLUTE),
186921a55d8Smrg			     SRC0_ELEM(ELEM_Y),
187921a55d8Smrg			     SRC0_NEG(0),
188921a55d8Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
189921a55d8Smrg			     SRC1_REL(ABSOLUTE),
190921a55d8Smrg			     SRC1_ELEM(ELEM_Y),
191921a55d8Smrg			     SRC1_NEG(0),
192921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
193921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
194921a55d8Smrg			     LAST(0));
195921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
196921a55d8Smrg				 SRC1_ABS(0),
197921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
198921a55d8Smrg				 UPDATE_PRED(0),
199921a55d8Smrg				 WRITE_MASK(1),
200921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
201921a55d8Smrg				 ALU_INST(SQ_OP2_INST_MOV),
202921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
203921a55d8Smrg				 DST_GPR(0),
204921a55d8Smrg				 DST_REL(ABSOLUTE),
205921a55d8Smrg				 DST_ELEM(ELEM_Y),
206921a55d8Smrg				 CLAMP(1));
207921a55d8Smrg    /* 4 */
208921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
209921a55d8Smrg			     SRC0_REL(ABSOLUTE),
210921a55d8Smrg			     SRC0_ELEM(ELEM_Z),
211921a55d8Smrg			     SRC0_NEG(0),
212921a55d8Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
213921a55d8Smrg			     SRC1_REL(ABSOLUTE),
214921a55d8Smrg			     SRC1_ELEM(ELEM_Z),
215921a55d8Smrg			     SRC1_NEG(0),
216921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
217921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
218921a55d8Smrg			     LAST(0));
219921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
220921a55d8Smrg				 SRC1_ABS(0),
221921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
222921a55d8Smrg				 UPDATE_PRED(0),
223921a55d8Smrg				 WRITE_MASK(1),
224921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
225921a55d8Smrg				 ALU_INST(SQ_OP2_INST_MOV),
226921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
227921a55d8Smrg				 DST_GPR(0),
228921a55d8Smrg				 DST_REL(ABSOLUTE),
229921a55d8Smrg				 DST_ELEM(ELEM_Z),
230921a55d8Smrg				 CLAMP(1));
231921a55d8Smrg    /* 5 */
232921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
233921a55d8Smrg			     SRC0_REL(ABSOLUTE),
234921a55d8Smrg			     SRC0_ELEM(ELEM_W),
235921a55d8Smrg			     SRC0_NEG(0),
236921a55d8Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
237921a55d8Smrg			     SRC1_REL(ABSOLUTE),
238921a55d8Smrg			     SRC1_ELEM(ELEM_W),
239921a55d8Smrg			     SRC1_NEG(0),
240921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
241921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
242921a55d8Smrg			     LAST(1));
243921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
244921a55d8Smrg				 SRC1_ABS(0),
245921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
246921a55d8Smrg				 UPDATE_PRED(0),
247921a55d8Smrg				 WRITE_MASK(1),
248921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
249921a55d8Smrg				 ALU_INST(SQ_OP2_INST_MOV),
250921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
251921a55d8Smrg				 DST_GPR(0),
252921a55d8Smrg				 DST_REL(ABSOLUTE),
253921a55d8Smrg				 DST_ELEM(ELEM_W),
254921a55d8Smrg				 CLAMP(1));
255921a55d8Smrg
256921a55d8Smrg    return i;
257921a55d8Smrg}
258921a55d8Smrg
259921a55d8Smrg/* copy vs --------------------------------------- */
260921a55d8Smrgint evergreen_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader)
261921a55d8Smrg{
262921a55d8Smrg    int i = 0;
263921a55d8Smrg
264921a55d8Smrg    /* 0 */
265921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(4),
266921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
267921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
268921a55d8Smrg			    CF_CONST(0),
269921a55d8Smrg			    COND(SQ_CF_COND_ACTIVE),
270921a55d8Smrg			    I_COUNT(2),
271921a55d8Smrg			    VALID_PIXEL_MODE(0),
272921a55d8Smrg			    END_OF_PROGRAM(0),
273921a55d8Smrg			    CF_INST(SQ_CF_INST_VC),
274921a55d8Smrg			    WHOLE_QUAD_MODE(0),
275921a55d8Smrg			    BARRIER(1));
276921a55d8Smrg    /* 1 */
277921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
278921a55d8Smrg					  TYPE(SQ_EXPORT_POS),
279921a55d8Smrg					  RW_GPR(1),
280921a55d8Smrg					  RW_REL(ABSOLUTE),
281921a55d8Smrg					  INDEX_GPR(0),
282921a55d8Smrg					  ELEM_SIZE(0));
283921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
284921a55d8Smrg					       SRC_SEL_Y(SQ_SEL_Y),
285921a55d8Smrg					       SRC_SEL_Z(SQ_SEL_Z),
286921a55d8Smrg					       SRC_SEL_W(SQ_SEL_W),
287921a55d8Smrg					       BURST_COUNT(0),
288921a55d8Smrg					       VALID_PIXEL_MODE(0),
289921a55d8Smrg					       END_OF_PROGRAM(0),
290921a55d8Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
291921a55d8Smrg					       MARK(0),
292921a55d8Smrg					       BARRIER(1));
293921a55d8Smrg    /* 2 */
294921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
295921a55d8Smrg					  TYPE(SQ_EXPORT_PARAM),
296921a55d8Smrg					  RW_GPR(0),
297921a55d8Smrg					  RW_REL(ABSOLUTE),
298921a55d8Smrg					  INDEX_GPR(0),
299921a55d8Smrg					  ELEM_SIZE(0));
300921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
301921a55d8Smrg					       SRC_SEL_Y(SQ_SEL_Y),
302921a55d8Smrg					       SRC_SEL_Z(SQ_SEL_Z),
303921a55d8Smrg					       SRC_SEL_W(SQ_SEL_W),
304921a55d8Smrg					       BURST_COUNT(0),
305921a55d8Smrg					       VALID_PIXEL_MODE(0),
306921a55d8Smrg					       END_OF_PROGRAM(1),
307921a55d8Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
308921a55d8Smrg					       MARK(0),
309921a55d8Smrg					       BARRIER(0));
310921a55d8Smrg    /* 3 */
311921a55d8Smrg    shader[i++] = 0x00000000;
312921a55d8Smrg    shader[i++] = 0x00000000;
313921a55d8Smrg    /* 4/5 */
314921a55d8Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
315921a55d8Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
316921a55d8Smrg			     FETCH_WHOLE_QUAD(0),
317921a55d8Smrg			     BUFFER_ID(0),
318921a55d8Smrg			     SRC_GPR(0),
319921a55d8Smrg			     SRC_REL(ABSOLUTE),
320921a55d8Smrg			     SRC_SEL_X(SQ_SEL_X),
321921a55d8Smrg			     MEGA_FETCH_COUNT(16));
322921a55d8Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
323921a55d8Smrg				 DST_REL(0),
324921a55d8Smrg				 DST_SEL_X(SQ_SEL_X),
325921a55d8Smrg				 DST_SEL_Y(SQ_SEL_Y),
326921a55d8Smrg				 DST_SEL_Z(SQ_SEL_0),
327921a55d8Smrg				 DST_SEL_W(SQ_SEL_1),
328921a55d8Smrg				 USE_CONST_FIELDS(0),
329921a55d8Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
330921a55d8Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
331921a55d8Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
332921a55d8Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
333921a55d8Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
334921a55d8Smrg			     ENDIAN_SWAP(ENDIAN_NONE),
335921a55d8Smrg			     CONST_BUF_NO_STRIDE(0),
336921a55d8Smrg			     MEGA_FETCH(1),
337921a55d8Smrg			     ALT_CONST(0),
338921a55d8Smrg			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
339921a55d8Smrg    shader[i++] = VTX_DWORD_PAD;
340921a55d8Smrg    /* 6/7 */
341921a55d8Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
342921a55d8Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
343921a55d8Smrg			     FETCH_WHOLE_QUAD(0),
344921a55d8Smrg			     BUFFER_ID(0),
345921a55d8Smrg			     SRC_GPR(0),
346921a55d8Smrg			     SRC_REL(ABSOLUTE),
347921a55d8Smrg			     SRC_SEL_X(SQ_SEL_X),
348921a55d8Smrg			     MEGA_FETCH_COUNT(8));
349921a55d8Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
350921a55d8Smrg				 DST_REL(0),
351921a55d8Smrg				 DST_SEL_X(SQ_SEL_X),
352921a55d8Smrg				 DST_SEL_Y(SQ_SEL_Y),
353921a55d8Smrg				 DST_SEL_Z(SQ_SEL_0),
354921a55d8Smrg				 DST_SEL_W(SQ_SEL_1),
355921a55d8Smrg				 USE_CONST_FIELDS(0),
356921a55d8Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
357921a55d8Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
358921a55d8Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
359921a55d8Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
360921a55d8Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
361921a55d8Smrg			     ENDIAN_SWAP(ENDIAN_NONE),
362921a55d8Smrg			     CONST_BUF_NO_STRIDE(0),
363921a55d8Smrg			     MEGA_FETCH(0),
364921a55d8Smrg			     ALT_CONST(0),
365921a55d8Smrg			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
366921a55d8Smrg    shader[i++] = VTX_DWORD_PAD;
367921a55d8Smrg
368921a55d8Smrg    return i;
369921a55d8Smrg}
370921a55d8Smrg
371921a55d8Smrg/* copy ps --------------------------------------- */
372921a55d8Smrgint evergreen_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader)
373921a55d8Smrg{
374921a55d8Smrg    int i = 0;
375921a55d8Smrg
376921a55d8Smrg    /* CF INST 0 */
377921a55d8Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(3),
378921a55d8Smrg				KCACHE_BANK0(0),
379921a55d8Smrg				KCACHE_BANK1(0),
380921a55d8Smrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
381921a55d8Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
382921a55d8Smrg				KCACHE_ADDR0(0),
383921a55d8Smrg				KCACHE_ADDR1(0),
384921a55d8Smrg				I_COUNT(4),
385921a55d8Smrg				ALT_CONST(0),
386921a55d8Smrg				CF_INST(SQ_CF_INST_ALU),
387921a55d8Smrg				WHOLE_QUAD_MODE(0),
388921a55d8Smrg				BARRIER(1));
389921a55d8Smrg    /* CF INST 1 */
390921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(8),
391921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
392921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
393921a55d8Smrg			    CF_CONST(0),
394921a55d8Smrg			    COND(SQ_CF_COND_ACTIVE),
395921a55d8Smrg			    I_COUNT(1),
396921a55d8Smrg			    VALID_PIXEL_MODE(0),
397921a55d8Smrg			    END_OF_PROGRAM(0),
398921a55d8Smrg			    CF_INST(SQ_CF_INST_TC),
399921a55d8Smrg			    WHOLE_QUAD_MODE(0),
400921a55d8Smrg			    BARRIER(1));
401921a55d8Smrg    /* CF INST 2 */
402921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
403921a55d8Smrg					  TYPE(SQ_EXPORT_PIXEL),
404921a55d8Smrg					  RW_GPR(0),
405921a55d8Smrg					  RW_REL(ABSOLUTE),
406921a55d8Smrg					  INDEX_GPR(0),
407921a55d8Smrg					  ELEM_SIZE(1));
408921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
409921a55d8Smrg					       SRC_SEL_Y(SQ_SEL_Y),
410921a55d8Smrg					       SRC_SEL_Z(SQ_SEL_Z),
411921a55d8Smrg					       SRC_SEL_W(SQ_SEL_W),
412921a55d8Smrg					       BURST_COUNT(1),
413921a55d8Smrg					       VALID_PIXEL_MODE(0),
414921a55d8Smrg					       END_OF_PROGRAM(1),
415921a55d8Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
416921a55d8Smrg					       MARK(0),
417921a55d8Smrg					       BARRIER(1));
418921a55d8Smrg
419921a55d8Smrg    /* 3 interpolate tex coords */
420921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
421921a55d8Smrg			     SRC0_REL(ABSOLUTE),
422921a55d8Smrg			     SRC0_ELEM(ELEM_Y),
423921a55d8Smrg			     SRC0_NEG(0),
424921a55d8Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
425921a55d8Smrg			     SRC1_REL(ABSOLUTE),
426921a55d8Smrg			     SRC1_ELEM(ELEM_X),
427921a55d8Smrg			     SRC1_NEG(0),
428921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
429921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
430921a55d8Smrg			     LAST(0));
431921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
432921a55d8Smrg				 SRC1_ABS(0),
433921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
434921a55d8Smrg				 UPDATE_PRED(0),
435921a55d8Smrg				 WRITE_MASK(1),
436921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
437921a55d8Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
438921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
439921a55d8Smrg				 DST_GPR(0),
440921a55d8Smrg				 DST_REL(ABSOLUTE),
441921a55d8Smrg				 DST_ELEM(ELEM_X),
442921a55d8Smrg				 CLAMP(0));
443921a55d8Smrg    /* 4 */
444921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
445921a55d8Smrg			     SRC0_REL(ABSOLUTE),
446921a55d8Smrg			     SRC0_ELEM(ELEM_X),
447921a55d8Smrg			     SRC0_NEG(0),
448921a55d8Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
449921a55d8Smrg			     SRC1_REL(ABSOLUTE),
450921a55d8Smrg			     SRC1_ELEM(ELEM_X),
451921a55d8Smrg			     SRC1_NEG(0),
452921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
453921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
454921a55d8Smrg			     LAST(0));
455921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
456921a55d8Smrg				 SRC1_ABS(0),
457921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
458921a55d8Smrg				 UPDATE_PRED(0),
459921a55d8Smrg				 WRITE_MASK(1),
460921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
461921a55d8Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
462921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
463921a55d8Smrg				 DST_GPR(0),
464921a55d8Smrg				 DST_REL(ABSOLUTE),
465921a55d8Smrg				 DST_ELEM(ELEM_Y),
466921a55d8Smrg				 CLAMP(0));
467921a55d8Smrg    /* 5 */
468921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
469921a55d8Smrg			     SRC0_REL(ABSOLUTE),
470921a55d8Smrg			     SRC0_ELEM(ELEM_Y),
471921a55d8Smrg			     SRC0_NEG(0),
472921a55d8Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
473921a55d8Smrg			     SRC1_REL(ABSOLUTE),
474921a55d8Smrg			     SRC1_ELEM(ELEM_X),
475921a55d8Smrg			     SRC1_NEG(0),
476921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
477921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
478921a55d8Smrg			     LAST(0));
479921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
480921a55d8Smrg				 SRC1_ABS(0),
481921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
482921a55d8Smrg				 UPDATE_PRED(0),
483921a55d8Smrg				 WRITE_MASK(0),
484921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
485921a55d8Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
486921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
487921a55d8Smrg				 DST_GPR(0),
488921a55d8Smrg				 DST_REL(ABSOLUTE),
489921a55d8Smrg				 DST_ELEM(ELEM_Z),
490921a55d8Smrg				 CLAMP(0));
491921a55d8Smrg    /* 6 */
492921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
493921a55d8Smrg			     SRC0_REL(ABSOLUTE),
494921a55d8Smrg			     SRC0_ELEM(ELEM_X),
495921a55d8Smrg			     SRC0_NEG(0),
496921a55d8Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
497921a55d8Smrg			     SRC1_REL(ABSOLUTE),
498921a55d8Smrg			     SRC1_ELEM(ELEM_X),
499921a55d8Smrg			     SRC1_NEG(0),
500921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
501921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
502921a55d8Smrg			     LAST(1));
503921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
504921a55d8Smrg				 SRC1_ABS(0),
505921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
506921a55d8Smrg				 UPDATE_PRED(0),
507921a55d8Smrg				 WRITE_MASK(0),
508921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
509921a55d8Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
510921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
511921a55d8Smrg				 DST_GPR(0),
512921a55d8Smrg				 DST_REL(ABSOLUTE),
513921a55d8Smrg				 DST_ELEM(ELEM_W),
514921a55d8Smrg				 CLAMP(0));
515921a55d8Smrg
516921a55d8Smrg    /* 7 */
517921a55d8Smrg    shader[i++] = 0x00000000;
518921a55d8Smrg    shader[i++] = 0x00000000;
519921a55d8Smrg
520921a55d8Smrg    /* 8/9 TEX INST 0 */
521921a55d8Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
522921a55d8Smrg			     INST_MOD(0),
523921a55d8Smrg			     FETCH_WHOLE_QUAD(0),
524921a55d8Smrg			     RESOURCE_ID(0),
525921a55d8Smrg			     SRC_GPR(0),
526921a55d8Smrg			     SRC_REL(ABSOLUTE),
527921a55d8Smrg			     ALT_CONST(0),
528921a55d8Smrg			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
529921a55d8Smrg			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
530921a55d8Smrg    shader[i++] = TEX_DWORD1(DST_GPR(0),
531921a55d8Smrg			     DST_REL(ABSOLUTE),
532921a55d8Smrg			     DST_SEL_X(SQ_SEL_X), /* R */
533921a55d8Smrg			     DST_SEL_Y(SQ_SEL_Y), /* G */
534921a55d8Smrg			     DST_SEL_Z(SQ_SEL_Z), /* B */
535921a55d8Smrg			     DST_SEL_W(SQ_SEL_W), /* A */
536921a55d8Smrg			     LOD_BIAS(0),
537921a55d8Smrg			     COORD_TYPE_X(TEX_UNNORMALIZED),
538921a55d8Smrg			     COORD_TYPE_Y(TEX_UNNORMALIZED),
539921a55d8Smrg			     COORD_TYPE_Z(TEX_UNNORMALIZED),
540921a55d8Smrg			     COORD_TYPE_W(TEX_UNNORMALIZED));
541921a55d8Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
542921a55d8Smrg			     OFFSET_Y(0),
543921a55d8Smrg			     OFFSET_Z(0),
544921a55d8Smrg			     SAMPLER_ID(0),
545921a55d8Smrg			     SRC_SEL_X(SQ_SEL_X),
546921a55d8Smrg			     SRC_SEL_Y(SQ_SEL_Y),
547921a55d8Smrg			     SRC_SEL_Z(SQ_SEL_0),
548921a55d8Smrg			     SRC_SEL_W(SQ_SEL_1));
549921a55d8Smrg    shader[i++] = TEX_DWORD_PAD;
550921a55d8Smrg
551921a55d8Smrg    return i;
552921a55d8Smrg}
553921a55d8Smrg
554921a55d8Smrgint evergreen_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
555921a55d8Smrg{
556921a55d8Smrg    int i = 0;
557921a55d8Smrg
558921a55d8Smrg    /* 0 */
559921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(6),
560921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
561921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
562921a55d8Smrg                            CF_CONST(0),
563921a55d8Smrg                            COND(SQ_CF_COND_ACTIVE),
564921a55d8Smrg                            I_COUNT(2),
565921a55d8Smrg                            VALID_PIXEL_MODE(0),
566921a55d8Smrg                            END_OF_PROGRAM(0),
567921a55d8Smrg                            CF_INST(SQ_CF_INST_VC),
568921a55d8Smrg                            WHOLE_QUAD_MODE(0),
569921a55d8Smrg                            BARRIER(1));
570921a55d8Smrg
571921a55d8Smrg    /* 1 - ALU */
572921a55d8Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(4),
573921a55d8Smrg				KCACHE_BANK0(0),
574921a55d8Smrg				KCACHE_BANK1(0),
575921a55d8Smrg				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
576921a55d8Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
577921a55d8Smrg				KCACHE_ADDR0(0),
578921a55d8Smrg				KCACHE_ADDR1(0),
579921a55d8Smrg				I_COUNT(2),
580921a55d8Smrg				ALT_CONST(0),
581921a55d8Smrg				CF_INST(SQ_CF_INST_ALU),
582921a55d8Smrg				WHOLE_QUAD_MODE(0),
583921a55d8Smrg				BARRIER(1));
584921a55d8Smrg
585921a55d8Smrg    /* 2 */
586921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
587921a55d8Smrg                                          TYPE(SQ_EXPORT_POS),
588921a55d8Smrg                                          RW_GPR(1),
589921a55d8Smrg                                          RW_REL(ABSOLUTE),
590921a55d8Smrg                                          INDEX_GPR(0),
591921a55d8Smrg                                          ELEM_SIZE(3));
592921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
593921a55d8Smrg                                               SRC_SEL_Y(SQ_SEL_Y),
594921a55d8Smrg                                               SRC_SEL_Z(SQ_SEL_Z),
595921a55d8Smrg                                               SRC_SEL_W(SQ_SEL_W),
596921a55d8Smrg                                               BURST_COUNT(1),
597921a55d8Smrg                                               VALID_PIXEL_MODE(0),
598921a55d8Smrg                                               END_OF_PROGRAM(0),
599921a55d8Smrg                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
600921a55d8Smrg                                               MARK(0),
601921a55d8Smrg                                               BARRIER(1));
602921a55d8Smrg    /* 3 */
603921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
604921a55d8Smrg                                          TYPE(SQ_EXPORT_PARAM),
605921a55d8Smrg                                          RW_GPR(0),
606921a55d8Smrg                                          RW_REL(ABSOLUTE),
607921a55d8Smrg                                          INDEX_GPR(0),
608921a55d8Smrg                                          ELEM_SIZE(3));
609921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
610921a55d8Smrg                                               SRC_SEL_Y(SQ_SEL_Y),
611921a55d8Smrg                                               SRC_SEL_Z(SQ_SEL_Z),
612921a55d8Smrg                                               SRC_SEL_W(SQ_SEL_W),
613921a55d8Smrg                                               BURST_COUNT(1),
614921a55d8Smrg                                               VALID_PIXEL_MODE(0),
615921a55d8Smrg                                               END_OF_PROGRAM(1),
616921a55d8Smrg                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
617921a55d8Smrg                                               MARK(0),
618921a55d8Smrg                                               BARRIER(0));
619921a55d8Smrg
620921a55d8Smrg
621921a55d8Smrg    /* 4 texX / w */
622921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
623921a55d8Smrg                             SRC0_REL(ABSOLUTE),
624921a55d8Smrg                             SRC0_ELEM(ELEM_X),
625921a55d8Smrg                             SRC0_NEG(0),
626921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
627921a55d8Smrg                             SRC1_REL(ABSOLUTE),
628921a55d8Smrg                             SRC1_ELEM(ELEM_X),
629921a55d8Smrg                             SRC1_NEG(0),
630921a55d8Smrg                             INDEX_MODE(SQ_INDEX_AR_X),
631921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
632921a55d8Smrg                             LAST(0));
633921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
634921a55d8Smrg                                 SRC1_ABS(0),
635921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
636921a55d8Smrg                                 UPDATE_PRED(0),
637921a55d8Smrg                                 WRITE_MASK(1),
638921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
639921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_MUL),
640921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
641921a55d8Smrg                                 DST_GPR(0),
642921a55d8Smrg                                 DST_REL(ABSOLUTE),
643921a55d8Smrg                                 DST_ELEM(ELEM_X),
644921a55d8Smrg                                 CLAMP(0));
645921a55d8Smrg
646921a55d8Smrg    /* 5 texY / h */
647921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
648921a55d8Smrg                             SRC0_REL(ABSOLUTE),
649921a55d8Smrg                             SRC0_ELEM(ELEM_Y),
650921a55d8Smrg                             SRC0_NEG(0),
651921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
652921a55d8Smrg                             SRC1_REL(ABSOLUTE),
653921a55d8Smrg                             SRC1_ELEM(ELEM_Y),
654921a55d8Smrg                             SRC1_NEG(0),
655921a55d8Smrg                             INDEX_MODE(SQ_INDEX_AR_X),
656921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
657921a55d8Smrg                             LAST(1));
658921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
659921a55d8Smrg                                 SRC1_ABS(0),
660921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
661921a55d8Smrg                                 UPDATE_PRED(0),
662921a55d8Smrg                                 WRITE_MASK(1),
663921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
664921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_MUL),
665921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
666921a55d8Smrg                                 DST_GPR(0),
667921a55d8Smrg                                 DST_REL(ABSOLUTE),
668921a55d8Smrg                                 DST_ELEM(ELEM_Y),
669921a55d8Smrg                                 CLAMP(0));
670921a55d8Smrg
671921a55d8Smrg    /* 6/7 */
672921a55d8Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
673921a55d8Smrg                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
674921a55d8Smrg                             FETCH_WHOLE_QUAD(0),
675921a55d8Smrg                             BUFFER_ID(0),
676921a55d8Smrg                             SRC_GPR(0),
677921a55d8Smrg                             SRC_REL(ABSOLUTE),
678921a55d8Smrg                             SRC_SEL_X(SQ_SEL_X),
679921a55d8Smrg                             MEGA_FETCH_COUNT(16));
680921a55d8Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
681921a55d8Smrg                                 DST_REL(ABSOLUTE),
682921a55d8Smrg                                 DST_SEL_X(SQ_SEL_X),
683921a55d8Smrg                                 DST_SEL_Y(SQ_SEL_Y),
684921a55d8Smrg                                 DST_SEL_Z(SQ_SEL_0),
685921a55d8Smrg                                 DST_SEL_W(SQ_SEL_1),
686921a55d8Smrg                                 USE_CONST_FIELDS(0),
687921a55d8Smrg                                 DATA_FORMAT(FMT_32_32_FLOAT),
688921a55d8Smrg                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
689921a55d8Smrg                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
690921a55d8Smrg                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
691921a55d8Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
692921a55d8Smrg                             ENDIAN_SWAP(ENDIAN_NONE),
693921a55d8Smrg                             CONST_BUF_NO_STRIDE(0),
694921a55d8Smrg                             MEGA_FETCH(1),
695921a55d8Smrg			     ALT_CONST(0),
696921a55d8Smrg			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
697921a55d8Smrg    shader[i++] = VTX_DWORD_PAD;
698921a55d8Smrg    /* 8/9 */
699921a55d8Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
700921a55d8Smrg                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
701921a55d8Smrg                             FETCH_WHOLE_QUAD(0),
702921a55d8Smrg                             BUFFER_ID(0),
703921a55d8Smrg                             SRC_GPR(0),
704921a55d8Smrg                             SRC_REL(ABSOLUTE),
705921a55d8Smrg                             SRC_SEL_X(SQ_SEL_X),
706921a55d8Smrg                             MEGA_FETCH_COUNT(8));
707921a55d8Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
708921a55d8Smrg                                 DST_REL(ABSOLUTE),
709921a55d8Smrg                                 DST_SEL_X(SQ_SEL_X),
710921a55d8Smrg                                 DST_SEL_Y(SQ_SEL_Y),
711921a55d8Smrg                                 DST_SEL_Z(SQ_SEL_0),
712921a55d8Smrg                                 DST_SEL_W(SQ_SEL_1),
713921a55d8Smrg                                 USE_CONST_FIELDS(0),
714921a55d8Smrg                                 DATA_FORMAT(FMT_32_32_FLOAT),
715921a55d8Smrg                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
716921a55d8Smrg                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
717921a55d8Smrg                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
718921a55d8Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
719921a55d8Smrg                             ENDIAN_SWAP(ENDIAN_NONE),
720921a55d8Smrg                             CONST_BUF_NO_STRIDE(0),
721921a55d8Smrg                             MEGA_FETCH(0),
722921a55d8Smrg			     ALT_CONST(0),
723921a55d8Smrg			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
724921a55d8Smrg    shader[i++] = VTX_DWORD_PAD;
725921a55d8Smrg
726921a55d8Smrg    return i;
727921a55d8Smrg}
728921a55d8Smrg
729921a55d8Smrgint evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
730921a55d8Smrg{
731921a55d8Smrg    int i = 0;
732921a55d8Smrg
733921a55d8Smrg    /* 0 */
734921a55d8Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(5),
735921a55d8Smrg				KCACHE_BANK0(0),
736921a55d8Smrg				KCACHE_BANK1(0),
737921a55d8Smrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
738921a55d8Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
739921a55d8Smrg				KCACHE_ADDR0(0),
740921a55d8Smrg				KCACHE_ADDR1(0),
741921a55d8Smrg				I_COUNT(4),
742921a55d8Smrg				ALT_CONST(0),
743921a55d8Smrg				CF_INST(SQ_CF_INST_ALU),
744921a55d8Smrg				WHOLE_QUAD_MODE(0),
745921a55d8Smrg				BARRIER(1));
746921a55d8Smrg    /* 1 */
747921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(21),
748921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
749921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
750921a55d8Smrg                            CF_CONST(0),
751921a55d8Smrg                            COND(SQ_CF_COND_BOOL),
752921a55d8Smrg                            I_COUNT(0),
753921a55d8Smrg                            VALID_PIXEL_MODE(0),
754921a55d8Smrg                            END_OF_PROGRAM(0),
755921a55d8Smrg                            CF_INST(SQ_CF_INST_CALL),
756921a55d8Smrg                            WHOLE_QUAD_MODE(0),
757921a55d8Smrg                            BARRIER(0));
758921a55d8Smrg    /* 2 */
759921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(30),
760921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
761921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
762921a55d8Smrg                            CF_CONST(0),
763921a55d8Smrg                            COND(SQ_CF_COND_NOT_BOOL),
764921a55d8Smrg                            I_COUNT(0),
765921a55d8Smrg                            VALID_PIXEL_MODE(0),
766921a55d8Smrg                            END_OF_PROGRAM(0),
767921a55d8Smrg                            CF_INST(SQ_CF_INST_CALL),
768921a55d8Smrg                            WHOLE_QUAD_MODE(0),
769921a55d8Smrg                            BARRIER(0));
770921a55d8Smrg    /* 3 */
771921a55d8Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(9),
772921a55d8Smrg                                KCACHE_BANK0(0),
773921a55d8Smrg                                KCACHE_BANK1(0),
774921a55d8Smrg                                KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
775921a55d8Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
776921a55d8Smrg                                KCACHE_ADDR0(0),
777921a55d8Smrg                                KCACHE_ADDR1(0),
778921a55d8Smrg                                I_COUNT(12),
779921a55d8Smrg                                ALT_CONST(0),
780921a55d8Smrg                                CF_INST(SQ_CF_INST_ALU),
781921a55d8Smrg                                WHOLE_QUAD_MODE(0),
782921a55d8Smrg                                BARRIER(1));
783921a55d8Smrg    /* 4 */
784921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
785921a55d8Smrg                                          TYPE(SQ_EXPORT_PIXEL),
786921a55d8Smrg                                          RW_GPR(2),
787921a55d8Smrg                                          RW_REL(ABSOLUTE),
788921a55d8Smrg                                          INDEX_GPR(0),
789921a55d8Smrg                                          ELEM_SIZE(3));
790921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
791921a55d8Smrg                                               SRC_SEL_Y(SQ_SEL_Y),
792921a55d8Smrg                                               SRC_SEL_Z(SQ_SEL_Z),
793921a55d8Smrg                                               SRC_SEL_W(SQ_SEL_W),
794921a55d8Smrg                                               BURST_COUNT(1),
795921a55d8Smrg                                               VALID_PIXEL_MODE(0),
796921a55d8Smrg                                               END_OF_PROGRAM(1),
797921a55d8Smrg                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
798921a55d8Smrg                                               MARK(0),
799921a55d8Smrg                                               BARRIER(1));
800921a55d8Smrg    /* 5 interpolate tex coords */
801921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
802921a55d8Smrg			     SRC0_REL(ABSOLUTE),
803921a55d8Smrg			     SRC0_ELEM(ELEM_Y),
804921a55d8Smrg			     SRC0_NEG(0),
805921a55d8Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
806921a55d8Smrg			     SRC1_REL(ABSOLUTE),
807921a55d8Smrg			     SRC1_ELEM(ELEM_X),
808921a55d8Smrg			     SRC1_NEG(0),
809921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
810921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
811921a55d8Smrg			     LAST(0));
812921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
813921a55d8Smrg				 SRC1_ABS(0),
814921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
815921a55d8Smrg				 UPDATE_PRED(0),
816921a55d8Smrg				 WRITE_MASK(1),
817921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
818921a55d8Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
819921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
820921a55d8Smrg				 DST_GPR(0),
821921a55d8Smrg				 DST_REL(ABSOLUTE),
822921a55d8Smrg				 DST_ELEM(ELEM_X),
823921a55d8Smrg				 CLAMP(0));
824921a55d8Smrg    /* 6 */
825921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
826921a55d8Smrg			     SRC0_REL(ABSOLUTE),
827921a55d8Smrg			     SRC0_ELEM(ELEM_X),
828921a55d8Smrg			     SRC0_NEG(0),
829921a55d8Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
830921a55d8Smrg			     SRC1_REL(ABSOLUTE),
831921a55d8Smrg			     SRC1_ELEM(ELEM_X),
832921a55d8Smrg			     SRC1_NEG(0),
833921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
834921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
835921a55d8Smrg			     LAST(0));
836921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
837921a55d8Smrg				 SRC1_ABS(0),
838921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
839921a55d8Smrg				 UPDATE_PRED(0),
840921a55d8Smrg				 WRITE_MASK(1),
841921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
842921a55d8Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
843921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
844921a55d8Smrg				 DST_GPR(0),
845921a55d8Smrg				 DST_REL(ABSOLUTE),
846921a55d8Smrg				 DST_ELEM(ELEM_Y),
847921a55d8Smrg				 CLAMP(0));
848921a55d8Smrg    /* 7 */
849921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
850921a55d8Smrg			     SRC0_REL(ABSOLUTE),
851921a55d8Smrg			     SRC0_ELEM(ELEM_Y),
852921a55d8Smrg			     SRC0_NEG(0),
853921a55d8Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
854921a55d8Smrg			     SRC1_REL(ABSOLUTE),
855921a55d8Smrg			     SRC1_ELEM(ELEM_X),
856921a55d8Smrg			     SRC1_NEG(0),
857921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
858921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
859921a55d8Smrg			     LAST(0));
860921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
861921a55d8Smrg				 SRC1_ABS(0),
862921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
863921a55d8Smrg				 UPDATE_PRED(0),
864921a55d8Smrg				 WRITE_MASK(0),
865921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
866921a55d8Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
867921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
868921a55d8Smrg				 DST_GPR(0),
869921a55d8Smrg				 DST_REL(ABSOLUTE),
870921a55d8Smrg				 DST_ELEM(ELEM_Z),
871921a55d8Smrg				 CLAMP(0));
872921a55d8Smrg    /* 8 */
873921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
874921a55d8Smrg			     SRC0_REL(ABSOLUTE),
875921a55d8Smrg			     SRC0_ELEM(ELEM_X),
876921a55d8Smrg			     SRC0_NEG(0),
877921a55d8Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
878921a55d8Smrg			     SRC1_REL(ABSOLUTE),
879921a55d8Smrg			     SRC1_ELEM(ELEM_X),
880921a55d8Smrg			     SRC1_NEG(0),
881921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
882921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
883921a55d8Smrg			     LAST(1));
884921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
885921a55d8Smrg				 SRC1_ABS(0),
886921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
887921a55d8Smrg				 UPDATE_PRED(0),
888921a55d8Smrg				 WRITE_MASK(0),
889921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
890921a55d8Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
891921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
892921a55d8Smrg				 DST_GPR(0),
893921a55d8Smrg				 DST_REL(ABSOLUTE),
894921a55d8Smrg				 DST_ELEM(ELEM_W),
895921a55d8Smrg				 CLAMP(0));
896921a55d8Smrg
897921a55d8Smrg    /* 9,10,11,12 */
898921a55d8Smrg    /* r2.x = MAD(c0.w, r1.x, c0.x) */
899921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
900921a55d8Smrg                             SRC0_REL(ABSOLUTE),
901921a55d8Smrg                             SRC0_ELEM(ELEM_W),
902921a55d8Smrg                             SRC0_NEG(0),
903921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
904921a55d8Smrg                             SRC1_REL(ABSOLUTE),
905921a55d8Smrg                             SRC1_ELEM(ELEM_X),
906921a55d8Smrg                             SRC1_NEG(0),
907921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
908921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
909921a55d8Smrg                             LAST(0));
910921a55d8Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
911921a55d8Smrg                                 SRC2_REL(ABSOLUTE),
912921a55d8Smrg                                 SRC2_ELEM(ELEM_X),
913921a55d8Smrg                                 SRC2_NEG(0),
914921a55d8Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
915921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
916921a55d8Smrg                                 DST_GPR(2),
917921a55d8Smrg                                 DST_REL(ABSOLUTE),
918921a55d8Smrg                                 DST_ELEM(ELEM_X),
919921a55d8Smrg                                 CLAMP(0));
920921a55d8Smrg    /* r2.y = MAD(c0.w, r1.x, c0.y) */
921921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
922921a55d8Smrg                             SRC0_REL(ABSOLUTE),
923921a55d8Smrg                             SRC0_ELEM(ELEM_W),
924921a55d8Smrg                             SRC0_NEG(0),
925921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
926921a55d8Smrg                             SRC1_REL(ABSOLUTE),
927921a55d8Smrg                             SRC1_ELEM(ELEM_X),
928921a55d8Smrg                             SRC1_NEG(0),
929921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
930921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
931921a55d8Smrg                             LAST(0));
932921a55d8Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
933921a55d8Smrg                                 SRC2_REL(ABSOLUTE),
934921a55d8Smrg                                 SRC2_ELEM(ELEM_Y),
935921a55d8Smrg                                 SRC2_NEG(0),
936921a55d8Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
937921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
938921a55d8Smrg                                 DST_GPR(2),
939921a55d8Smrg                                 DST_REL(ABSOLUTE),
940921a55d8Smrg                                 DST_ELEM(ELEM_Y),
941921a55d8Smrg                                 CLAMP(0));
942921a55d8Smrg    /* r2.z = MAD(c0.w, r1.x, c0.z) */
943921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
944921a55d8Smrg                             SRC0_REL(ABSOLUTE),
945921a55d8Smrg                             SRC0_ELEM(ELEM_W),
946921a55d8Smrg                             SRC0_NEG(0),
947921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
948921a55d8Smrg                             SRC1_REL(ABSOLUTE),
949921a55d8Smrg                             SRC1_ELEM(ELEM_X),
950921a55d8Smrg                             SRC1_NEG(0),
951921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
952921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
953921a55d8Smrg                             LAST(0));
954921a55d8Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
955921a55d8Smrg                                 SRC2_REL(ABSOLUTE),
956921a55d8Smrg                                 SRC2_ELEM(ELEM_Z),
957921a55d8Smrg                                 SRC2_NEG(0),
958921a55d8Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
959921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
960921a55d8Smrg                                 DST_GPR(2),
961921a55d8Smrg                                 DST_REL(ABSOLUTE),
962921a55d8Smrg                                 DST_ELEM(ELEM_Z),
963921a55d8Smrg                                 CLAMP(0));
964921a55d8Smrg    /* r2.w = MAD(0, 0, 1) */
965921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
966921a55d8Smrg                             SRC0_REL(ABSOLUTE),
967921a55d8Smrg                             SRC0_ELEM(ELEM_X),
968921a55d8Smrg                             SRC0_NEG(0),
969921a55d8Smrg                             SRC1_SEL(SQ_ALU_SRC_0),
970921a55d8Smrg                             SRC1_REL(ABSOLUTE),
971921a55d8Smrg                             SRC1_ELEM(ELEM_X),
972921a55d8Smrg                             SRC1_NEG(0),
973921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
974921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
975921a55d8Smrg                             LAST(1));
976921a55d8Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
977921a55d8Smrg                                 SRC2_REL(ABSOLUTE),
978921a55d8Smrg                                 SRC2_ELEM(ELEM_X),
979921a55d8Smrg                                 SRC2_NEG(0),
980921a55d8Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
981921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
982921a55d8Smrg                                 DST_GPR(2),
983921a55d8Smrg                                 DST_REL(ABSOLUTE),
984921a55d8Smrg                                 DST_ELEM(ELEM_W),
985921a55d8Smrg                                 CLAMP(0));
986921a55d8Smrg
987921a55d8Smrg    /* 13,14,15,16 */
988921a55d8Smrg    /* r2.x = MAD(c1.x, r1.y, pv.x) */
989921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
990921a55d8Smrg                             SRC0_REL(ABSOLUTE),
991921a55d8Smrg                             SRC0_ELEM(ELEM_X),
992921a55d8Smrg                             SRC0_NEG(0),
993921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
994921a55d8Smrg                             SRC1_REL(ABSOLUTE),
995921a55d8Smrg                             SRC1_ELEM(ELEM_Y),
996921a55d8Smrg                             SRC1_NEG(0),
997921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
998921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
999921a55d8Smrg                             LAST(0));
1000921a55d8Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1001921a55d8Smrg                                 SRC2_REL(ABSOLUTE),
1002921a55d8Smrg                                 SRC2_ELEM(ELEM_X),
1003921a55d8Smrg                                 SRC2_NEG(0),
1004921a55d8Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1005921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1006921a55d8Smrg                                 DST_GPR(2),
1007921a55d8Smrg                                 DST_REL(ABSOLUTE),
1008921a55d8Smrg                                 DST_ELEM(ELEM_X),
1009921a55d8Smrg                                 CLAMP(0));
1010921a55d8Smrg    /* r2.y = MAD(c1.y, r1.y, pv.y) */
1011921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
1012921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1013921a55d8Smrg                             SRC0_ELEM(ELEM_Y),
1014921a55d8Smrg                             SRC0_NEG(0),
1015921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1016921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1017921a55d8Smrg                             SRC1_ELEM(ELEM_Y),
1018921a55d8Smrg                             SRC1_NEG(0),
1019921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1020921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1021921a55d8Smrg                             LAST(0));
1022921a55d8Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1023921a55d8Smrg                                 SRC2_REL(ABSOLUTE),
1024921a55d8Smrg                                 SRC2_ELEM(ELEM_Y),
1025921a55d8Smrg                                 SRC2_NEG(0),
1026921a55d8Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1027921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1028921a55d8Smrg                                 DST_GPR(2),
1029921a55d8Smrg                                 DST_REL(ABSOLUTE),
1030921a55d8Smrg                                 DST_ELEM(ELEM_Y),
1031921a55d8Smrg                                 CLAMP(0));
1032921a55d8Smrg    /* r2.z = MAD(c1.z, r1.y, pv.z) */
1033921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
1034921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1035921a55d8Smrg                             SRC0_ELEM(ELEM_Z),
1036921a55d8Smrg                             SRC0_NEG(0),
1037921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1038921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1039921a55d8Smrg                             SRC1_ELEM(ELEM_Y),
1040921a55d8Smrg                             SRC1_NEG(0),
1041921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1042921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1043921a55d8Smrg                             LAST(0));
1044921a55d8Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1045921a55d8Smrg                                 SRC2_REL(ABSOLUTE),
1046921a55d8Smrg                                 SRC2_ELEM(ELEM_Z),
1047921a55d8Smrg                                 SRC2_NEG(0),
1048921a55d8Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1049921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1050921a55d8Smrg                                 DST_GPR(2),
1051921a55d8Smrg                                 DST_REL(ABSOLUTE),
1052921a55d8Smrg                                 DST_ELEM(ELEM_Z),
1053921a55d8Smrg                                 CLAMP(0));
1054921a55d8Smrg    /* r2.w = MAD(0, 0, 1) */
1055921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
1056921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1057921a55d8Smrg                             SRC0_ELEM(ELEM_X),
1058921a55d8Smrg                             SRC0_NEG(0),
1059921a55d8Smrg                             SRC1_SEL(SQ_ALU_SRC_0),
1060921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1061921a55d8Smrg                             SRC1_ELEM(ELEM_X),
1062921a55d8Smrg                             SRC1_NEG(0),
1063921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1064921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1065921a55d8Smrg                             LAST(1));
1066921a55d8Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
1067921a55d8Smrg                                 SRC2_REL(ABSOLUTE),
1068921a55d8Smrg                                 SRC2_ELEM(ELEM_W),
1069921a55d8Smrg                                 SRC2_NEG(0),
1070921a55d8Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1071921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1072921a55d8Smrg                                 DST_GPR(2),
1073921a55d8Smrg                                 DST_REL(ABSOLUTE),
1074921a55d8Smrg                                 DST_ELEM(ELEM_W),
1075921a55d8Smrg                                 CLAMP(0));
1076921a55d8Smrg    /* 17,18,19,20 */
1077921a55d8Smrg    /* r2.x = MAD(c2.x, r1.z, pv.x) */
1078921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
1079921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1080921a55d8Smrg                             SRC0_ELEM(ELEM_X),
1081921a55d8Smrg                             SRC0_NEG(0),
1082921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1083921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1084921a55d8Smrg                             SRC1_ELEM(ELEM_Z),
1085921a55d8Smrg                             SRC1_NEG(0),
1086921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1087921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1088921a55d8Smrg                             LAST(0));
1089921a55d8Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1090921a55d8Smrg                                 SRC2_REL(ABSOLUTE),
1091921a55d8Smrg                                 SRC2_ELEM(ELEM_X),
1092921a55d8Smrg                                 SRC2_NEG(0),
1093921a55d8Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1094921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1095921a55d8Smrg                                 DST_GPR(2),
1096921a55d8Smrg                                 DST_REL(ABSOLUTE),
1097921a55d8Smrg                                 DST_ELEM(ELEM_X),
1098921a55d8Smrg                                 CLAMP(1));
1099921a55d8Smrg    /* r2.y = MAD(c2.y, r1.z, pv.y) */
1100921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
1101921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1102921a55d8Smrg                             SRC0_ELEM(ELEM_Y),
1103921a55d8Smrg                             SRC0_NEG(0),
1104921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1105921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1106921a55d8Smrg                             SRC1_ELEM(ELEM_Z),
1107921a55d8Smrg                             SRC1_NEG(0),
1108921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1109921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1110921a55d8Smrg                             LAST(0));
1111921a55d8Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1112921a55d8Smrg                                 SRC2_REL(ABSOLUTE),
1113921a55d8Smrg                                 SRC2_ELEM(ELEM_Y),
1114921a55d8Smrg                                 SRC2_NEG(0),
1115921a55d8Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1116921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1117921a55d8Smrg                                 DST_GPR(2),
1118921a55d8Smrg                                 DST_REL(ABSOLUTE),
1119921a55d8Smrg                                 DST_ELEM(ELEM_Y),
1120921a55d8Smrg                                 CLAMP(1));
1121921a55d8Smrg    /* r2.z = MAD(c2.z, r1.z, pv.z) */
1122921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
1123921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1124921a55d8Smrg                             SRC0_ELEM(ELEM_Z),
1125921a55d8Smrg                             SRC0_NEG(0),
1126921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1127921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1128921a55d8Smrg                             SRC1_ELEM(ELEM_Z),
1129921a55d8Smrg                             SRC1_NEG(0),
1130921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1131921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1132921a55d8Smrg                             LAST(0));
1133921a55d8Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1134921a55d8Smrg                                 SRC2_REL(ABSOLUTE),
1135921a55d8Smrg                                 SRC2_ELEM(ELEM_Z),
1136921a55d8Smrg                                 SRC2_NEG(0),
1137921a55d8Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1138921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1139921a55d8Smrg                                 DST_GPR(2),
1140921a55d8Smrg                                 DST_REL(ABSOLUTE),
1141921a55d8Smrg                                 DST_ELEM(ELEM_Z),
1142921a55d8Smrg                                 CLAMP(1));
1143921a55d8Smrg    /* r2.w = MAD(0, 0, 1) */
1144921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
1145921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1146921a55d8Smrg                             SRC0_ELEM(ELEM_X),
1147921a55d8Smrg                             SRC0_NEG(0),
1148921a55d8Smrg                             SRC1_SEL(SQ_ALU_SRC_0),
1149921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1150921a55d8Smrg                             SRC1_ELEM(ELEM_X),
1151921a55d8Smrg                             SRC1_NEG(0),
1152921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1153921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1154921a55d8Smrg                             LAST(1));
1155921a55d8Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
1156921a55d8Smrg                                 SRC2_REL(ABSOLUTE),
1157921a55d8Smrg                                 SRC2_ELEM(ELEM_X),
1158921a55d8Smrg                                 SRC2_NEG(0),
1159921a55d8Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1160921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1161921a55d8Smrg                                 DST_GPR(2),
1162921a55d8Smrg                                 DST_REL(ABSOLUTE),
1163921a55d8Smrg                                 DST_ELEM(ELEM_W),
1164921a55d8Smrg                                 CLAMP(1));
1165921a55d8Smrg
1166921a55d8Smrg    /* 21 */
1167921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(24),
1168921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1169921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1170921a55d8Smrg                            CF_CONST(0),
1171921a55d8Smrg                            COND(SQ_CF_COND_ACTIVE),
1172921a55d8Smrg                            I_COUNT(3),
1173921a55d8Smrg                            VALID_PIXEL_MODE(0),
1174921a55d8Smrg                            END_OF_PROGRAM(0),
1175921a55d8Smrg                            CF_INST(SQ_CF_INST_TC),
1176921a55d8Smrg                            WHOLE_QUAD_MODE(0),
1177921a55d8Smrg                            BARRIER(1));
1178921a55d8Smrg    /* 22 */
1179921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(0),
1180921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1181921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1182921a55d8Smrg			    CF_CONST(0),
1183921a55d8Smrg			    COND(SQ_CF_COND_ACTIVE),
1184921a55d8Smrg			    I_COUNT(0),
1185921a55d8Smrg			    VALID_PIXEL_MODE(0),
1186921a55d8Smrg			    END_OF_PROGRAM(0),
1187921a55d8Smrg			    CF_INST(SQ_CF_INST_RETURN),
1188921a55d8Smrg			    WHOLE_QUAD_MODE(0),
1189921a55d8Smrg			    BARRIER(1));
1190921a55d8Smrg    /* 23 */
1191921a55d8Smrg    shader[i++] = 0x00000000;
1192921a55d8Smrg    shader[i++] = 0x00000000;
1193921a55d8Smrg    /* 24/25 */
1194921a55d8Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1195921a55d8Smrg                             INST_MOD(0),
1196921a55d8Smrg                             FETCH_WHOLE_QUAD(0),
1197921a55d8Smrg                             RESOURCE_ID(0),
1198921a55d8Smrg                             SRC_GPR(0),
1199921a55d8Smrg                             SRC_REL(ABSOLUTE),
1200921a55d8Smrg                             ALT_CONST(0),
1201921a55d8Smrg			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1202921a55d8Smrg			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1203921a55d8Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1204921a55d8Smrg                             DST_REL(ABSOLUTE),
1205921a55d8Smrg                             DST_SEL_X(SQ_SEL_X),
1206921a55d8Smrg                             DST_SEL_Y(SQ_SEL_MASK),
1207921a55d8Smrg                             DST_SEL_Z(SQ_SEL_MASK),
1208921a55d8Smrg                             DST_SEL_W(SQ_SEL_1),
1209921a55d8Smrg                             LOD_BIAS(0),
1210921a55d8Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1211921a55d8Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1212921a55d8Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1213921a55d8Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1214921a55d8Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1215921a55d8Smrg                             OFFSET_Y(0),
1216921a55d8Smrg                             OFFSET_Z(0),
1217921a55d8Smrg                             SAMPLER_ID(0),
1218921a55d8Smrg                             SRC_SEL_X(SQ_SEL_X),
1219921a55d8Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1220921a55d8Smrg                             SRC_SEL_Z(SQ_SEL_0),
1221921a55d8Smrg                             SRC_SEL_W(SQ_SEL_1));
1222921a55d8Smrg    shader[i++] = TEX_DWORD_PAD;
1223921a55d8Smrg    /* 26/27 */
1224921a55d8Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1225921a55d8Smrg                             INST_MOD(0),
1226921a55d8Smrg                             FETCH_WHOLE_QUAD(0),
1227921a55d8Smrg                             RESOURCE_ID(1),
1228921a55d8Smrg                             SRC_GPR(0),
1229921a55d8Smrg                             SRC_REL(ABSOLUTE),
1230921a55d8Smrg                             ALT_CONST(0),
1231921a55d8Smrg			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1232921a55d8Smrg			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1233921a55d8Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1234921a55d8Smrg                             DST_REL(ABSOLUTE),
1235921a55d8Smrg                             DST_SEL_X(SQ_SEL_MASK),
1236921a55d8Smrg                             DST_SEL_Y(SQ_SEL_MASK),
1237921a55d8Smrg                             DST_SEL_Z(SQ_SEL_X),
1238921a55d8Smrg                             DST_SEL_W(SQ_SEL_MASK),
1239921a55d8Smrg                             LOD_BIAS(0),
1240921a55d8Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1241921a55d8Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1242921a55d8Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1243921a55d8Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1244921a55d8Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1245921a55d8Smrg                             OFFSET_Y(0),
1246921a55d8Smrg                             OFFSET_Z(0),
1247921a55d8Smrg                             SAMPLER_ID(1),
1248921a55d8Smrg                             SRC_SEL_X(SQ_SEL_X),
1249921a55d8Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1250921a55d8Smrg                             SRC_SEL_Z(SQ_SEL_0),
1251921a55d8Smrg                             SRC_SEL_W(SQ_SEL_1));
1252921a55d8Smrg    shader[i++] = TEX_DWORD_PAD;
1253921a55d8Smrg    /* 28/29 */
1254921a55d8Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1255921a55d8Smrg                             INST_MOD(0),
1256921a55d8Smrg                             FETCH_WHOLE_QUAD(0),
1257921a55d8Smrg                             RESOURCE_ID(2),
1258921a55d8Smrg                             SRC_GPR(0),
1259921a55d8Smrg                             SRC_REL(ABSOLUTE),
1260921a55d8Smrg                             ALT_CONST(0),
1261921a55d8Smrg			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1262921a55d8Smrg			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1263921a55d8Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1264921a55d8Smrg                             DST_REL(ABSOLUTE),
1265921a55d8Smrg                             DST_SEL_X(SQ_SEL_MASK),
1266921a55d8Smrg                             DST_SEL_Y(SQ_SEL_X),
1267921a55d8Smrg                             DST_SEL_Z(SQ_SEL_MASK),
1268921a55d8Smrg                             DST_SEL_W(SQ_SEL_MASK),
1269921a55d8Smrg                             LOD_BIAS(0),
1270921a55d8Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1271921a55d8Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1272921a55d8Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1273921a55d8Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1274921a55d8Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1275921a55d8Smrg                             OFFSET_Y(0),
1276921a55d8Smrg                             OFFSET_Z(0),
1277921a55d8Smrg                             SAMPLER_ID(2),
1278921a55d8Smrg                             SRC_SEL_X(SQ_SEL_X),
1279921a55d8Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1280921a55d8Smrg                             SRC_SEL_Z(SQ_SEL_0),
1281921a55d8Smrg                             SRC_SEL_W(SQ_SEL_1));
1282921a55d8Smrg    shader[i++] = TEX_DWORD_PAD;
1283921a55d8Smrg    /* 30 */
1284921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(32),
1285921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1286921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1287921a55d8Smrg                            CF_CONST(0),
1288921a55d8Smrg                            COND(SQ_CF_COND_ACTIVE),
1289921a55d8Smrg                            I_COUNT(2),
1290921a55d8Smrg                            VALID_PIXEL_MODE(0),
1291921a55d8Smrg                            END_OF_PROGRAM(0),
1292921a55d8Smrg                            CF_INST(SQ_CF_INST_TC),
1293921a55d8Smrg                            WHOLE_QUAD_MODE(0),
1294921a55d8Smrg                            BARRIER(1));
1295921a55d8Smrg    /* 31 */
1296921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(0),
1297921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1298921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1299921a55d8Smrg			    CF_CONST(0),
1300921a55d8Smrg			    COND(SQ_CF_COND_ACTIVE),
1301921a55d8Smrg			    I_COUNT(0),
1302921a55d8Smrg			    VALID_PIXEL_MODE(0),
1303921a55d8Smrg			    END_OF_PROGRAM(0),
1304921a55d8Smrg			    CF_INST(SQ_CF_INST_RETURN),
1305921a55d8Smrg			    WHOLE_QUAD_MODE(0),
1306921a55d8Smrg			    BARRIER(1));
1307921a55d8Smrg    /* 32/33 */
1308921a55d8Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1309921a55d8Smrg                             INST_MOD(0),
1310921a55d8Smrg                             FETCH_WHOLE_QUAD(0),
1311921a55d8Smrg                             RESOURCE_ID(0),
1312921a55d8Smrg                             SRC_GPR(0),
1313921a55d8Smrg                             SRC_REL(ABSOLUTE),
1314921a55d8Smrg                             ALT_CONST(0),
1315921a55d8Smrg                             RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1316921a55d8Smrg                             SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1317921a55d8Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1318921a55d8Smrg                             DST_REL(ABSOLUTE),
1319921a55d8Smrg                             DST_SEL_X(SQ_SEL_X),
1320921a55d8Smrg                             DST_SEL_Y(SQ_SEL_MASK),
1321921a55d8Smrg                             DST_SEL_Z(SQ_SEL_MASK),
1322921a55d8Smrg                             DST_SEL_W(SQ_SEL_1),
1323921a55d8Smrg                             LOD_BIAS(0),
1324921a55d8Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1325921a55d8Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1326921a55d8Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1327921a55d8Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1328921a55d8Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1329921a55d8Smrg                             OFFSET_Y(0),
1330921a55d8Smrg                             OFFSET_Z(0),
1331921a55d8Smrg                             SAMPLER_ID(0),
1332921a55d8Smrg                             SRC_SEL_X(SQ_SEL_X),
1333921a55d8Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1334921a55d8Smrg                             SRC_SEL_Z(SQ_SEL_0),
1335921a55d8Smrg                             SRC_SEL_W(SQ_SEL_1));
1336921a55d8Smrg    shader[i++] = TEX_DWORD_PAD;
1337921a55d8Smrg    /* 34/35 */
1338921a55d8Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1339921a55d8Smrg                             INST_MOD(0),
1340921a55d8Smrg                             FETCH_WHOLE_QUAD(0),
1341921a55d8Smrg                             RESOURCE_ID(1),
1342921a55d8Smrg                             SRC_GPR(0),
1343921a55d8Smrg                             SRC_REL(ABSOLUTE),
1344921a55d8Smrg                             ALT_CONST(0),
1345921a55d8Smrg                             RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1346921a55d8Smrg                             SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1347921a55d8Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1348921a55d8Smrg                             DST_REL(ABSOLUTE),
1349921a55d8Smrg                             DST_SEL_X(SQ_SEL_MASK),
1350921a55d8Smrg                             DST_SEL_Y(SQ_SEL_X),
1351921a55d8Smrg                             DST_SEL_Z(SQ_SEL_Y),
1352921a55d8Smrg                             DST_SEL_W(SQ_SEL_MASK),
1353921a55d8Smrg                             LOD_BIAS(0),
1354921a55d8Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1355921a55d8Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1356921a55d8Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1357921a55d8Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1358921a55d8Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1359921a55d8Smrg                             OFFSET_Y(0),
1360921a55d8Smrg                             OFFSET_Z(0),
1361921a55d8Smrg                             SAMPLER_ID(1),
1362921a55d8Smrg                             SRC_SEL_X(SQ_SEL_X),
1363921a55d8Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1364921a55d8Smrg                             SRC_SEL_Z(SQ_SEL_0),
1365921a55d8Smrg                             SRC_SEL_W(SQ_SEL_1));
1366921a55d8Smrg    shader[i++] = TEX_DWORD_PAD;
1367921a55d8Smrg
1368921a55d8Smrg    return i;
1369921a55d8Smrg}
1370921a55d8Smrg
1371921a55d8Smrg/* comp vs --------------------------------------- */
1372921a55d8Smrgint evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
1373921a55d8Smrg{
1374921a55d8Smrg    int i = 0;
1375921a55d8Smrg
1376921a55d8Smrg    /* 0 */
1377921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(3),
1378921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1379921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1380921a55d8Smrg                            CF_CONST(0),
1381921a55d8Smrg                            COND(SQ_CF_COND_BOOL),
1382921a55d8Smrg                            I_COUNT(0),
1383921a55d8Smrg                            VALID_PIXEL_MODE(0),
1384921a55d8Smrg                            END_OF_PROGRAM(0),
1385921a55d8Smrg                            CF_INST(SQ_CF_INST_CALL),
1386921a55d8Smrg                            WHOLE_QUAD_MODE(0),
1387921a55d8Smrg                            BARRIER(0));
1388921a55d8Smrg    /* 1 */
1389921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(9),
1390921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1391921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1392921a55d8Smrg                            CF_CONST(0),
1393921a55d8Smrg                            COND(SQ_CF_COND_NOT_BOOL),
1394921a55d8Smrg                            I_COUNT(0),
1395921a55d8Smrg                            VALID_PIXEL_MODE(0),
1396921a55d8Smrg                            END_OF_PROGRAM(0),
1397921a55d8Smrg                            CF_INST(SQ_CF_INST_CALL),
1398921a55d8Smrg                            WHOLE_QUAD_MODE(0),
1399921a55d8Smrg                            BARRIER(0));
1400921a55d8Smrg    /* 2 */
1401921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(0),
1402921a55d8Smrg                            JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1403921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1404921a55d8Smrg                            CF_CONST(0),
1405921a55d8Smrg                            COND(SQ_CF_COND_ACTIVE),
1406921a55d8Smrg                            I_COUNT(0),
1407921a55d8Smrg                            VALID_PIXEL_MODE(0),
1408921a55d8Smrg                            END_OF_PROGRAM(1),
1409921a55d8Smrg                            CF_INST(SQ_CF_INST_NOP),
1410921a55d8Smrg                            WHOLE_QUAD_MODE(0),
1411921a55d8Smrg                            BARRIER(1));
1412921a55d8Smrg    /* 3 - mask sub */
1413921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(44),
1414921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1415921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1416921a55d8Smrg			    CF_CONST(0),
1417921a55d8Smrg			    COND(SQ_CF_COND_ACTIVE),
1418921a55d8Smrg			    I_COUNT(3),
1419921a55d8Smrg			    VALID_PIXEL_MODE(0),
1420921a55d8Smrg			    END_OF_PROGRAM(0),
1421921a55d8Smrg			    CF_INST(SQ_CF_INST_VC),
1422921a55d8Smrg			    WHOLE_QUAD_MODE(0),
1423921a55d8Smrg			    BARRIER(1));
1424921a55d8Smrg
1425921a55d8Smrg    /* 4 - ALU */
1426921a55d8Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(14),
1427921a55d8Smrg				KCACHE_BANK0(0),
1428921a55d8Smrg				KCACHE_BANK1(0),
1429921a55d8Smrg				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
1430921a55d8Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1431921a55d8Smrg				KCACHE_ADDR0(0),
1432921a55d8Smrg				KCACHE_ADDR1(0),
1433921a55d8Smrg				I_COUNT(20),
1434921a55d8Smrg				ALT_CONST(0),
1435921a55d8Smrg				CF_INST(SQ_CF_INST_ALU),
1436921a55d8Smrg				WHOLE_QUAD_MODE(0),
1437921a55d8Smrg				BARRIER(1));
1438921a55d8Smrg
1439921a55d8Smrg    /* 5 - dst */
1440921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1441921a55d8Smrg					  TYPE(SQ_EXPORT_POS),
1442921a55d8Smrg					  RW_GPR(2),
1443921a55d8Smrg					  RW_REL(ABSOLUTE),
1444921a55d8Smrg					  INDEX_GPR(0),
1445921a55d8Smrg					  ELEM_SIZE(0));
1446921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1447921a55d8Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1448921a55d8Smrg					       SRC_SEL_Z(SQ_SEL_0),
1449921a55d8Smrg					       SRC_SEL_W(SQ_SEL_1),
1450921a55d8Smrg					       BURST_COUNT(1),
1451921a55d8Smrg					       VALID_PIXEL_MODE(0),
1452921a55d8Smrg					       END_OF_PROGRAM(0),
1453921a55d8Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1454921a55d8Smrg					       MARK(0),
1455921a55d8Smrg					       BARRIER(1));
1456921a55d8Smrg    /* 6 - src */
1457921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1458921a55d8Smrg					  TYPE(SQ_EXPORT_PARAM),
1459921a55d8Smrg					  RW_GPR(1),
1460921a55d8Smrg					  RW_REL(ABSOLUTE),
1461921a55d8Smrg					  INDEX_GPR(0),
1462921a55d8Smrg					  ELEM_SIZE(0));
1463921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1464921a55d8Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1465921a55d8Smrg					       SRC_SEL_Z(SQ_SEL_0),
1466921a55d8Smrg					       SRC_SEL_W(SQ_SEL_1),
1467921a55d8Smrg					       BURST_COUNT(1),
1468921a55d8Smrg					       VALID_PIXEL_MODE(0),
1469921a55d8Smrg					       END_OF_PROGRAM(0),
1470921a55d8Smrg					       CF_INST(SQ_CF_INST_EXPORT),
1471921a55d8Smrg					       MARK(0),
1472921a55d8Smrg					       BARRIER(0));
1473921a55d8Smrg    /* 7 - mask */
1474921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
1475921a55d8Smrg					  TYPE(SQ_EXPORT_PARAM),
1476921a55d8Smrg					  RW_GPR(0),
1477921a55d8Smrg					  RW_REL(ABSOLUTE),
1478921a55d8Smrg					  INDEX_GPR(0),
1479921a55d8Smrg					  ELEM_SIZE(0));
1480921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1481921a55d8Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1482921a55d8Smrg					       SRC_SEL_Z(SQ_SEL_0),
1483921a55d8Smrg					       SRC_SEL_W(SQ_SEL_1),
1484921a55d8Smrg					       BURST_COUNT(1),
1485921a55d8Smrg					       VALID_PIXEL_MODE(0),
1486921a55d8Smrg					       END_OF_PROGRAM(0),
1487921a55d8Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1488921a55d8Smrg					       WHOLE_QUAD_MODE(0),
1489921a55d8Smrg					       BARRIER(0));
1490921a55d8Smrg    /* 8 */
1491921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(0),
1492921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1493921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1494921a55d8Smrg			    CF_CONST(0),
1495921a55d8Smrg			    COND(SQ_CF_COND_ACTIVE),
1496921a55d8Smrg			    I_COUNT(0),
1497921a55d8Smrg			    VALID_PIXEL_MODE(0),
1498921a55d8Smrg			    END_OF_PROGRAM(0),
1499921a55d8Smrg			    CF_INST(SQ_CF_INST_RETURN),
1500921a55d8Smrg			    WHOLE_QUAD_MODE(0),
1501921a55d8Smrg			    BARRIER(1));
1502921a55d8Smrg    /* 9 - non-mask sub */
1503921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(50),
1504921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1505921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1506921a55d8Smrg			    CF_CONST(0),
1507921a55d8Smrg			    COND(SQ_CF_COND_ACTIVE),
1508921a55d8Smrg			    I_COUNT(2),
1509921a55d8Smrg			    VALID_PIXEL_MODE(0),
1510921a55d8Smrg			    END_OF_PROGRAM(0),
1511921a55d8Smrg			    CF_INST(SQ_CF_INST_VC),
1512921a55d8Smrg			    WHOLE_QUAD_MODE(0),
1513921a55d8Smrg			    BARRIER(1));
1514921a55d8Smrg
1515921a55d8Smrg    /* 10 - ALU */
1516921a55d8Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(34),
1517921a55d8Smrg				KCACHE_BANK0(0),
1518921a55d8Smrg				KCACHE_BANK1(0),
1519921a55d8Smrg				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
1520921a55d8Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1521921a55d8Smrg				KCACHE_ADDR0(0),
1522921a55d8Smrg				KCACHE_ADDR1(0),
1523921a55d8Smrg				I_COUNT(10),
1524921a55d8Smrg				ALT_CONST(0),
1525921a55d8Smrg				CF_INST(SQ_CF_INST_ALU),
1526921a55d8Smrg				WHOLE_QUAD_MODE(0),
1527921a55d8Smrg				BARRIER(1));
1528921a55d8Smrg
1529921a55d8Smrg    /* 11 - dst */
1530921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1531921a55d8Smrg					  TYPE(SQ_EXPORT_POS),
1532921a55d8Smrg					  RW_GPR(1),
1533921a55d8Smrg					  RW_REL(ABSOLUTE),
1534921a55d8Smrg					  INDEX_GPR(0),
1535921a55d8Smrg					  ELEM_SIZE(0));
1536921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1537921a55d8Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1538921a55d8Smrg					       SRC_SEL_Z(SQ_SEL_0),
1539921a55d8Smrg					       SRC_SEL_W(SQ_SEL_1),
1540921a55d8Smrg					       BURST_COUNT(0),
1541921a55d8Smrg					       VALID_PIXEL_MODE(0),
1542921a55d8Smrg					       END_OF_PROGRAM(0),
1543921a55d8Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1544921a55d8Smrg					       MARK(0),
1545921a55d8Smrg					       BARRIER(1));
1546921a55d8Smrg    /* 12 - src */
1547921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1548921a55d8Smrg					  TYPE(SQ_EXPORT_PARAM),
1549921a55d8Smrg					  RW_GPR(0),
1550921a55d8Smrg					  RW_REL(ABSOLUTE),
1551921a55d8Smrg					  INDEX_GPR(0),
1552921a55d8Smrg					  ELEM_SIZE(0));
1553921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1554921a55d8Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1555921a55d8Smrg					       SRC_SEL_Z(SQ_SEL_0),
1556921a55d8Smrg					       SRC_SEL_W(SQ_SEL_1),
1557921a55d8Smrg					       BURST_COUNT(0),
1558921a55d8Smrg					       VALID_PIXEL_MODE(0),
1559921a55d8Smrg					       END_OF_PROGRAM(0),
1560921a55d8Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1561921a55d8Smrg					       MARK(0),
1562921a55d8Smrg					       BARRIER(0));
1563921a55d8Smrg    /* 13 */
1564921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(0),
1565921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1566921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1567921a55d8Smrg			    CF_CONST(0),
1568921a55d8Smrg			    COND(SQ_CF_COND_ACTIVE),
1569921a55d8Smrg			    I_COUNT(0),
1570921a55d8Smrg			    VALID_PIXEL_MODE(0),
1571921a55d8Smrg			    END_OF_PROGRAM(0),
1572921a55d8Smrg			    CF_INST(SQ_CF_INST_RETURN),
1573921a55d8Smrg			    WHOLE_QUAD_MODE(0),
1574921a55d8Smrg			    BARRIER(1));
1575921a55d8Smrg
1576921a55d8Smrg    /* 14 srcX.x DOT4 - mask */
1577921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1578921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1579921a55d8Smrg                             SRC0_ELEM(ELEM_X),
1580921a55d8Smrg                             SRC0_NEG(0),
1581921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1582921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1583921a55d8Smrg                             SRC1_ELEM(ELEM_X),
1584921a55d8Smrg                             SRC1_NEG(0),
1585921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1586921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1587921a55d8Smrg                             LAST(0));
1588921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1589921a55d8Smrg                                 SRC1_ABS(0),
1590921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1591921a55d8Smrg                                 UPDATE_PRED(0),
1592921a55d8Smrg                                 WRITE_MASK(1),
1593921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1594921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1595921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1596921a55d8Smrg                                 DST_GPR(3),
1597921a55d8Smrg                                 DST_REL(ABSOLUTE),
1598921a55d8Smrg                                 DST_ELEM(ELEM_X),
1599921a55d8Smrg                                 CLAMP(0));
1600921a55d8Smrg
1601921a55d8Smrg    /* 15 srcX.y DOT4 - mask */
1602921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1603921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1604921a55d8Smrg                             SRC0_ELEM(ELEM_Y),
1605921a55d8Smrg                             SRC0_NEG(0),
1606921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1607921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1608921a55d8Smrg                             SRC1_ELEM(ELEM_Y),
1609921a55d8Smrg                             SRC1_NEG(0),
1610921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1611921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1612921a55d8Smrg                             LAST(0));
1613921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1614921a55d8Smrg                                 SRC1_ABS(0),
1615921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1616921a55d8Smrg                                 UPDATE_PRED(0),
1617921a55d8Smrg                                 WRITE_MASK(0),
1618921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1619921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1620921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1621921a55d8Smrg                                 DST_GPR(3),
1622921a55d8Smrg                                 DST_REL(ABSOLUTE),
1623921a55d8Smrg                                 DST_ELEM(ELEM_Y),
1624921a55d8Smrg                                 CLAMP(0));
1625921a55d8Smrg
1626921a55d8Smrg    /* 16 srcX.z DOT4 - mask */
1627921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1628921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1629921a55d8Smrg                             SRC0_ELEM(ELEM_Z),
1630921a55d8Smrg                             SRC0_NEG(0),
1631921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1632921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1633921a55d8Smrg                             SRC1_ELEM(ELEM_Z),
1634921a55d8Smrg                             SRC1_NEG(0),
1635921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1636921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1637921a55d8Smrg                             LAST(0));
1638921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1639921a55d8Smrg                                 SRC1_ABS(0),
1640921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1641921a55d8Smrg                                 UPDATE_PRED(0),
1642921a55d8Smrg                                 WRITE_MASK(0),
1643921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1644921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1645921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1646921a55d8Smrg                                 DST_GPR(3),
1647921a55d8Smrg                                 DST_REL(ABSOLUTE),
1648921a55d8Smrg                                 DST_ELEM(ELEM_Z),
1649921a55d8Smrg                                 CLAMP(0));
1650921a55d8Smrg
1651921a55d8Smrg    /* 17 srcX.w DOT4 - mask */
1652921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1653921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1654921a55d8Smrg                             SRC0_ELEM(ELEM_W),
1655921a55d8Smrg                             SRC0_NEG(0),
1656921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1657921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1658921a55d8Smrg                             SRC1_ELEM(ELEM_W),
1659921a55d8Smrg                             SRC1_NEG(0),
1660921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1661921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1662921a55d8Smrg                             LAST(1));
1663921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1664921a55d8Smrg                                 SRC1_ABS(0),
1665921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1666921a55d8Smrg                                 UPDATE_PRED(0),
1667921a55d8Smrg                                 WRITE_MASK(0),
1668921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1669921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1670921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1671921a55d8Smrg                                 DST_GPR(3),
1672921a55d8Smrg                                 DST_REL(ABSOLUTE),
1673921a55d8Smrg                                 DST_ELEM(ELEM_W),
1674921a55d8Smrg                                 CLAMP(0));
1675921a55d8Smrg
1676921a55d8Smrg    /* 18 srcY.x DOT4 - mask */
1677921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1678921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1679921a55d8Smrg                             SRC0_ELEM(ELEM_X),
1680921a55d8Smrg                             SRC0_NEG(0),
1681921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1682921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1683921a55d8Smrg                             SRC1_ELEM(ELEM_X),
1684921a55d8Smrg                             SRC1_NEG(0),
1685921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1686921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1687921a55d8Smrg                             LAST(0));
1688921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1689921a55d8Smrg                                 SRC1_ABS(0),
1690921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1691921a55d8Smrg                                 UPDATE_PRED(0),
1692921a55d8Smrg                                 WRITE_MASK(0),
1693921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1694921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1695921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1696921a55d8Smrg                                 DST_GPR(3),
1697921a55d8Smrg                                 DST_REL(ABSOLUTE),
1698921a55d8Smrg                                 DST_ELEM(ELEM_X),
1699921a55d8Smrg                                 CLAMP(0));
1700921a55d8Smrg
1701921a55d8Smrg    /* 19 srcY.y DOT4 - mask */
1702921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1703921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1704921a55d8Smrg                             SRC0_ELEM(ELEM_Y),
1705921a55d8Smrg                             SRC0_NEG(0),
1706921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1707921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1708921a55d8Smrg                             SRC1_ELEM(ELEM_Y),
1709921a55d8Smrg                             SRC1_NEG(0),
1710921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1711921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1712921a55d8Smrg                             LAST(0));
1713921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1714921a55d8Smrg                                 SRC1_ABS(0),
1715921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1716921a55d8Smrg                                 UPDATE_PRED(0),
1717921a55d8Smrg                                 WRITE_MASK(1),
1718921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1719921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1720921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1721921a55d8Smrg                                 DST_GPR(3),
1722921a55d8Smrg                                 DST_REL(ABSOLUTE),
1723921a55d8Smrg                                 DST_ELEM(ELEM_Y),
1724921a55d8Smrg                                 CLAMP(0));
1725921a55d8Smrg
1726921a55d8Smrg    /* 20 srcY.z DOT4 - mask */
1727921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1728921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1729921a55d8Smrg                             SRC0_ELEM(ELEM_Z),
1730921a55d8Smrg                             SRC0_NEG(0),
1731921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1732921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1733921a55d8Smrg                             SRC1_ELEM(ELEM_Z),
1734921a55d8Smrg                             SRC1_NEG(0),
1735921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1736921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1737921a55d8Smrg                             LAST(0));
1738921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1739921a55d8Smrg                                 SRC1_ABS(0),
1740921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1741921a55d8Smrg                                 UPDATE_PRED(0),
1742921a55d8Smrg                                 WRITE_MASK(0),
1743921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1744921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1745921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1746921a55d8Smrg                                 DST_GPR(3),
1747921a55d8Smrg                                 DST_REL(ABSOLUTE),
1748921a55d8Smrg                                 DST_ELEM(ELEM_Z),
1749921a55d8Smrg                                 CLAMP(0));
1750921a55d8Smrg
1751921a55d8Smrg    /* 21 srcY.w DOT4 - mask */
1752921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1753921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1754921a55d8Smrg                             SRC0_ELEM(ELEM_W),
1755921a55d8Smrg                             SRC0_NEG(0),
1756921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1757921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1758921a55d8Smrg                             SRC1_ELEM(ELEM_W),
1759921a55d8Smrg                             SRC1_NEG(0),
1760921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1761921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1762921a55d8Smrg                             LAST(1));
1763921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1764921a55d8Smrg                                 SRC1_ABS(0),
1765921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1766921a55d8Smrg                                 UPDATE_PRED(0),
1767921a55d8Smrg                                 WRITE_MASK(0),
1768921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1769921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1770921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1771921a55d8Smrg                                 DST_GPR(3),
1772921a55d8Smrg                                 DST_REL(ABSOLUTE),
1773921a55d8Smrg                                 DST_ELEM(ELEM_W),
1774921a55d8Smrg                                 CLAMP(0));
1775921a55d8Smrg
1776921a55d8Smrg    /* 22 maskX.x DOT4 - mask */
1777921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1778921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1779921a55d8Smrg                             SRC0_ELEM(ELEM_X),
1780921a55d8Smrg                             SRC0_NEG(0),
1781921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1782921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1783921a55d8Smrg                             SRC1_ELEM(ELEM_X),
1784921a55d8Smrg                             SRC1_NEG(0),
1785921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1786921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1787921a55d8Smrg                             LAST(0));
1788921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1789921a55d8Smrg                                 SRC1_ABS(0),
1790921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1791921a55d8Smrg                                 UPDATE_PRED(0),
1792921a55d8Smrg                                 WRITE_MASK(1),
1793921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1794921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1795921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1796921a55d8Smrg                                 DST_GPR(4),
1797921a55d8Smrg                                 DST_REL(ABSOLUTE),
1798921a55d8Smrg                                 DST_ELEM(ELEM_X),
1799921a55d8Smrg                                 CLAMP(0));
1800921a55d8Smrg
1801921a55d8Smrg    /* 23 maskX.y DOT4 - mask */
1802921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1803921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1804921a55d8Smrg                             SRC0_ELEM(ELEM_Y),
1805921a55d8Smrg                             SRC0_NEG(0),
1806921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1807921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1808921a55d8Smrg                             SRC1_ELEM(ELEM_Y),
1809921a55d8Smrg                             SRC1_NEG(0),
1810921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1811921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1812921a55d8Smrg                             LAST(0));
1813921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1814921a55d8Smrg                                 SRC1_ABS(0),
1815921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1816921a55d8Smrg                                 UPDATE_PRED(0),
1817921a55d8Smrg                                 WRITE_MASK(0),
1818921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1819921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1820921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1821921a55d8Smrg                                 DST_GPR(4),
1822921a55d8Smrg                                 DST_REL(ABSOLUTE),
1823921a55d8Smrg                                 DST_ELEM(ELEM_Y),
1824921a55d8Smrg                                 CLAMP(0));
1825921a55d8Smrg
1826921a55d8Smrg    /* 24 maskX.z DOT4 - mask */
1827921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1828921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1829921a55d8Smrg                             SRC0_ELEM(ELEM_Z),
1830921a55d8Smrg                             SRC0_NEG(0),
1831921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1832921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1833921a55d8Smrg                             SRC1_ELEM(ELEM_Z),
1834921a55d8Smrg                             SRC1_NEG(0),
1835921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1836921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1837921a55d8Smrg                             LAST(0));
1838921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1839921a55d8Smrg                                 SRC1_ABS(0),
1840921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1841921a55d8Smrg                                 UPDATE_PRED(0),
1842921a55d8Smrg                                 WRITE_MASK(0),
1843921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1844921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1845921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1846921a55d8Smrg                                 DST_GPR(4),
1847921a55d8Smrg                                 DST_REL(ABSOLUTE),
1848921a55d8Smrg                                 DST_ELEM(ELEM_Z),
1849921a55d8Smrg                                 CLAMP(0));
1850921a55d8Smrg
1851921a55d8Smrg    /* 25 maskX.w DOT4 - mask */
1852921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1853921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1854921a55d8Smrg                             SRC0_ELEM(ELEM_W),
1855921a55d8Smrg                             SRC0_NEG(0),
1856921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1857921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1858921a55d8Smrg                             SRC1_ELEM(ELEM_W),
1859921a55d8Smrg                             SRC1_NEG(0),
1860921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1861921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1862921a55d8Smrg                             LAST(1));
1863921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1864921a55d8Smrg                                 SRC1_ABS(0),
1865921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1866921a55d8Smrg                                 UPDATE_PRED(0),
1867921a55d8Smrg                                 WRITE_MASK(0),
1868921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1869921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1870921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1871921a55d8Smrg                                 DST_GPR(4),
1872921a55d8Smrg                                 DST_REL(ABSOLUTE),
1873921a55d8Smrg                                 DST_ELEM(ELEM_W),
1874921a55d8Smrg                                 CLAMP(0));
1875921a55d8Smrg
1876921a55d8Smrg    /* 26 maskY.x DOT4 - mask */
1877921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1878921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1879921a55d8Smrg                             SRC0_ELEM(ELEM_X),
1880921a55d8Smrg                             SRC0_NEG(0),
1881921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1882921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1883921a55d8Smrg                             SRC1_ELEM(ELEM_X),
1884921a55d8Smrg                             SRC1_NEG(0),
1885921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1886921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1887921a55d8Smrg                             LAST(0));
1888921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1889921a55d8Smrg                                 SRC1_ABS(0),
1890921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1891921a55d8Smrg                                 UPDATE_PRED(0),
1892921a55d8Smrg                                 WRITE_MASK(0),
1893921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1894921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1895921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1896921a55d8Smrg                                 DST_GPR(4),
1897921a55d8Smrg                                 DST_REL(ABSOLUTE),
1898921a55d8Smrg                                 DST_ELEM(ELEM_X),
1899921a55d8Smrg                                 CLAMP(0));
1900921a55d8Smrg
1901921a55d8Smrg    /* 27 maskY.y DOT4 - mask */
1902921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1903921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1904921a55d8Smrg                             SRC0_ELEM(ELEM_Y),
1905921a55d8Smrg                             SRC0_NEG(0),
1906921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1907921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1908921a55d8Smrg                             SRC1_ELEM(ELEM_Y),
1909921a55d8Smrg                             SRC1_NEG(0),
1910921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1911921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1912921a55d8Smrg                             LAST(0));
1913921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1914921a55d8Smrg                                 SRC1_ABS(0),
1915921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1916921a55d8Smrg                                 UPDATE_PRED(0),
1917921a55d8Smrg                                 WRITE_MASK(1),
1918921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1919921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1920921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1921921a55d8Smrg                                 DST_GPR(4),
1922921a55d8Smrg                                 DST_REL(ABSOLUTE),
1923921a55d8Smrg                                 DST_ELEM(ELEM_Y),
1924921a55d8Smrg                                 CLAMP(0));
1925921a55d8Smrg
1926921a55d8Smrg    /* 28 maskY.z DOT4 - mask */
1927921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1928921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1929921a55d8Smrg                             SRC0_ELEM(ELEM_Z),
1930921a55d8Smrg                             SRC0_NEG(0),
1931921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1932921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1933921a55d8Smrg                             SRC1_ELEM(ELEM_Z),
1934921a55d8Smrg                             SRC1_NEG(0),
1935921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1936921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1937921a55d8Smrg                             LAST(0));
1938921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1939921a55d8Smrg                                 SRC1_ABS(0),
1940921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1941921a55d8Smrg                                 UPDATE_PRED(0),
1942921a55d8Smrg                                 WRITE_MASK(0),
1943921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1944921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1945921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1946921a55d8Smrg                                 DST_GPR(4),
1947921a55d8Smrg                                 DST_REL(ABSOLUTE),
1948921a55d8Smrg                                 DST_ELEM(ELEM_Z),
1949921a55d8Smrg                                 CLAMP(0));
1950921a55d8Smrg
1951921a55d8Smrg    /* 29 maskY.w DOT4 - mask */
1952921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1953921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1954921a55d8Smrg                             SRC0_ELEM(ELEM_W),
1955921a55d8Smrg                             SRC0_NEG(0),
1956921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1957921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1958921a55d8Smrg                             SRC1_ELEM(ELEM_W),
1959921a55d8Smrg                             SRC1_NEG(0),
1960921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1961921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1962921a55d8Smrg                             LAST(1));
1963921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1964921a55d8Smrg                                 SRC1_ABS(0),
1965921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1966921a55d8Smrg                                 UPDATE_PRED(0),
1967921a55d8Smrg                                 WRITE_MASK(0),
1968921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1969921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1970921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1971921a55d8Smrg                                 DST_GPR(4),
1972921a55d8Smrg                                 DST_REL(ABSOLUTE),
1973921a55d8Smrg                                 DST_ELEM(ELEM_W),
1974921a55d8Smrg                                 CLAMP(0));
1975921a55d8Smrg
1976921a55d8Smrg    /* 30 srcX / w */
1977921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
1978921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1979921a55d8Smrg                             SRC0_ELEM(ELEM_X),
1980921a55d8Smrg                             SRC0_NEG(0),
1981921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1982921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1983921a55d8Smrg                             SRC1_ELEM(ELEM_W),
1984921a55d8Smrg                             SRC1_NEG(0),
1985921a55d8Smrg                             INDEX_MODE(SQ_INDEX_AR_X),
1986921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1987921a55d8Smrg                             LAST(1));
1988921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1989921a55d8Smrg                                 SRC1_ABS(0),
1990921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1991921a55d8Smrg                                 UPDATE_PRED(0),
1992921a55d8Smrg                                 WRITE_MASK(1),
1993921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1994921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_MUL),
1995921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1996921a55d8Smrg                                 DST_GPR(1),
1997921a55d8Smrg                                 DST_REL(ABSOLUTE),
1998921a55d8Smrg                                 DST_ELEM(ELEM_X),
1999921a55d8Smrg                                 CLAMP(0));
2000921a55d8Smrg
2001921a55d8Smrg    /* 31 srcY / h */
2002921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
2003921a55d8Smrg                             SRC0_REL(ABSOLUTE),
2004921a55d8Smrg                             SRC0_ELEM(ELEM_Y),
2005921a55d8Smrg                             SRC0_NEG(0),
2006921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2007921a55d8Smrg                             SRC1_REL(ABSOLUTE),
2008921a55d8Smrg                             SRC1_ELEM(ELEM_W),
2009921a55d8Smrg                             SRC1_NEG(0),
2010921a55d8Smrg                             INDEX_MODE(SQ_INDEX_AR_X),
2011921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2012921a55d8Smrg                             LAST(1));
2013921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2014921a55d8Smrg                                 SRC1_ABS(0),
2015921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2016921a55d8Smrg                                 UPDATE_PRED(0),
2017921a55d8Smrg                                 WRITE_MASK(1),
2018921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2019921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_MUL),
2020921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2021921a55d8Smrg                                 DST_GPR(1),
2022921a55d8Smrg                                 DST_REL(ABSOLUTE),
2023921a55d8Smrg                                 DST_ELEM(ELEM_Y),
2024921a55d8Smrg                                 CLAMP(0));
2025921a55d8Smrg
2026921a55d8Smrg    /* 32 maskX / w */
2027921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
2028921a55d8Smrg                             SRC0_REL(ABSOLUTE),
2029921a55d8Smrg                             SRC0_ELEM(ELEM_X),
2030921a55d8Smrg                             SRC0_NEG(0),
2031921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
2032921a55d8Smrg                             SRC1_REL(ABSOLUTE),
2033921a55d8Smrg                             SRC1_ELEM(ELEM_W),
2034921a55d8Smrg                             SRC1_NEG(0),
2035921a55d8Smrg                             INDEX_MODE(SQ_INDEX_AR_X),
2036921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2037921a55d8Smrg                             LAST(1));
2038921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2039921a55d8Smrg                                 SRC1_ABS(0),
2040921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2041921a55d8Smrg                                 UPDATE_PRED(0),
2042921a55d8Smrg                                 WRITE_MASK(1),
2043921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2044921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_MUL),
2045921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2046921a55d8Smrg                                 DST_GPR(0),
2047921a55d8Smrg                                 DST_REL(ABSOLUTE),
2048921a55d8Smrg                                 DST_ELEM(ELEM_X),
2049921a55d8Smrg                                 CLAMP(0));
2050921a55d8Smrg
2051921a55d8Smrg    /* 33 maskY / h */
2052921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
2053921a55d8Smrg                             SRC0_REL(ABSOLUTE),
2054921a55d8Smrg                             SRC0_ELEM(ELEM_Y),
2055921a55d8Smrg                             SRC0_NEG(0),
2056921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
2057921a55d8Smrg                             SRC1_REL(ABSOLUTE),
2058921a55d8Smrg                             SRC1_ELEM(ELEM_W),
2059921a55d8Smrg                             SRC1_NEG(0),
2060921a55d8Smrg                             INDEX_MODE(SQ_INDEX_AR_X),
2061921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2062921a55d8Smrg                             LAST(1));
2063921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2064921a55d8Smrg                                 SRC1_ABS(0),
2065921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2066921a55d8Smrg                                 UPDATE_PRED(0),
2067921a55d8Smrg                                 WRITE_MASK(1),
2068921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2069921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_MUL),
2070921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2071921a55d8Smrg                                 DST_GPR(0),
2072921a55d8Smrg                                 DST_REL(ABSOLUTE),
2073921a55d8Smrg                                 DST_ELEM(ELEM_Y),
2074921a55d8Smrg                                 CLAMP(0));
2075921a55d8Smrg
2076921a55d8Smrg    /* 34 srcX.x DOT4 - non-mask */
2077921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2078921a55d8Smrg                             SRC0_REL(ABSOLUTE),
2079921a55d8Smrg                             SRC0_ELEM(ELEM_X),
2080921a55d8Smrg                             SRC0_NEG(0),
2081921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2082921a55d8Smrg                             SRC1_REL(ABSOLUTE),
2083921a55d8Smrg                             SRC1_ELEM(ELEM_X),
2084921a55d8Smrg                             SRC1_NEG(0),
2085921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
2086921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2087921a55d8Smrg                             LAST(0));
2088921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2089921a55d8Smrg                                 SRC1_ABS(0),
2090921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2091921a55d8Smrg                                 UPDATE_PRED(0),
2092921a55d8Smrg                                 WRITE_MASK(1),
2093921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2094921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2095921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2096921a55d8Smrg                                 DST_GPR(2),
2097921a55d8Smrg                                 DST_REL(ABSOLUTE),
2098921a55d8Smrg                                 DST_ELEM(ELEM_X),
2099921a55d8Smrg                                 CLAMP(0));
2100921a55d8Smrg
2101921a55d8Smrg    /* 35 srcX.y DOT4 - non-mask */
2102921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2103921a55d8Smrg                             SRC0_REL(ABSOLUTE),
2104921a55d8Smrg                             SRC0_ELEM(ELEM_Y),
2105921a55d8Smrg                             SRC0_NEG(0),
2106921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2107921a55d8Smrg                             SRC1_REL(ABSOLUTE),
2108921a55d8Smrg                             SRC1_ELEM(ELEM_Y),
2109921a55d8Smrg                             SRC1_NEG(0),
2110921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
2111921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2112921a55d8Smrg                             LAST(0));
2113921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2114921a55d8Smrg                                 SRC1_ABS(0),
2115921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2116921a55d8Smrg                                 UPDATE_PRED(0),
2117921a55d8Smrg                                 WRITE_MASK(0),
2118921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2119921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2120921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2121921a55d8Smrg                                 DST_GPR(2),
2122921a55d8Smrg                                 DST_REL(ABSOLUTE),
2123921a55d8Smrg                                 DST_ELEM(ELEM_Y),
2124921a55d8Smrg                                 CLAMP(0));
2125921a55d8Smrg
2126921a55d8Smrg    /* 36 srcX.z DOT4 - non-mask */
2127921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2128921a55d8Smrg                             SRC0_REL(ABSOLUTE),
2129921a55d8Smrg                             SRC0_ELEM(ELEM_Z),
2130921a55d8Smrg                             SRC0_NEG(0),
2131921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2132921a55d8Smrg                             SRC1_REL(ABSOLUTE),
2133921a55d8Smrg                             SRC1_ELEM(ELEM_Z),
2134921a55d8Smrg                             SRC1_NEG(0),
2135921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
2136921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2137921a55d8Smrg                             LAST(0));
2138921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2139921a55d8Smrg                                 SRC1_ABS(0),
2140921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2141921a55d8Smrg                                 UPDATE_PRED(0),
2142921a55d8Smrg                                 WRITE_MASK(0),
2143921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2144921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2145921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2146921a55d8Smrg                                 DST_GPR(2),
2147921a55d8Smrg                                 DST_REL(ABSOLUTE),
2148921a55d8Smrg                                 DST_ELEM(ELEM_Z),
2149921a55d8Smrg                                 CLAMP(0));
2150921a55d8Smrg
2151921a55d8Smrg    /* 37 srcX.w DOT4 - non-mask */
2152921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2153921a55d8Smrg                             SRC0_REL(ABSOLUTE),
2154921a55d8Smrg                             SRC0_ELEM(ELEM_W),
2155921a55d8Smrg                             SRC0_NEG(0),
2156921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2157921a55d8Smrg                             SRC1_REL(ABSOLUTE),
2158921a55d8Smrg                             SRC1_ELEM(ELEM_W),
2159921a55d8Smrg                             SRC1_NEG(0),
2160921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
2161921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2162921a55d8Smrg                             LAST(1));
2163921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2164921a55d8Smrg                                 SRC1_ABS(0),
2165921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2166921a55d8Smrg                                 UPDATE_PRED(0),
2167921a55d8Smrg                                 WRITE_MASK(0),
2168921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2169921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2170921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2171921a55d8Smrg                                 DST_GPR(2),
2172921a55d8Smrg                                 DST_REL(ABSOLUTE),
2173921a55d8Smrg                                 DST_ELEM(ELEM_W),
2174921a55d8Smrg                                 CLAMP(0));
2175921a55d8Smrg
2176921a55d8Smrg    /* 38 srcY.x DOT4 - non-mask */
2177921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2178921a55d8Smrg                             SRC0_REL(ABSOLUTE),
2179921a55d8Smrg                             SRC0_ELEM(ELEM_X),
2180921a55d8Smrg                             SRC0_NEG(0),
2181921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2182921a55d8Smrg                             SRC1_REL(ABSOLUTE),
2183921a55d8Smrg                             SRC1_ELEM(ELEM_X),
2184921a55d8Smrg                             SRC1_NEG(0),
2185921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
2186921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2187921a55d8Smrg                             LAST(0));
2188921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2189921a55d8Smrg                                 SRC1_ABS(0),
2190921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2191921a55d8Smrg                                 UPDATE_PRED(0),
2192921a55d8Smrg                                 WRITE_MASK(0),
2193921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2194921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2195921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2196921a55d8Smrg                                 DST_GPR(2),
2197921a55d8Smrg                                 DST_REL(ABSOLUTE),
2198921a55d8Smrg                                 DST_ELEM(ELEM_X),
2199921a55d8Smrg                                 CLAMP(0));
2200921a55d8Smrg
2201921a55d8Smrg    /* 39 srcY.y DOT4 - non-mask */
2202921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2203921a55d8Smrg                             SRC0_REL(ABSOLUTE),
2204921a55d8Smrg                             SRC0_ELEM(ELEM_Y),
2205921a55d8Smrg                             SRC0_NEG(0),
2206921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2207921a55d8Smrg                             SRC1_REL(ABSOLUTE),
2208921a55d8Smrg                             SRC1_ELEM(ELEM_Y),
2209921a55d8Smrg                             SRC1_NEG(0),
2210921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
2211921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2212921a55d8Smrg                             LAST(0));
2213921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2214921a55d8Smrg                                 SRC1_ABS(0),
2215921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2216921a55d8Smrg                                 UPDATE_PRED(0),
2217921a55d8Smrg                                 WRITE_MASK(1),
2218921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2219921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2220921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2221921a55d8Smrg                                 DST_GPR(2),
2222921a55d8Smrg                                 DST_REL(ABSOLUTE),
2223921a55d8Smrg                                 DST_ELEM(ELEM_Y),
2224921a55d8Smrg                                 CLAMP(0));
2225921a55d8Smrg
2226921a55d8Smrg    /* 40 srcY.z DOT4 - non-mask */
2227921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2228921a55d8Smrg                             SRC0_REL(ABSOLUTE),
2229921a55d8Smrg                             SRC0_ELEM(ELEM_Z),
2230921a55d8Smrg                             SRC0_NEG(0),
2231921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2232921a55d8Smrg                             SRC1_REL(ABSOLUTE),
2233921a55d8Smrg                             SRC1_ELEM(ELEM_Z),
2234921a55d8Smrg                             SRC1_NEG(0),
2235921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
2236921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2237921a55d8Smrg                             LAST(0));
2238921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2239921a55d8Smrg                                 SRC1_ABS(0),
2240921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2241921a55d8Smrg                                 UPDATE_PRED(0),
2242921a55d8Smrg                                 WRITE_MASK(0),
2243921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2244921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2245921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2246921a55d8Smrg                                 DST_GPR(2),
2247921a55d8Smrg                                 DST_REL(ABSOLUTE),
2248921a55d8Smrg                                 DST_ELEM(ELEM_Z),
2249921a55d8Smrg                                 CLAMP(0));
2250921a55d8Smrg
2251921a55d8Smrg    /* 41 srcY.w DOT4 - non-mask */
2252921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2253921a55d8Smrg                             SRC0_REL(ABSOLUTE),
2254921a55d8Smrg                             SRC0_ELEM(ELEM_W),
2255921a55d8Smrg                             SRC0_NEG(0),
2256921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2257921a55d8Smrg                             SRC1_REL(ABSOLUTE),
2258921a55d8Smrg                             SRC1_ELEM(ELEM_W),
2259921a55d8Smrg                             SRC1_NEG(0),
2260921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
2261921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2262921a55d8Smrg                             LAST(1));
2263921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2264921a55d8Smrg                                 SRC1_ABS(0),
2265921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2266921a55d8Smrg                                 UPDATE_PRED(0),
2267921a55d8Smrg                                 WRITE_MASK(0),
2268921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2269921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2270921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2271921a55d8Smrg                                 DST_GPR(2),
2272921a55d8Smrg                                 DST_REL(ABSOLUTE),
2273921a55d8Smrg                                 DST_ELEM(ELEM_W),
2274921a55d8Smrg                                 CLAMP(0));
2275921a55d8Smrg
2276921a55d8Smrg    /* 42 srcX / w */
2277921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
2278921a55d8Smrg                             SRC0_REL(ABSOLUTE),
2279921a55d8Smrg                             SRC0_ELEM(ELEM_X),
2280921a55d8Smrg                             SRC0_NEG(0),
2281921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2282921a55d8Smrg                             SRC1_REL(ABSOLUTE),
2283921a55d8Smrg                             SRC1_ELEM(ELEM_W),
2284921a55d8Smrg                             SRC1_NEG(0),
2285921a55d8Smrg                             INDEX_MODE(SQ_INDEX_AR_X),
2286921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2287921a55d8Smrg                             LAST(1));
2288921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2289921a55d8Smrg                                 SRC1_ABS(0),
2290921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2291921a55d8Smrg                                 UPDATE_PRED(0),
2292921a55d8Smrg                                 WRITE_MASK(1),
2293921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2294921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_MUL),
2295921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2296921a55d8Smrg                                 DST_GPR(0),
2297921a55d8Smrg                                 DST_REL(ABSOLUTE),
2298921a55d8Smrg                                 DST_ELEM(ELEM_X),
2299921a55d8Smrg                                 CLAMP(0));
2300921a55d8Smrg
2301921a55d8Smrg    /* 43 srcY / h */
2302921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
2303921a55d8Smrg                             SRC0_REL(ABSOLUTE),
2304921a55d8Smrg                             SRC0_ELEM(ELEM_Y),
2305921a55d8Smrg                             SRC0_NEG(0),
2306921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2307921a55d8Smrg                             SRC1_REL(ABSOLUTE),
2308921a55d8Smrg                             SRC1_ELEM(ELEM_W),
2309921a55d8Smrg                             SRC1_NEG(0),
2310921a55d8Smrg                             INDEX_MODE(SQ_INDEX_AR_X),
2311921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2312921a55d8Smrg                             LAST(1));
2313921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2314921a55d8Smrg                                 SRC1_ABS(0),
2315921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2316921a55d8Smrg                                 UPDATE_PRED(0),
2317921a55d8Smrg                                 WRITE_MASK(1),
2318921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2319921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_MUL),
2320921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2321921a55d8Smrg                                 DST_GPR(0),
2322921a55d8Smrg                                 DST_REL(ABSOLUTE),
2323921a55d8Smrg                                 DST_ELEM(ELEM_Y),
2324921a55d8Smrg                                 CLAMP(0));
2325921a55d8Smrg
2326921a55d8Smrg    /* mask vfetch - 44/45 - dst */
2327921a55d8Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2328921a55d8Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2329921a55d8Smrg			     FETCH_WHOLE_QUAD(0),
2330921a55d8Smrg			     BUFFER_ID(0),
2331921a55d8Smrg			     SRC_GPR(0),
2332921a55d8Smrg			     SRC_REL(ABSOLUTE),
2333921a55d8Smrg			     SRC_SEL_X(SQ_SEL_X),
2334921a55d8Smrg			     MEGA_FETCH_COUNT(24));
2335921a55d8Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(2),
2336921a55d8Smrg				 DST_REL(0),
2337921a55d8Smrg				 DST_SEL_X(SQ_SEL_X),
2338921a55d8Smrg				 DST_SEL_Y(SQ_SEL_Y),
2339921a55d8Smrg				 DST_SEL_Z(SQ_SEL_0),
2340921a55d8Smrg				 DST_SEL_W(SQ_SEL_1),
2341921a55d8Smrg				 USE_CONST_FIELDS(0),
2342921a55d8Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
2343921a55d8Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2344921a55d8Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2345921a55d8Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2346921a55d8Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
2347921a55d8Smrg			     ENDIAN_SWAP(ENDIAN_NONE),
2348921a55d8Smrg			     CONST_BUF_NO_STRIDE(0),
2349921a55d8Smrg			     MEGA_FETCH(1),
2350921a55d8Smrg			     ALT_CONST(0),
2351921a55d8Smrg			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2352921a55d8Smrg    shader[i++] = VTX_DWORD_PAD;
2353921a55d8Smrg    /* 46/47 - src */
2354921a55d8Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2355921a55d8Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2356921a55d8Smrg			     FETCH_WHOLE_QUAD(0),
2357921a55d8Smrg			     BUFFER_ID(0),
2358921a55d8Smrg			     SRC_GPR(0),
2359921a55d8Smrg			     SRC_REL(ABSOLUTE),
2360921a55d8Smrg			     SRC_SEL_X(SQ_SEL_X),
2361921a55d8Smrg			     MEGA_FETCH_COUNT(8));
2362921a55d8Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
2363921a55d8Smrg				 DST_REL(0),
2364921a55d8Smrg				 DST_SEL_X(SQ_SEL_X),
2365921a55d8Smrg				 DST_SEL_Y(SQ_SEL_Y),
2366921a55d8Smrg				 DST_SEL_Z(SQ_SEL_1),
2367921a55d8Smrg				 DST_SEL_W(SQ_SEL_0),
2368921a55d8Smrg				 USE_CONST_FIELDS(0),
2369921a55d8Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
2370921a55d8Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2371921a55d8Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2372921a55d8Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2373921a55d8Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
2374921a55d8Smrg			     ENDIAN_SWAP(ENDIAN_NONE),
2375921a55d8Smrg			     CONST_BUF_NO_STRIDE(0),
2376921a55d8Smrg			     MEGA_FETCH(0),
2377921a55d8Smrg			     ALT_CONST(0),
2378921a55d8Smrg			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2379921a55d8Smrg    shader[i++] = VTX_DWORD_PAD;
2380921a55d8Smrg    /* 48/49 - mask */
2381921a55d8Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2382921a55d8Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2383921a55d8Smrg			     FETCH_WHOLE_QUAD(0),
2384921a55d8Smrg			     BUFFER_ID(0),
2385921a55d8Smrg			     SRC_GPR(0),
2386921a55d8Smrg			     SRC_REL(ABSOLUTE),
2387921a55d8Smrg			     SRC_SEL_X(SQ_SEL_X),
2388921a55d8Smrg			     MEGA_FETCH_COUNT(8));
2389921a55d8Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
2390921a55d8Smrg				 DST_REL(0),
2391921a55d8Smrg				 DST_SEL_X(SQ_SEL_X),
2392921a55d8Smrg				 DST_SEL_Y(SQ_SEL_Y),
2393921a55d8Smrg				 DST_SEL_Z(SQ_SEL_1),
2394921a55d8Smrg				 DST_SEL_W(SQ_SEL_0),
2395921a55d8Smrg				 USE_CONST_FIELDS(0),
2396921a55d8Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
2397921a55d8Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2398921a55d8Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2399921a55d8Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2400921a55d8Smrg    shader[i++] = VTX_DWORD2(OFFSET(16),
2401921a55d8Smrg			     ENDIAN_SWAP(ENDIAN_NONE),
2402921a55d8Smrg			     CONST_BUF_NO_STRIDE(0),
2403921a55d8Smrg			     MEGA_FETCH(0),
2404921a55d8Smrg			     ALT_CONST(0),
2405921a55d8Smrg			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2406921a55d8Smrg    shader[i++] = VTX_DWORD_PAD;
2407921a55d8Smrg
2408921a55d8Smrg    /* no mask vfetch - 50/51 - dst */
2409921a55d8Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2410921a55d8Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2411921a55d8Smrg			     FETCH_WHOLE_QUAD(0),
2412921a55d8Smrg			     BUFFER_ID(0),
2413921a55d8Smrg			     SRC_GPR(0),
2414921a55d8Smrg			     SRC_REL(ABSOLUTE),
2415921a55d8Smrg			     SRC_SEL_X(SQ_SEL_X),
2416921a55d8Smrg			     MEGA_FETCH_COUNT(16));
2417921a55d8Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
2418921a55d8Smrg				 DST_REL(0),
2419921a55d8Smrg				 DST_SEL_X(SQ_SEL_X),
2420921a55d8Smrg				 DST_SEL_Y(SQ_SEL_Y),
2421921a55d8Smrg				 DST_SEL_Z(SQ_SEL_0),
2422921a55d8Smrg				 DST_SEL_W(SQ_SEL_1),
2423921a55d8Smrg				 USE_CONST_FIELDS(0),
2424921a55d8Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
2425921a55d8Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2426921a55d8Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2427921a55d8Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2428921a55d8Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
2429921a55d8Smrg			     ENDIAN_SWAP(ENDIAN_NONE),
2430921a55d8Smrg			     CONST_BUF_NO_STRIDE(0),
2431921a55d8Smrg			     MEGA_FETCH(1),
2432921a55d8Smrg			     ALT_CONST(0),
2433921a55d8Smrg			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2434921a55d8Smrg    shader[i++] = VTX_DWORD_PAD;
2435921a55d8Smrg    /* 52/53 - src */
2436921a55d8Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2437921a55d8Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2438921a55d8Smrg			     FETCH_WHOLE_QUAD(0),
2439921a55d8Smrg			     BUFFER_ID(0),
2440921a55d8Smrg			     SRC_GPR(0),
2441921a55d8Smrg			     SRC_REL(ABSOLUTE),
2442921a55d8Smrg			     SRC_SEL_X(SQ_SEL_X),
2443921a55d8Smrg			     MEGA_FETCH_COUNT(8));
2444921a55d8Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
2445921a55d8Smrg				 DST_REL(0),
2446921a55d8Smrg				 DST_SEL_X(SQ_SEL_X),
2447921a55d8Smrg				 DST_SEL_Y(SQ_SEL_Y),
2448921a55d8Smrg				 DST_SEL_Z(SQ_SEL_1),
2449921a55d8Smrg				 DST_SEL_W(SQ_SEL_0),
2450921a55d8Smrg				 USE_CONST_FIELDS(0),
2451921a55d8Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
2452921a55d8Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2453921a55d8Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2454921a55d8Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2455921a55d8Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
2456921a55d8Smrg			     ENDIAN_SWAP(ENDIAN_NONE),
2457921a55d8Smrg			     CONST_BUF_NO_STRIDE(0),
2458921a55d8Smrg			     MEGA_FETCH(0),
2459921a55d8Smrg                             ALT_CONST(0),
2460921a55d8Smrg                             BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2461921a55d8Smrg    shader[i++] = VTX_DWORD_PAD;
2462921a55d8Smrg
2463921a55d8Smrg    return i;
2464921a55d8Smrg}
2465921a55d8Smrg
2466921a55d8Smrg/* comp ps --------------------------------------- */
2467921a55d8Smrgint evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
2468921a55d8Smrg{
2469921a55d8Smrg    int i = 0;
2470921a55d8Smrg
2471921a55d8Smrg    /* 0 */
2472921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(3),
2473921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2474921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
2475921a55d8Smrg                            CF_CONST(0),
2476921a55d8Smrg                            COND(SQ_CF_COND_BOOL),
2477921a55d8Smrg                            I_COUNT(0),
2478921a55d8Smrg                            VALID_PIXEL_MODE(0),
2479921a55d8Smrg                            END_OF_PROGRAM(0),
2480921a55d8Smrg                            CF_INST(SQ_CF_INST_CALL),
2481921a55d8Smrg                            WHOLE_QUAD_MODE(0),
2482921a55d8Smrg                            BARRIER(0));
2483921a55d8Smrg    /* 1 */
2484921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(8),
2485921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2486921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
2487921a55d8Smrg                            CF_CONST(0),
2488921a55d8Smrg                            COND(SQ_CF_COND_NOT_BOOL),
2489921a55d8Smrg                            I_COUNT(0),
2490921a55d8Smrg                            VALID_PIXEL_MODE(0),
2491921a55d8Smrg                            END_OF_PROGRAM(0),
2492921a55d8Smrg                            CF_INST(SQ_CF_INST_CALL),
2493921a55d8Smrg                            WHOLE_QUAD_MODE(0),
2494921a55d8Smrg                            BARRIER(0));
2495921a55d8Smrg    /* 2 */
2496921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(0),
2497921a55d8Smrg                            JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2498921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
2499921a55d8Smrg                            CF_CONST(0),
2500921a55d8Smrg                            COND(SQ_CF_COND_ACTIVE),
2501921a55d8Smrg                            I_COUNT(0),
2502921a55d8Smrg                            VALID_PIXEL_MODE(0),
2503921a55d8Smrg                            END_OF_PROGRAM(1),
2504921a55d8Smrg                            CF_INST(SQ_CF_INST_NOP),
2505921a55d8Smrg                            WHOLE_QUAD_MODE(0),
2506921a55d8Smrg                            BARRIER(1));
2507921a55d8Smrg
2508921a55d8Smrg    /* 3 - mask sub */
2509921a55d8Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(12),
2510921a55d8Smrg				KCACHE_BANK0(0),
2511921a55d8Smrg				KCACHE_BANK1(0),
2512921a55d8Smrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
2513921a55d8Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2514921a55d8Smrg				KCACHE_ADDR0(0),
2515921a55d8Smrg				KCACHE_ADDR1(0),
2516921a55d8Smrg				I_COUNT(8),
2517921a55d8Smrg				ALT_CONST(0),
2518921a55d8Smrg				CF_INST(SQ_CF_INST_ALU),
2519921a55d8Smrg				WHOLE_QUAD_MODE(0),
2520921a55d8Smrg				BARRIER(1));
2521921a55d8Smrg
2522921a55d8Smrg    /* 4 */
2523921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(28),
2524921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2525921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
2526921a55d8Smrg			    CF_CONST(0),
2527921a55d8Smrg			    COND(SQ_CF_COND_ACTIVE),
2528921a55d8Smrg			    I_COUNT(2),
2529921a55d8Smrg			    VALID_PIXEL_MODE(0),
2530921a55d8Smrg			    END_OF_PROGRAM(0),
2531921a55d8Smrg			    CF_INST(SQ_CF_INST_TC),
2532921a55d8Smrg			    WHOLE_QUAD_MODE(0),
2533921a55d8Smrg			    BARRIER(1));
2534921a55d8Smrg
2535921a55d8Smrg    /* 5 */
2536921a55d8Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(20),
2537921a55d8Smrg				KCACHE_BANK0(0),
2538921a55d8Smrg				KCACHE_BANK1(0),
2539921a55d8Smrg				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
2540921a55d8Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2541921a55d8Smrg				KCACHE_ADDR0(0),
2542921a55d8Smrg				KCACHE_ADDR1(0),
2543921a55d8Smrg				I_COUNT(4),
2544921a55d8Smrg				ALT_CONST(0),
2545921a55d8Smrg				CF_INST(SQ_CF_INST_ALU),
2546921a55d8Smrg				WHOLE_QUAD_MODE(0),
2547921a55d8Smrg				BARRIER(1));
2548921a55d8Smrg
2549921a55d8Smrg    /* 6 */
2550921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
2551921a55d8Smrg					  TYPE(SQ_EXPORT_PIXEL),
2552921a55d8Smrg					  RW_GPR(2),
2553921a55d8Smrg					  RW_REL(ABSOLUTE),
2554921a55d8Smrg					  INDEX_GPR(0),
2555921a55d8Smrg					  ELEM_SIZE(1));
2556921a55d8Smrg
2557921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
2558921a55d8Smrg					       SRC_SEL_Y(SQ_SEL_Y),
2559921a55d8Smrg					       SRC_SEL_Z(SQ_SEL_Z),
2560921a55d8Smrg					       SRC_SEL_W(SQ_SEL_W),
2561921a55d8Smrg					       BURST_COUNT(1),
2562921a55d8Smrg					       VALID_PIXEL_MODE(0),
2563921a55d8Smrg					       END_OF_PROGRAM(0),
2564921a55d8Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
2565921a55d8Smrg					       MARK(0),
2566921a55d8Smrg					       BARRIER(1));
2567921a55d8Smrg    /* 7 */
2568921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(0),
2569921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2570921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
2571921a55d8Smrg			    CF_CONST(0),
2572921a55d8Smrg			    COND(SQ_CF_COND_ACTIVE),
2573921a55d8Smrg			    I_COUNT(0),
2574921a55d8Smrg			    VALID_PIXEL_MODE(0),
2575921a55d8Smrg			    END_OF_PROGRAM(0),
2576921a55d8Smrg			    CF_INST(SQ_CF_INST_RETURN),
2577921a55d8Smrg			    WHOLE_QUAD_MODE(0),
2578921a55d8Smrg			    BARRIER(1));
2579921a55d8Smrg
2580921a55d8Smrg    /* 8 - non-mask sub */
2581921a55d8Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(24),
2582921a55d8Smrg				KCACHE_BANK0(0),
2583921a55d8Smrg				KCACHE_BANK1(0),
2584921a55d8Smrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
2585921a55d8Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2586921a55d8Smrg				KCACHE_ADDR0(0),
2587921a55d8Smrg				KCACHE_ADDR1(0),
2588921a55d8Smrg				I_COUNT(4),
2589921a55d8Smrg				ALT_CONST(0),
2590921a55d8Smrg				CF_INST(SQ_CF_INST_ALU),
2591921a55d8Smrg				WHOLE_QUAD_MODE(0),
2592921a55d8Smrg				BARRIER(1));
2593921a55d8Smrg    /* 9 */
2594921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(32),
2595921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2596921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
2597921a55d8Smrg			    CF_CONST(0),
2598921a55d8Smrg			    COND(SQ_CF_COND_ACTIVE),
2599921a55d8Smrg			    I_COUNT(1),
2600921a55d8Smrg			    VALID_PIXEL_MODE(0),
2601921a55d8Smrg			    END_OF_PROGRAM(0),
2602921a55d8Smrg			    CF_INST(SQ_CF_INST_TC),
2603921a55d8Smrg			    WHOLE_QUAD_MODE(0),
2604921a55d8Smrg			    BARRIER(1));
2605921a55d8Smrg
2606921a55d8Smrg    /* 10 */
2607921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
2608921a55d8Smrg					  TYPE(SQ_EXPORT_PIXEL),
2609921a55d8Smrg					  RW_GPR(0),
2610921a55d8Smrg					  RW_REL(ABSOLUTE),
2611921a55d8Smrg					  INDEX_GPR(0),
2612921a55d8Smrg					  ELEM_SIZE(1));
2613921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
2614921a55d8Smrg					       SRC_SEL_Y(SQ_SEL_Y),
2615921a55d8Smrg					       SRC_SEL_Z(SQ_SEL_Z),
2616921a55d8Smrg					       SRC_SEL_W(SQ_SEL_W),
2617921a55d8Smrg					       BURST_COUNT(1),
2618921a55d8Smrg					       VALID_PIXEL_MODE(0),
2619921a55d8Smrg					       END_OF_PROGRAM(0),
2620921a55d8Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
2621921a55d8Smrg					       MARK(0),
2622921a55d8Smrg					       BARRIER(1));
2623921a55d8Smrg
2624921a55d8Smrg    /* 11 */
2625921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(0),
2626921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2627921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
2628921a55d8Smrg			    CF_CONST(0),
2629921a55d8Smrg			    COND(SQ_CF_COND_ACTIVE),
2630921a55d8Smrg			    I_COUNT(0),
2631921a55d8Smrg			    VALID_PIXEL_MODE(0),
2632921a55d8Smrg			    END_OF_PROGRAM(0),
2633921a55d8Smrg			    CF_INST(SQ_CF_INST_RETURN),
2634921a55d8Smrg			    WHOLE_QUAD_MODE(0),
2635921a55d8Smrg			    BARRIER(1));
2636921a55d8Smrg
2637921a55d8Smrg    /* 12 interpolate src tex coords - mask */
2638921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2639921a55d8Smrg			     SRC0_REL(ABSOLUTE),
2640921a55d8Smrg			     SRC0_ELEM(ELEM_Y),
2641921a55d8Smrg			     SRC0_NEG(0),
2642921a55d8Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2643921a55d8Smrg			     SRC1_REL(ABSOLUTE),
2644921a55d8Smrg			     SRC1_ELEM(ELEM_X),
2645921a55d8Smrg			     SRC1_NEG(0),
2646921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
2647921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2648921a55d8Smrg			     LAST(0));
2649921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2650921a55d8Smrg				 SRC1_ABS(0),
2651921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
2652921a55d8Smrg				 UPDATE_PRED(0),
2653921a55d8Smrg				 WRITE_MASK(1),
2654921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
2655921a55d8Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2656921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2657921a55d8Smrg				 DST_GPR(1),
2658921a55d8Smrg				 DST_REL(ABSOLUTE),
2659921a55d8Smrg				 DST_ELEM(ELEM_X),
2660921a55d8Smrg				 CLAMP(0));
2661921a55d8Smrg    /* 13 */
2662921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2663921a55d8Smrg			     SRC0_REL(ABSOLUTE),
2664921a55d8Smrg			     SRC0_ELEM(ELEM_X),
2665921a55d8Smrg			     SRC0_NEG(0),
2666921a55d8Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2667921a55d8Smrg			     SRC1_REL(ABSOLUTE),
2668921a55d8Smrg			     SRC1_ELEM(ELEM_X),
2669921a55d8Smrg			     SRC1_NEG(0),
2670921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
2671921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2672921a55d8Smrg			     LAST(0));
2673921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2674921a55d8Smrg				 SRC1_ABS(0),
2675921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
2676921a55d8Smrg				 UPDATE_PRED(0),
2677921a55d8Smrg				 WRITE_MASK(1),
2678921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
2679921a55d8Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2680921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2681921a55d8Smrg				 DST_GPR(1),
2682921a55d8Smrg				 DST_REL(ABSOLUTE),
2683921a55d8Smrg				 DST_ELEM(ELEM_Y),
2684921a55d8Smrg				 CLAMP(0));
2685921a55d8Smrg    /* 14 */
2686921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2687921a55d8Smrg			     SRC0_REL(ABSOLUTE),
2688921a55d8Smrg			     SRC0_ELEM(ELEM_Y),
2689921a55d8Smrg			     SRC0_NEG(0),
2690921a55d8Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2691921a55d8Smrg			     SRC1_REL(ABSOLUTE),
2692921a55d8Smrg			     SRC1_ELEM(ELEM_X),
2693921a55d8Smrg			     SRC1_NEG(0),
2694921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
2695921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2696921a55d8Smrg			     LAST(0));
2697921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2698921a55d8Smrg				 SRC1_ABS(0),
2699921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
2700921a55d8Smrg				 UPDATE_PRED(0),
2701921a55d8Smrg				 WRITE_MASK(0),
2702921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
2703921a55d8Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2704921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2705921a55d8Smrg				 DST_GPR(1),
2706921a55d8Smrg				 DST_REL(ABSOLUTE),
2707921a55d8Smrg				 DST_ELEM(ELEM_Z),
2708921a55d8Smrg				 CLAMP(0));
2709921a55d8Smrg    /* 15 */
2710921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2711921a55d8Smrg			     SRC0_REL(ABSOLUTE),
2712921a55d8Smrg			     SRC0_ELEM(ELEM_X),
2713921a55d8Smrg			     SRC0_NEG(0),
2714921a55d8Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2715921a55d8Smrg			     SRC1_REL(ABSOLUTE),
2716921a55d8Smrg			     SRC1_ELEM(ELEM_X),
2717921a55d8Smrg			     SRC1_NEG(0),
2718921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
2719921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2720921a55d8Smrg			     LAST(1));
2721921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2722921a55d8Smrg				 SRC1_ABS(0),
2723921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
2724921a55d8Smrg				 UPDATE_PRED(0),
2725921a55d8Smrg				 WRITE_MASK(0),
2726921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
2727921a55d8Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2728921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2729921a55d8Smrg				 DST_GPR(1),
2730921a55d8Smrg				 DST_REL(ABSOLUTE),
2731921a55d8Smrg				 DST_ELEM(ELEM_W),
2732921a55d8Smrg				 CLAMP(0));
2733921a55d8Smrg
2734921a55d8Smrg    /* 16 interpolate mask tex coords */
2735921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2736921a55d8Smrg			     SRC0_REL(ABSOLUTE),
2737921a55d8Smrg			     SRC0_ELEM(ELEM_Y),
2738921a55d8Smrg			     SRC0_NEG(0),
2739921a55d8Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
2740921a55d8Smrg			     SRC1_REL(ABSOLUTE),
2741921a55d8Smrg			     SRC1_ELEM(ELEM_X),
2742921a55d8Smrg			     SRC1_NEG(0),
2743921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
2744921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2745921a55d8Smrg			     LAST(0));
2746921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2747921a55d8Smrg				 SRC1_ABS(0),
2748921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
2749921a55d8Smrg				 UPDATE_PRED(0),
2750921a55d8Smrg				 WRITE_MASK(1),
2751921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
2752921a55d8Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2753921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2754921a55d8Smrg				 DST_GPR(0),
2755921a55d8Smrg				 DST_REL(ABSOLUTE),
2756921a55d8Smrg				 DST_ELEM(ELEM_X),
2757921a55d8Smrg				 CLAMP(0));
2758921a55d8Smrg    /* 17 */
2759921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2760921a55d8Smrg			     SRC0_REL(ABSOLUTE),
2761921a55d8Smrg			     SRC0_ELEM(ELEM_X),
2762921a55d8Smrg			     SRC0_NEG(0),
2763921a55d8Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
2764921a55d8Smrg			     SRC1_REL(ABSOLUTE),
2765921a55d8Smrg			     SRC1_ELEM(ELEM_X),
2766921a55d8Smrg			     SRC1_NEG(0),
2767921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
2768921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2769921a55d8Smrg			     LAST(0));
2770921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2771921a55d8Smrg				 SRC1_ABS(0),
2772921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
2773921a55d8Smrg				 UPDATE_PRED(0),
2774921a55d8Smrg				 WRITE_MASK(1),
2775921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
2776921a55d8Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2777921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2778921a55d8Smrg				 DST_GPR(0),
2779921a55d8Smrg				 DST_REL(ABSOLUTE),
2780921a55d8Smrg				 DST_ELEM(ELEM_Y),
2781921a55d8Smrg				 CLAMP(0));
2782921a55d8Smrg    /* 18 */
2783921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2784921a55d8Smrg			     SRC0_REL(ABSOLUTE),
2785921a55d8Smrg			     SRC0_ELEM(ELEM_Y),
2786921a55d8Smrg			     SRC0_NEG(0),
2787921a55d8Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
2788921a55d8Smrg			     SRC1_REL(ABSOLUTE),
2789921a55d8Smrg			     SRC1_ELEM(ELEM_X),
2790921a55d8Smrg			     SRC1_NEG(0),
2791921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
2792921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2793921a55d8Smrg			     LAST(0));
2794921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2795921a55d8Smrg				 SRC1_ABS(0),
2796921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
2797921a55d8Smrg				 UPDATE_PRED(0),
2798921a55d8Smrg				 WRITE_MASK(0),
2799921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
2800921a55d8Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2801921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2802921a55d8Smrg				 DST_GPR(0),
2803921a55d8Smrg				 DST_REL(ABSOLUTE),
2804921a55d8Smrg				 DST_ELEM(ELEM_Z),
2805921a55d8Smrg				 CLAMP(0));
2806921a55d8Smrg    /* 19 */
2807921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2808921a55d8Smrg			     SRC0_REL(ABSOLUTE),
2809921a55d8Smrg			     SRC0_ELEM(ELEM_X),
2810921a55d8Smrg			     SRC0_NEG(0),
2811921a55d8Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
2812921a55d8Smrg			     SRC1_REL(ABSOLUTE),
2813921a55d8Smrg			     SRC1_ELEM(ELEM_X),
2814921a55d8Smrg			     SRC1_NEG(0),
2815921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
2816921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2817921a55d8Smrg			     LAST(1));
2818921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2819921a55d8Smrg				 SRC1_ABS(0),
2820921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
2821921a55d8Smrg				 UPDATE_PRED(0),
2822921a55d8Smrg				 WRITE_MASK(0),
2823921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
2824921a55d8Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2825921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2826921a55d8Smrg				 DST_GPR(0),
2827921a55d8Smrg				 DST_REL(ABSOLUTE),
2828921a55d8Smrg				 DST_ELEM(ELEM_W),
2829921a55d8Smrg				 CLAMP(0));
2830921a55d8Smrg
2831921a55d8Smrg    /* 20 - alu 0 */
2832921a55d8Smrg    /* MUL gpr[2].x gpr[0].x gpr[1].x */
2833921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2834921a55d8Smrg			     SRC0_REL(ABSOLUTE),
2835921a55d8Smrg			     SRC0_ELEM(ELEM_X),
2836921a55d8Smrg			     SRC0_NEG(0),
2837921a55d8Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
2838921a55d8Smrg			     SRC1_REL(ABSOLUTE),
2839921a55d8Smrg			     SRC1_ELEM(ELEM_X),
2840921a55d8Smrg			     SRC1_NEG(0),
2841921a55d8Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
2842921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2843921a55d8Smrg			     LAST(0));
2844921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2845921a55d8Smrg				 SRC1_ABS(0),
2846921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
2847921a55d8Smrg				 UPDATE_PRED(0),
2848921a55d8Smrg				 WRITE_MASK(1),
2849921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
2850921a55d8Smrg				 ALU_INST(SQ_OP2_INST_MUL),
2851921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2852921a55d8Smrg				 DST_GPR(2),
2853921a55d8Smrg				 DST_REL(ABSOLUTE),
2854921a55d8Smrg				 DST_ELEM(ELEM_X),
2855921a55d8Smrg				 CLAMP(1));
2856921a55d8Smrg    /* 21 - alu 1 */
2857921a55d8Smrg    /* MUL gpr[2].y gpr[0].y gpr[1].y */
2858921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2859921a55d8Smrg			     SRC0_REL(ABSOLUTE),
2860921a55d8Smrg			     SRC0_ELEM(ELEM_Y),
2861921a55d8Smrg			     SRC0_NEG(0),
2862921a55d8Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
2863921a55d8Smrg			     SRC1_REL(ABSOLUTE),
2864921a55d8Smrg			     SRC1_ELEM(ELEM_Y),
2865921a55d8Smrg			     SRC1_NEG(0),
2866921a55d8Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
2867921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2868921a55d8Smrg			     LAST(0));
2869921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2870921a55d8Smrg				 SRC1_ABS(0),
2871921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
2872921a55d8Smrg				 UPDATE_PRED(0),
2873921a55d8Smrg				 WRITE_MASK(1),
2874921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
2875921a55d8Smrg				 ALU_INST(SQ_OP2_INST_MUL),
2876921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2877921a55d8Smrg				 DST_GPR(2),
2878921a55d8Smrg				 DST_REL(ABSOLUTE),
2879921a55d8Smrg				 DST_ELEM(ELEM_Y),
2880921a55d8Smrg				 CLAMP(1));
2881921a55d8Smrg    /* 22 - alu 2 */
2882921a55d8Smrg    /* MUL gpr[2].z gpr[0].z gpr[1].z */
2883921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2884921a55d8Smrg			     SRC0_REL(ABSOLUTE),
2885921a55d8Smrg			     SRC0_ELEM(ELEM_Z),
2886921a55d8Smrg			     SRC0_NEG(0),
2887921a55d8Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
2888921a55d8Smrg			     SRC1_REL(ABSOLUTE),
2889921a55d8Smrg			     SRC1_ELEM(ELEM_Z),
2890921a55d8Smrg			     SRC1_NEG(0),
2891921a55d8Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
2892921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2893921a55d8Smrg			     LAST(0));
2894921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2895921a55d8Smrg				 SRC1_ABS(0),
2896921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
2897921a55d8Smrg				 UPDATE_PRED(0),
2898921a55d8Smrg				 WRITE_MASK(1),
2899921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
2900921a55d8Smrg				 ALU_INST(SQ_OP2_INST_MUL),
2901921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2902921a55d8Smrg				 DST_GPR(2),
2903921a55d8Smrg				 DST_REL(ABSOLUTE),
2904921a55d8Smrg				 DST_ELEM(ELEM_Z),
2905921a55d8Smrg				 CLAMP(1));
2906921a55d8Smrg    /* 23 - alu 3 */
2907921a55d8Smrg    /* MUL gpr[2].w gpr[0].w gpr[1].w */
2908921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2909921a55d8Smrg			     SRC0_REL(ABSOLUTE),
2910921a55d8Smrg			     SRC0_ELEM(ELEM_W),
2911921a55d8Smrg			     SRC0_NEG(0),
2912921a55d8Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
2913921a55d8Smrg			     SRC1_REL(ABSOLUTE),
2914921a55d8Smrg			     SRC1_ELEM(ELEM_W),
2915921a55d8Smrg			     SRC1_NEG(0),
2916921a55d8Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
2917921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2918921a55d8Smrg			     LAST(1));
2919921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2920921a55d8Smrg				 SRC1_ABS(0),
2921921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
2922921a55d8Smrg				 UPDATE_PRED(0),
2923921a55d8Smrg				 WRITE_MASK(1),
2924921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
2925921a55d8Smrg				 ALU_INST(SQ_OP2_INST_MUL),
2926921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2927921a55d8Smrg				 DST_GPR(2),
2928921a55d8Smrg				 DST_REL(ABSOLUTE),
2929921a55d8Smrg				 DST_ELEM(ELEM_W),
2930921a55d8Smrg				 CLAMP(1));
2931921a55d8Smrg
2932921a55d8Smrg    /* 24 - interpolate tex coords - non-mask */
2933921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2934921a55d8Smrg			     SRC0_REL(ABSOLUTE),
2935921a55d8Smrg			     SRC0_ELEM(ELEM_Y),
2936921a55d8Smrg			     SRC0_NEG(0),
2937921a55d8Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2938921a55d8Smrg			     SRC1_REL(ABSOLUTE),
2939921a55d8Smrg			     SRC1_ELEM(ELEM_X),
2940921a55d8Smrg			     SRC1_NEG(0),
2941921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
2942921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2943921a55d8Smrg			     LAST(0));
2944921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2945921a55d8Smrg				 SRC1_ABS(0),
2946921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
2947921a55d8Smrg				 UPDATE_PRED(0),
2948921a55d8Smrg				 WRITE_MASK(1),
2949921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
2950921a55d8Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2951921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2952921a55d8Smrg				 DST_GPR(0),
2953921a55d8Smrg				 DST_REL(ABSOLUTE),
2954921a55d8Smrg				 DST_ELEM(ELEM_X),
2955921a55d8Smrg				 CLAMP(0));
2956921a55d8Smrg    /* 25 */
2957921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2958921a55d8Smrg			     SRC0_REL(ABSOLUTE),
2959921a55d8Smrg			     SRC0_ELEM(ELEM_X),
2960921a55d8Smrg			     SRC0_NEG(0),
2961921a55d8Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2962921a55d8Smrg			     SRC1_REL(ABSOLUTE),
2963921a55d8Smrg			     SRC1_ELEM(ELEM_X),
2964921a55d8Smrg			     SRC1_NEG(0),
2965921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
2966921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2967921a55d8Smrg			     LAST(0));
2968921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2969921a55d8Smrg				 SRC1_ABS(0),
2970921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
2971921a55d8Smrg				 UPDATE_PRED(0),
2972921a55d8Smrg				 WRITE_MASK(1),
2973921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
2974921a55d8Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2975921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2976921a55d8Smrg				 DST_GPR(0),
2977921a55d8Smrg				 DST_REL(ABSOLUTE),
2978921a55d8Smrg				 DST_ELEM(ELEM_Y),
2979921a55d8Smrg				 CLAMP(0));
2980921a55d8Smrg    /* 26 */
2981921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2982921a55d8Smrg			     SRC0_REL(ABSOLUTE),
2983921a55d8Smrg			     SRC0_ELEM(ELEM_Y),
2984921a55d8Smrg			     SRC0_NEG(0),
2985921a55d8Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2986921a55d8Smrg			     SRC1_REL(ABSOLUTE),
2987921a55d8Smrg			     SRC1_ELEM(ELEM_X),
2988921a55d8Smrg			     SRC1_NEG(0),
2989921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
2990921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2991921a55d8Smrg			     LAST(0));
2992921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2993921a55d8Smrg				 SRC1_ABS(0),
2994921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
2995921a55d8Smrg				 UPDATE_PRED(0),
2996921a55d8Smrg				 WRITE_MASK(0),
2997921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
2998921a55d8Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2999921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
3000921a55d8Smrg				 DST_GPR(0),
3001921a55d8Smrg				 DST_REL(ABSOLUTE),
3002921a55d8Smrg				 DST_ELEM(ELEM_Z),
3003921a55d8Smrg				 CLAMP(0));
3004921a55d8Smrg    /* 27 */
3005921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
3006921a55d8Smrg			     SRC0_REL(ABSOLUTE),
3007921a55d8Smrg			     SRC0_ELEM(ELEM_X),
3008921a55d8Smrg			     SRC0_NEG(0),
3009921a55d8Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
3010921a55d8Smrg			     SRC1_REL(ABSOLUTE),
3011921a55d8Smrg			     SRC1_ELEM(ELEM_X),
3012921a55d8Smrg			     SRC1_NEG(0),
3013921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
3014921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
3015921a55d8Smrg			     LAST(1));
3016921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3017921a55d8Smrg				 SRC1_ABS(0),
3018921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
3019921a55d8Smrg				 UPDATE_PRED(0),
3020921a55d8Smrg				 WRITE_MASK(0),
3021921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
3022921a55d8Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
3023921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
3024921a55d8Smrg				 DST_GPR(0),
3025921a55d8Smrg				 DST_REL(ABSOLUTE),
3026921a55d8Smrg				 DST_ELEM(ELEM_W),
3027921a55d8Smrg				 CLAMP(0));
3028921a55d8Smrg
3029921a55d8Smrg    /* 28/29 - src - mask */
3030921a55d8Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
3031921a55d8Smrg			     INST_MOD(0),
3032921a55d8Smrg			     FETCH_WHOLE_QUAD(0),
3033921a55d8Smrg			     RESOURCE_ID(0),
3034921a55d8Smrg			     SRC_GPR(1),
3035921a55d8Smrg			     SRC_REL(ABSOLUTE),
3036921a55d8Smrg			     ALT_CONST(0),
3037921a55d8Smrg			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
3038921a55d8Smrg			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
3039921a55d8Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
3040921a55d8Smrg			     DST_REL(ABSOLUTE),
3041921a55d8Smrg			     DST_SEL_X(SQ_SEL_X),
3042921a55d8Smrg			     DST_SEL_Y(SQ_SEL_Y),
3043921a55d8Smrg			     DST_SEL_Z(SQ_SEL_Z),
3044921a55d8Smrg			     DST_SEL_W(SQ_SEL_W),
3045921a55d8Smrg			     LOD_BIAS(0),
3046921a55d8Smrg			     COORD_TYPE_X(TEX_NORMALIZED),
3047921a55d8Smrg			     COORD_TYPE_Y(TEX_NORMALIZED),
3048921a55d8Smrg			     COORD_TYPE_Z(TEX_NORMALIZED),
3049921a55d8Smrg			     COORD_TYPE_W(TEX_NORMALIZED));
3050921a55d8Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
3051921a55d8Smrg			     OFFSET_Y(0),
3052921a55d8Smrg			     OFFSET_Z(0),
3053921a55d8Smrg			     SAMPLER_ID(0),
3054921a55d8Smrg			     SRC_SEL_X(SQ_SEL_X),
3055921a55d8Smrg			     SRC_SEL_Y(SQ_SEL_Y),
3056921a55d8Smrg			     SRC_SEL_Z(SQ_SEL_0),
3057921a55d8Smrg			     SRC_SEL_W(SQ_SEL_1));
3058921a55d8Smrg    shader[i++] = TEX_DWORD_PAD;
3059921a55d8Smrg    /* 30/31 - mask */
3060921a55d8Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
3061921a55d8Smrg			     INST_MOD(0),
3062921a55d8Smrg			     FETCH_WHOLE_QUAD(0),
3063921a55d8Smrg			     RESOURCE_ID(1),
3064921a55d8Smrg			     SRC_GPR(0),
3065921a55d8Smrg			     SRC_REL(ABSOLUTE),
3066921a55d8Smrg                             ALT_CONST(0),
3067921a55d8Smrg                             RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
3068921a55d8Smrg                             SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
3069921a55d8Smrg    shader[i++] = TEX_DWORD1(DST_GPR(0),
3070921a55d8Smrg			     DST_REL(ABSOLUTE),
3071921a55d8Smrg			     DST_SEL_X(SQ_SEL_X),
3072921a55d8Smrg			     DST_SEL_Y(SQ_SEL_Y),
3073921a55d8Smrg			     DST_SEL_Z(SQ_SEL_Z),
3074921a55d8Smrg			     DST_SEL_W(SQ_SEL_W),
3075921a55d8Smrg			     LOD_BIAS(0),
3076921a55d8Smrg			     COORD_TYPE_X(TEX_NORMALIZED),
3077921a55d8Smrg			     COORD_TYPE_Y(TEX_NORMALIZED),
3078921a55d8Smrg			     COORD_TYPE_Z(TEX_NORMALIZED),
3079921a55d8Smrg			     COORD_TYPE_W(TEX_NORMALIZED));
3080921a55d8Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
3081921a55d8Smrg			     OFFSET_Y(0),
3082921a55d8Smrg			     OFFSET_Z(0),
3083921a55d8Smrg			     SAMPLER_ID(1),
3084921a55d8Smrg			     SRC_SEL_X(SQ_SEL_X),
3085921a55d8Smrg			     SRC_SEL_Y(SQ_SEL_Y),
3086921a55d8Smrg			     SRC_SEL_Z(SQ_SEL_0),
3087921a55d8Smrg			     SRC_SEL_W(SQ_SEL_1));
3088921a55d8Smrg    shader[i++] = TEX_DWORD_PAD;
3089921a55d8Smrg
3090921a55d8Smrg    /* 32/33 - src - non-mask */
3091921a55d8Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
3092921a55d8Smrg			     INST_MOD(0),
3093921a55d8Smrg			     FETCH_WHOLE_QUAD(0),
3094921a55d8Smrg			     RESOURCE_ID(0),
3095921a55d8Smrg			     SRC_GPR(0),
3096921a55d8Smrg			     SRC_REL(ABSOLUTE),
3097921a55d8Smrg			     ALT_CONST(0),
3098921a55d8Smrg			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
3099921a55d8Smrg			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
3100921a55d8Smrg    shader[i++] = TEX_DWORD1(DST_GPR(0),
3101921a55d8Smrg			     DST_REL(ABSOLUTE),
3102921a55d8Smrg			     DST_SEL_X(SQ_SEL_X),
3103921a55d8Smrg			     DST_SEL_Y(SQ_SEL_Y),
3104921a55d8Smrg			     DST_SEL_Z(SQ_SEL_Z),
3105921a55d8Smrg			     DST_SEL_W(SQ_SEL_W),
3106921a55d8Smrg			     LOD_BIAS(0),
3107921a55d8Smrg			     COORD_TYPE_X(TEX_NORMALIZED),
3108921a55d8Smrg			     COORD_TYPE_Y(TEX_NORMALIZED),
3109921a55d8Smrg			     COORD_TYPE_Z(TEX_NORMALIZED),
3110921a55d8Smrg			     COORD_TYPE_W(TEX_NORMALIZED));
3111921a55d8Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
3112921a55d8Smrg			     OFFSET_Y(0),
3113921a55d8Smrg			     OFFSET_Z(0),
3114921a55d8Smrg			     SAMPLER_ID(0),
3115921a55d8Smrg			     SRC_SEL_X(SQ_SEL_X),
3116921a55d8Smrg			     SRC_SEL_Y(SQ_SEL_Y),
3117921a55d8Smrg			     SRC_SEL_Z(SQ_SEL_0),
3118921a55d8Smrg			     SRC_SEL_W(SQ_SEL_1));
3119921a55d8Smrg    shader[i++] = TEX_DWORD_PAD;
3120921a55d8Smrg
3121921a55d8Smrg    return i;
3122921a55d8Smrg}
3123921a55d8Smrg
3124921a55d8Smrg#endif
3125