1921a55d8Smrg/*
2921a55d8Smrg * Copyright 2010 Advanced Micro Devices, Inc.
3921a55d8Smrg *
4921a55d8Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5921a55d8Smrg * copy of this software and associated documentation files (the "Software"),
6921a55d8Smrg * to deal in the Software without restriction, including without limitation
7921a55d8Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8921a55d8Smrg * and/or sell copies of the Software, and to permit persons to whom the
9921a55d8Smrg * Software is furnished to do so, subject to the following conditions:
10921a55d8Smrg *
11921a55d8Smrg * The above copyright notice and this permission notice (including the next
12921a55d8Smrg * paragraph) shall be included in all copies or substantial portions of the
13921a55d8Smrg * Software.
14921a55d8Smrg *
15921a55d8Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16921a55d8Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17921a55d8Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18921a55d8Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19921a55d8Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20921a55d8Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21921a55d8Smrg * SOFTWARE.
22921a55d8Smrg *
23921a55d8Smrg * Author: Alex Deucher <alexander.deucher@amd.com>
24921a55d8Smrg *
25921a55d8Smrg */
26921a55d8Smrg
27921a55d8Smrg#ifdef HAVE_CONFIG_H
28921a55d8Smrg#include "config.h"
29921a55d8Smrg#endif
30921a55d8Smrg
3143df4709Smrg#ifdef XF86DRM_MODE
3243df4709Smrg
33921a55d8Smrg#include "xf86.h"
34921a55d8Smrg
35921a55d8Smrg#include "evergreen_shader.h"
36921a55d8Smrg#include "evergreen_reg.h"
37921a55d8Smrg
38921a55d8Smrg/* solid vs --------------------------------------- */
39921a55d8Smrgint evergreen_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader)
40921a55d8Smrg{
41921a55d8Smrg    int i = 0;
42921a55d8Smrg
43921a55d8Smrg    /* 0 */
44921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(4),
45921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
46921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
47921a55d8Smrg			    CF_CONST(0),
48921a55d8Smrg			    COND(SQ_CF_COND_ACTIVE),
49921a55d8Smrg			    I_COUNT(1),
50921a55d8Smrg			    VALID_PIXEL_MODE(0),
51921a55d8Smrg			    END_OF_PROGRAM(0),
52921a55d8Smrg			    CF_INST(SQ_CF_INST_VC),
53921a55d8Smrg			    WHOLE_QUAD_MODE(0),
54921a55d8Smrg			    BARRIER(1));
55921a55d8Smrg    /* 1 */
56921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
57921a55d8Smrg					  TYPE(SQ_EXPORT_POS),
58921a55d8Smrg					  RW_GPR(1),
59921a55d8Smrg					  RW_REL(ABSOLUTE),
60921a55d8Smrg					  INDEX_GPR(0),
61921a55d8Smrg					  ELEM_SIZE(0));
62921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
63921a55d8Smrg					       SRC_SEL_Y(SQ_SEL_Y),
64921a55d8Smrg					       SRC_SEL_Z(SQ_SEL_Z),
65921a55d8Smrg					       SRC_SEL_W(SQ_SEL_W),
66921a55d8Smrg					       BURST_COUNT(1),
67921a55d8Smrg					       VALID_PIXEL_MODE(0),
68921a55d8Smrg					       END_OF_PROGRAM(0),
69921a55d8Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
70921a55d8Smrg					       MARK(0),
71921a55d8Smrg					       BARRIER(1));
72921a55d8Smrg    /* 2 - always export a param whether it's used or not */
73921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
74921a55d8Smrg					  TYPE(SQ_EXPORT_PARAM),
75921a55d8Smrg					  RW_GPR(0),
76921a55d8Smrg					  RW_REL(ABSOLUTE),
77921a55d8Smrg					  INDEX_GPR(0),
78921a55d8Smrg					  ELEM_SIZE(0));
79921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
80921a55d8Smrg					       SRC_SEL_Y(SQ_SEL_Y),
81921a55d8Smrg					       SRC_SEL_Z(SQ_SEL_Z),
82921a55d8Smrg					       SRC_SEL_W(SQ_SEL_W),
83921a55d8Smrg					       BURST_COUNT(0),
84921a55d8Smrg					       VALID_PIXEL_MODE(0),
85921a55d8Smrg					       END_OF_PROGRAM(1),
86921a55d8Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
87921a55d8Smrg					       MARK(0),
88921a55d8Smrg					       BARRIER(0));
89921a55d8Smrg    /* 3 - padding */
90921a55d8Smrg    shader[i++] = 0x00000000;
91921a55d8Smrg    shader[i++] = 0x00000000;
92921a55d8Smrg    /* 4/5 */
93921a55d8Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
94921a55d8Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
95921a55d8Smrg			     FETCH_WHOLE_QUAD(0),
96921a55d8Smrg			     BUFFER_ID(0),
97921a55d8Smrg			     SRC_GPR(0),
98921a55d8Smrg			     SRC_REL(ABSOLUTE),
99921a55d8Smrg			     SRC_SEL_X(SQ_SEL_X),
100921a55d8Smrg			     MEGA_FETCH_COUNT(8));
101921a55d8Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
102921a55d8Smrg				 DST_REL(0),
103921a55d8Smrg				 DST_SEL_X(SQ_SEL_X),
104921a55d8Smrg				 DST_SEL_Y(SQ_SEL_Y),
105921a55d8Smrg				 DST_SEL_Z(SQ_SEL_0),
106921a55d8Smrg				 DST_SEL_W(SQ_SEL_1),
107921a55d8Smrg				 USE_CONST_FIELDS(0),
108921a55d8Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
109921a55d8Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
110921a55d8Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
111921a55d8Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
112921a55d8Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
113b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
114b13dfe66Smrg			     ENDIAN_SWAP(SQ_ENDIAN_8IN32),
115b13dfe66Smrg#else
116b13dfe66Smrg			     ENDIAN_SWAP(SQ_ENDIAN_NONE),
117b13dfe66Smrg#endif
118921a55d8Smrg			     CONST_BUF_NO_STRIDE(0),
119921a55d8Smrg			     MEGA_FETCH(1),
120921a55d8Smrg			     ALT_CONST(0),
121921a55d8Smrg			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
122921a55d8Smrg    shader[i++] = VTX_DWORD_PAD;
123921a55d8Smrg
124921a55d8Smrg    return i;
125921a55d8Smrg}
126921a55d8Smrg
127921a55d8Smrg/* solid ps --------------------------------------- */
128921a55d8Smrgint evergreen_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader)
129921a55d8Smrg{
130921a55d8Smrg    int i = 0;
131921a55d8Smrg
132921a55d8Smrg    /* 0 */
133921a55d8Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(2),
134921a55d8Smrg				KCACHE_BANK0(0),
135921a55d8Smrg				KCACHE_BANK1(0),
136921a55d8Smrg				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
137921a55d8Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
138921a55d8Smrg				KCACHE_ADDR0(0),
139921a55d8Smrg				KCACHE_ADDR1(0),
140921a55d8Smrg				I_COUNT(4),
141921a55d8Smrg				ALT_CONST(0),
142921a55d8Smrg				CF_INST(SQ_CF_INST_ALU),
143921a55d8Smrg				WHOLE_QUAD_MODE(0),
144921a55d8Smrg				BARRIER(1));
145921a55d8Smrg    /* 1 */
146921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
147921a55d8Smrg					  TYPE(SQ_EXPORT_PIXEL),
148921a55d8Smrg					  RW_GPR(0),
149921a55d8Smrg					  RW_REL(ABSOLUTE),
150921a55d8Smrg					  INDEX_GPR(0),
151921a55d8Smrg					  ELEM_SIZE(1));
152921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
153921a55d8Smrg					       SRC_SEL_Y(SQ_SEL_Y),
154921a55d8Smrg					       SRC_SEL_Z(SQ_SEL_Z),
155921a55d8Smrg					       SRC_SEL_W(SQ_SEL_W),
156921a55d8Smrg					       BURST_COUNT(1),
157921a55d8Smrg					       VALID_PIXEL_MODE(0),
158921a55d8Smrg					       END_OF_PROGRAM(1),
159921a55d8Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
160921a55d8Smrg					       MARK(0),
161921a55d8Smrg					       BARRIER(1));
162921a55d8Smrg
163921a55d8Smrg    /* 2 */
164921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
165921a55d8Smrg			     SRC0_REL(ABSOLUTE),
166921a55d8Smrg			     SRC0_ELEM(ELEM_X),
167921a55d8Smrg			     SRC0_NEG(0),
168921a55d8Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
169921a55d8Smrg			     SRC1_REL(ABSOLUTE),
170921a55d8Smrg			     SRC1_ELEM(ELEM_X),
171921a55d8Smrg			     SRC1_NEG(0),
172921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
173921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
174921a55d8Smrg			     LAST(0));
175921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
176921a55d8Smrg				 SRC1_ABS(0),
177921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
178921a55d8Smrg				 UPDATE_PRED(0),
179921a55d8Smrg				 WRITE_MASK(1),
180921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
181921a55d8Smrg				 ALU_INST(SQ_OP2_INST_MOV),
182921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
183921a55d8Smrg				 DST_GPR(0),
184921a55d8Smrg				 DST_REL(ABSOLUTE),
185921a55d8Smrg				 DST_ELEM(ELEM_X),
186921a55d8Smrg				 CLAMP(1));
187921a55d8Smrg    /* 3 */
188921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
189921a55d8Smrg			     SRC0_REL(ABSOLUTE),
190921a55d8Smrg			     SRC0_ELEM(ELEM_Y),
191921a55d8Smrg			     SRC0_NEG(0),
192921a55d8Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
193921a55d8Smrg			     SRC1_REL(ABSOLUTE),
194921a55d8Smrg			     SRC1_ELEM(ELEM_Y),
195921a55d8Smrg			     SRC1_NEG(0),
196921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
197921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
198921a55d8Smrg			     LAST(0));
199921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
200921a55d8Smrg				 SRC1_ABS(0),
201921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
202921a55d8Smrg				 UPDATE_PRED(0),
203921a55d8Smrg				 WRITE_MASK(1),
204921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
205921a55d8Smrg				 ALU_INST(SQ_OP2_INST_MOV),
206921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
207921a55d8Smrg				 DST_GPR(0),
208921a55d8Smrg				 DST_REL(ABSOLUTE),
209921a55d8Smrg				 DST_ELEM(ELEM_Y),
210921a55d8Smrg				 CLAMP(1));
211921a55d8Smrg    /* 4 */
212921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
213921a55d8Smrg			     SRC0_REL(ABSOLUTE),
214921a55d8Smrg			     SRC0_ELEM(ELEM_Z),
215921a55d8Smrg			     SRC0_NEG(0),
216921a55d8Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
217921a55d8Smrg			     SRC1_REL(ABSOLUTE),
218921a55d8Smrg			     SRC1_ELEM(ELEM_Z),
219921a55d8Smrg			     SRC1_NEG(0),
220921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
221921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
222921a55d8Smrg			     LAST(0));
223921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
224921a55d8Smrg				 SRC1_ABS(0),
225921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
226921a55d8Smrg				 UPDATE_PRED(0),
227921a55d8Smrg				 WRITE_MASK(1),
228921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
229921a55d8Smrg				 ALU_INST(SQ_OP2_INST_MOV),
230921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
231921a55d8Smrg				 DST_GPR(0),
232921a55d8Smrg				 DST_REL(ABSOLUTE),
233921a55d8Smrg				 DST_ELEM(ELEM_Z),
234921a55d8Smrg				 CLAMP(1));
235921a55d8Smrg    /* 5 */
236921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
237921a55d8Smrg			     SRC0_REL(ABSOLUTE),
238921a55d8Smrg			     SRC0_ELEM(ELEM_W),
239921a55d8Smrg			     SRC0_NEG(0),
240921a55d8Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
241921a55d8Smrg			     SRC1_REL(ABSOLUTE),
242921a55d8Smrg			     SRC1_ELEM(ELEM_W),
243921a55d8Smrg			     SRC1_NEG(0),
244921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
245921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
246921a55d8Smrg			     LAST(1));
247921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
248921a55d8Smrg				 SRC1_ABS(0),
249921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
250921a55d8Smrg				 UPDATE_PRED(0),
251921a55d8Smrg				 WRITE_MASK(1),
252921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
253921a55d8Smrg				 ALU_INST(SQ_OP2_INST_MOV),
254921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
255921a55d8Smrg				 DST_GPR(0),
256921a55d8Smrg				 DST_REL(ABSOLUTE),
257921a55d8Smrg				 DST_ELEM(ELEM_W),
258921a55d8Smrg				 CLAMP(1));
259921a55d8Smrg
260921a55d8Smrg    return i;
261921a55d8Smrg}
262921a55d8Smrg
263921a55d8Smrg/* copy vs --------------------------------------- */
264921a55d8Smrgint evergreen_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader)
265921a55d8Smrg{
266921a55d8Smrg    int i = 0;
267921a55d8Smrg
268921a55d8Smrg    /* 0 */
269921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(4),
270921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
271921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
272921a55d8Smrg			    CF_CONST(0),
273921a55d8Smrg			    COND(SQ_CF_COND_ACTIVE),
274921a55d8Smrg			    I_COUNT(2),
275921a55d8Smrg			    VALID_PIXEL_MODE(0),
276921a55d8Smrg			    END_OF_PROGRAM(0),
277921a55d8Smrg			    CF_INST(SQ_CF_INST_VC),
278921a55d8Smrg			    WHOLE_QUAD_MODE(0),
279921a55d8Smrg			    BARRIER(1));
280921a55d8Smrg    /* 1 */
281921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
282921a55d8Smrg					  TYPE(SQ_EXPORT_POS),
283921a55d8Smrg					  RW_GPR(1),
284921a55d8Smrg					  RW_REL(ABSOLUTE),
285921a55d8Smrg					  INDEX_GPR(0),
286921a55d8Smrg					  ELEM_SIZE(0));
287921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
288921a55d8Smrg					       SRC_SEL_Y(SQ_SEL_Y),
289921a55d8Smrg					       SRC_SEL_Z(SQ_SEL_Z),
290921a55d8Smrg					       SRC_SEL_W(SQ_SEL_W),
291921a55d8Smrg					       BURST_COUNT(0),
292921a55d8Smrg					       VALID_PIXEL_MODE(0),
293921a55d8Smrg					       END_OF_PROGRAM(0),
294921a55d8Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
295921a55d8Smrg					       MARK(0),
296921a55d8Smrg					       BARRIER(1));
297921a55d8Smrg    /* 2 */
298921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
299921a55d8Smrg					  TYPE(SQ_EXPORT_PARAM),
300921a55d8Smrg					  RW_GPR(0),
301921a55d8Smrg					  RW_REL(ABSOLUTE),
302921a55d8Smrg					  INDEX_GPR(0),
303921a55d8Smrg					  ELEM_SIZE(0));
304921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
305921a55d8Smrg					       SRC_SEL_Y(SQ_SEL_Y),
306921a55d8Smrg					       SRC_SEL_Z(SQ_SEL_Z),
307921a55d8Smrg					       SRC_SEL_W(SQ_SEL_W),
308921a55d8Smrg					       BURST_COUNT(0),
309921a55d8Smrg					       VALID_PIXEL_MODE(0),
310921a55d8Smrg					       END_OF_PROGRAM(1),
311921a55d8Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
312921a55d8Smrg					       MARK(0),
313921a55d8Smrg					       BARRIER(0));
314921a55d8Smrg    /* 3 */
315921a55d8Smrg    shader[i++] = 0x00000000;
316921a55d8Smrg    shader[i++] = 0x00000000;
317921a55d8Smrg    /* 4/5 */
318921a55d8Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
319921a55d8Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
320921a55d8Smrg			     FETCH_WHOLE_QUAD(0),
321921a55d8Smrg			     BUFFER_ID(0),
322921a55d8Smrg			     SRC_GPR(0),
323921a55d8Smrg			     SRC_REL(ABSOLUTE),
324921a55d8Smrg			     SRC_SEL_X(SQ_SEL_X),
325921a55d8Smrg			     MEGA_FETCH_COUNT(16));
326921a55d8Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
327921a55d8Smrg				 DST_REL(0),
328921a55d8Smrg				 DST_SEL_X(SQ_SEL_X),
329921a55d8Smrg				 DST_SEL_Y(SQ_SEL_Y),
330921a55d8Smrg				 DST_SEL_Z(SQ_SEL_0),
331921a55d8Smrg				 DST_SEL_W(SQ_SEL_1),
332921a55d8Smrg				 USE_CONST_FIELDS(0),
333921a55d8Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
334921a55d8Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
335921a55d8Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
336921a55d8Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
337921a55d8Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
338b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
339b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
340b13dfe66Smrg#else
341b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
342b13dfe66Smrg#endif
343921a55d8Smrg			     CONST_BUF_NO_STRIDE(0),
344921a55d8Smrg			     MEGA_FETCH(1),
345921a55d8Smrg			     ALT_CONST(0),
346921a55d8Smrg			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
347921a55d8Smrg    shader[i++] = VTX_DWORD_PAD;
348921a55d8Smrg    /* 6/7 */
349921a55d8Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
350921a55d8Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
351921a55d8Smrg			     FETCH_WHOLE_QUAD(0),
352921a55d8Smrg			     BUFFER_ID(0),
353921a55d8Smrg			     SRC_GPR(0),
354921a55d8Smrg			     SRC_REL(ABSOLUTE),
355921a55d8Smrg			     SRC_SEL_X(SQ_SEL_X),
356921a55d8Smrg			     MEGA_FETCH_COUNT(8));
357921a55d8Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
358921a55d8Smrg				 DST_REL(0),
359921a55d8Smrg				 DST_SEL_X(SQ_SEL_X),
360921a55d8Smrg				 DST_SEL_Y(SQ_SEL_Y),
361921a55d8Smrg				 DST_SEL_Z(SQ_SEL_0),
362921a55d8Smrg				 DST_SEL_W(SQ_SEL_1),
363921a55d8Smrg				 USE_CONST_FIELDS(0),
364921a55d8Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
365921a55d8Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
366921a55d8Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
367921a55d8Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
368921a55d8Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
369b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
370b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
371b13dfe66Smrg#else
372b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
373b13dfe66Smrg#endif
374921a55d8Smrg			     CONST_BUF_NO_STRIDE(0),
375921a55d8Smrg			     MEGA_FETCH(0),
376921a55d8Smrg			     ALT_CONST(0),
377921a55d8Smrg			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
378921a55d8Smrg    shader[i++] = VTX_DWORD_PAD;
379921a55d8Smrg
380921a55d8Smrg    return i;
381921a55d8Smrg}
382921a55d8Smrg
383921a55d8Smrg/* copy ps --------------------------------------- */
384921a55d8Smrgint evergreen_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader)
385921a55d8Smrg{
386921a55d8Smrg    int i = 0;
387921a55d8Smrg
388921a55d8Smrg    /* CF INST 0 */
389921a55d8Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(3),
390921a55d8Smrg				KCACHE_BANK0(0),
391921a55d8Smrg				KCACHE_BANK1(0),
392921a55d8Smrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
393921a55d8Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
394921a55d8Smrg				KCACHE_ADDR0(0),
395921a55d8Smrg				KCACHE_ADDR1(0),
396921a55d8Smrg				I_COUNT(4),
397921a55d8Smrg				ALT_CONST(0),
398921a55d8Smrg				CF_INST(SQ_CF_INST_ALU),
399921a55d8Smrg				WHOLE_QUAD_MODE(0),
400921a55d8Smrg				BARRIER(1));
401921a55d8Smrg    /* CF INST 1 */
402921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(8),
403921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
404921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
405921a55d8Smrg			    CF_CONST(0),
406921a55d8Smrg			    COND(SQ_CF_COND_ACTIVE),
407921a55d8Smrg			    I_COUNT(1),
408921a55d8Smrg			    VALID_PIXEL_MODE(0),
409921a55d8Smrg			    END_OF_PROGRAM(0),
410921a55d8Smrg			    CF_INST(SQ_CF_INST_TC),
411921a55d8Smrg			    WHOLE_QUAD_MODE(0),
412921a55d8Smrg			    BARRIER(1));
413921a55d8Smrg    /* CF INST 2 */
414921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
415921a55d8Smrg					  TYPE(SQ_EXPORT_PIXEL),
416921a55d8Smrg					  RW_GPR(0),
417921a55d8Smrg					  RW_REL(ABSOLUTE),
418921a55d8Smrg					  INDEX_GPR(0),
419921a55d8Smrg					  ELEM_SIZE(1));
420921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
421921a55d8Smrg					       SRC_SEL_Y(SQ_SEL_Y),
422921a55d8Smrg					       SRC_SEL_Z(SQ_SEL_Z),
423921a55d8Smrg					       SRC_SEL_W(SQ_SEL_W),
424921a55d8Smrg					       BURST_COUNT(1),
425921a55d8Smrg					       VALID_PIXEL_MODE(0),
426921a55d8Smrg					       END_OF_PROGRAM(1),
427921a55d8Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
428921a55d8Smrg					       MARK(0),
429921a55d8Smrg					       BARRIER(1));
430921a55d8Smrg
431921a55d8Smrg    /* 3 interpolate tex coords */
432921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
433921a55d8Smrg			     SRC0_REL(ABSOLUTE),
434921a55d8Smrg			     SRC0_ELEM(ELEM_Y),
435921a55d8Smrg			     SRC0_NEG(0),
436921a55d8Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
437921a55d8Smrg			     SRC1_REL(ABSOLUTE),
438921a55d8Smrg			     SRC1_ELEM(ELEM_X),
439921a55d8Smrg			     SRC1_NEG(0),
440921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
441921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
442921a55d8Smrg			     LAST(0));
443921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
444921a55d8Smrg				 SRC1_ABS(0),
445921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
446921a55d8Smrg				 UPDATE_PRED(0),
447921a55d8Smrg				 WRITE_MASK(1),
448921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
449921a55d8Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
450921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
451921a55d8Smrg				 DST_GPR(0),
452921a55d8Smrg				 DST_REL(ABSOLUTE),
453921a55d8Smrg				 DST_ELEM(ELEM_X),
454921a55d8Smrg				 CLAMP(0));
455921a55d8Smrg    /* 4 */
456921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
457921a55d8Smrg			     SRC0_REL(ABSOLUTE),
458921a55d8Smrg			     SRC0_ELEM(ELEM_X),
459921a55d8Smrg			     SRC0_NEG(0),
460921a55d8Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
461921a55d8Smrg			     SRC1_REL(ABSOLUTE),
462921a55d8Smrg			     SRC1_ELEM(ELEM_X),
463921a55d8Smrg			     SRC1_NEG(0),
464921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
465921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
466921a55d8Smrg			     LAST(0));
467921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
468921a55d8Smrg				 SRC1_ABS(0),
469921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
470921a55d8Smrg				 UPDATE_PRED(0),
471921a55d8Smrg				 WRITE_MASK(1),
472921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
473921a55d8Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
474921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
475921a55d8Smrg				 DST_GPR(0),
476921a55d8Smrg				 DST_REL(ABSOLUTE),
477921a55d8Smrg				 DST_ELEM(ELEM_Y),
478921a55d8Smrg				 CLAMP(0));
479921a55d8Smrg    /* 5 */
480921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
481921a55d8Smrg			     SRC0_REL(ABSOLUTE),
482921a55d8Smrg			     SRC0_ELEM(ELEM_Y),
483921a55d8Smrg			     SRC0_NEG(0),
484921a55d8Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
485921a55d8Smrg			     SRC1_REL(ABSOLUTE),
486921a55d8Smrg			     SRC1_ELEM(ELEM_X),
487921a55d8Smrg			     SRC1_NEG(0),
488921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
489921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
490921a55d8Smrg			     LAST(0));
491921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
492921a55d8Smrg				 SRC1_ABS(0),
493921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
494921a55d8Smrg				 UPDATE_PRED(0),
495921a55d8Smrg				 WRITE_MASK(0),
496921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
497921a55d8Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
498921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
499921a55d8Smrg				 DST_GPR(0),
500921a55d8Smrg				 DST_REL(ABSOLUTE),
501921a55d8Smrg				 DST_ELEM(ELEM_Z),
502921a55d8Smrg				 CLAMP(0));
503921a55d8Smrg    /* 6 */
504921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
505921a55d8Smrg			     SRC0_REL(ABSOLUTE),
506921a55d8Smrg			     SRC0_ELEM(ELEM_X),
507921a55d8Smrg			     SRC0_NEG(0),
508921a55d8Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
509921a55d8Smrg			     SRC1_REL(ABSOLUTE),
510921a55d8Smrg			     SRC1_ELEM(ELEM_X),
511921a55d8Smrg			     SRC1_NEG(0),
512921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
513921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
514921a55d8Smrg			     LAST(1));
515921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
516921a55d8Smrg				 SRC1_ABS(0),
517921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
518921a55d8Smrg				 UPDATE_PRED(0),
519921a55d8Smrg				 WRITE_MASK(0),
520921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
521921a55d8Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
522921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
523921a55d8Smrg				 DST_GPR(0),
524921a55d8Smrg				 DST_REL(ABSOLUTE),
525921a55d8Smrg				 DST_ELEM(ELEM_W),
526921a55d8Smrg				 CLAMP(0));
527921a55d8Smrg
528921a55d8Smrg    /* 7 */
529921a55d8Smrg    shader[i++] = 0x00000000;
530921a55d8Smrg    shader[i++] = 0x00000000;
531921a55d8Smrg
532921a55d8Smrg    /* 8/9 TEX INST 0 */
533921a55d8Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
534921a55d8Smrg			     INST_MOD(0),
535921a55d8Smrg			     FETCH_WHOLE_QUAD(0),
536921a55d8Smrg			     RESOURCE_ID(0),
537921a55d8Smrg			     SRC_GPR(0),
538921a55d8Smrg			     SRC_REL(ABSOLUTE),
539921a55d8Smrg			     ALT_CONST(0),
540921a55d8Smrg			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
541921a55d8Smrg			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
542921a55d8Smrg    shader[i++] = TEX_DWORD1(DST_GPR(0),
543921a55d8Smrg			     DST_REL(ABSOLUTE),
544921a55d8Smrg			     DST_SEL_X(SQ_SEL_X), /* R */
545921a55d8Smrg			     DST_SEL_Y(SQ_SEL_Y), /* G */
546921a55d8Smrg			     DST_SEL_Z(SQ_SEL_Z), /* B */
547921a55d8Smrg			     DST_SEL_W(SQ_SEL_W), /* A */
548921a55d8Smrg			     LOD_BIAS(0),
549921a55d8Smrg			     COORD_TYPE_X(TEX_UNNORMALIZED),
550921a55d8Smrg			     COORD_TYPE_Y(TEX_UNNORMALIZED),
551921a55d8Smrg			     COORD_TYPE_Z(TEX_UNNORMALIZED),
552921a55d8Smrg			     COORD_TYPE_W(TEX_UNNORMALIZED));
553921a55d8Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
554921a55d8Smrg			     OFFSET_Y(0),
555921a55d8Smrg			     OFFSET_Z(0),
556921a55d8Smrg			     SAMPLER_ID(0),
557921a55d8Smrg			     SRC_SEL_X(SQ_SEL_X),
558921a55d8Smrg			     SRC_SEL_Y(SQ_SEL_Y),
559921a55d8Smrg			     SRC_SEL_Z(SQ_SEL_0),
560921a55d8Smrg			     SRC_SEL_W(SQ_SEL_1));
561921a55d8Smrg    shader[i++] = TEX_DWORD_PAD;
562921a55d8Smrg
563921a55d8Smrg    return i;
564921a55d8Smrg}
565921a55d8Smrg
566921a55d8Smrgint evergreen_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
567921a55d8Smrg{
568921a55d8Smrg    int i = 0;
569921a55d8Smrg
570921a55d8Smrg    /* 0 */
571921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(6),
572921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
573921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
574921a55d8Smrg                            CF_CONST(0),
575921a55d8Smrg                            COND(SQ_CF_COND_ACTIVE),
576921a55d8Smrg                            I_COUNT(2),
577921a55d8Smrg                            VALID_PIXEL_MODE(0),
578921a55d8Smrg                            END_OF_PROGRAM(0),
579921a55d8Smrg                            CF_INST(SQ_CF_INST_VC),
580921a55d8Smrg                            WHOLE_QUAD_MODE(0),
581921a55d8Smrg                            BARRIER(1));
582921a55d8Smrg
583921a55d8Smrg    /* 1 - ALU */
584921a55d8Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(4),
585921a55d8Smrg				KCACHE_BANK0(0),
586921a55d8Smrg				KCACHE_BANK1(0),
587921a55d8Smrg				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
588921a55d8Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
589921a55d8Smrg				KCACHE_ADDR0(0),
590921a55d8Smrg				KCACHE_ADDR1(0),
591921a55d8Smrg				I_COUNT(2),
592921a55d8Smrg				ALT_CONST(0),
593921a55d8Smrg				CF_INST(SQ_CF_INST_ALU),
594921a55d8Smrg				WHOLE_QUAD_MODE(0),
595921a55d8Smrg				BARRIER(1));
596921a55d8Smrg
597921a55d8Smrg    /* 2 */
598921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
599921a55d8Smrg                                          TYPE(SQ_EXPORT_POS),
600921a55d8Smrg                                          RW_GPR(1),
601921a55d8Smrg                                          RW_REL(ABSOLUTE),
602921a55d8Smrg                                          INDEX_GPR(0),
603921a55d8Smrg                                          ELEM_SIZE(3));
604921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
605921a55d8Smrg                                               SRC_SEL_Y(SQ_SEL_Y),
606921a55d8Smrg                                               SRC_SEL_Z(SQ_SEL_Z),
607921a55d8Smrg                                               SRC_SEL_W(SQ_SEL_W),
608921a55d8Smrg                                               BURST_COUNT(1),
609921a55d8Smrg                                               VALID_PIXEL_MODE(0),
610921a55d8Smrg                                               END_OF_PROGRAM(0),
611921a55d8Smrg                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
612921a55d8Smrg                                               MARK(0),
613921a55d8Smrg                                               BARRIER(1));
614921a55d8Smrg    /* 3 */
615921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
616921a55d8Smrg                                          TYPE(SQ_EXPORT_PARAM),
617921a55d8Smrg                                          RW_GPR(0),
618921a55d8Smrg                                          RW_REL(ABSOLUTE),
619921a55d8Smrg                                          INDEX_GPR(0),
620921a55d8Smrg                                          ELEM_SIZE(3));
621921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
622921a55d8Smrg                                               SRC_SEL_Y(SQ_SEL_Y),
623921a55d8Smrg                                               SRC_SEL_Z(SQ_SEL_Z),
624921a55d8Smrg                                               SRC_SEL_W(SQ_SEL_W),
625921a55d8Smrg                                               BURST_COUNT(1),
626921a55d8Smrg                                               VALID_PIXEL_MODE(0),
627921a55d8Smrg                                               END_OF_PROGRAM(1),
628921a55d8Smrg                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
629921a55d8Smrg                                               MARK(0),
630921a55d8Smrg                                               BARRIER(0));
631921a55d8Smrg
632921a55d8Smrg
633921a55d8Smrg    /* 4 texX / w */
634921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
635921a55d8Smrg                             SRC0_REL(ABSOLUTE),
636921a55d8Smrg                             SRC0_ELEM(ELEM_X),
637921a55d8Smrg                             SRC0_NEG(0),
638921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
639921a55d8Smrg                             SRC1_REL(ABSOLUTE),
640921a55d8Smrg                             SRC1_ELEM(ELEM_X),
641921a55d8Smrg                             SRC1_NEG(0),
642921a55d8Smrg                             INDEX_MODE(SQ_INDEX_AR_X),
643921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
644921a55d8Smrg                             LAST(0));
645921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
646921a55d8Smrg                                 SRC1_ABS(0),
647921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
648921a55d8Smrg                                 UPDATE_PRED(0),
649921a55d8Smrg                                 WRITE_MASK(1),
650921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
651921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_MUL),
652921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
653921a55d8Smrg                                 DST_GPR(0),
654921a55d8Smrg                                 DST_REL(ABSOLUTE),
655921a55d8Smrg                                 DST_ELEM(ELEM_X),
656921a55d8Smrg                                 CLAMP(0));
657921a55d8Smrg
658921a55d8Smrg    /* 5 texY / h */
659921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
660921a55d8Smrg                             SRC0_REL(ABSOLUTE),
661921a55d8Smrg                             SRC0_ELEM(ELEM_Y),
662921a55d8Smrg                             SRC0_NEG(0),
663921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
664921a55d8Smrg                             SRC1_REL(ABSOLUTE),
665921a55d8Smrg                             SRC1_ELEM(ELEM_Y),
666921a55d8Smrg                             SRC1_NEG(0),
667921a55d8Smrg                             INDEX_MODE(SQ_INDEX_AR_X),
668921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
669921a55d8Smrg                             LAST(1));
670921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
671921a55d8Smrg                                 SRC1_ABS(0),
672921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
673921a55d8Smrg                                 UPDATE_PRED(0),
674921a55d8Smrg                                 WRITE_MASK(1),
675921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
676921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_MUL),
677921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
678921a55d8Smrg                                 DST_GPR(0),
679921a55d8Smrg                                 DST_REL(ABSOLUTE),
680921a55d8Smrg                                 DST_ELEM(ELEM_Y),
681921a55d8Smrg                                 CLAMP(0));
682921a55d8Smrg
683921a55d8Smrg    /* 6/7 */
684921a55d8Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
685921a55d8Smrg                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
686921a55d8Smrg                             FETCH_WHOLE_QUAD(0),
687921a55d8Smrg                             BUFFER_ID(0),
688921a55d8Smrg                             SRC_GPR(0),
689921a55d8Smrg                             SRC_REL(ABSOLUTE),
690921a55d8Smrg                             SRC_SEL_X(SQ_SEL_X),
691921a55d8Smrg                             MEGA_FETCH_COUNT(16));
692921a55d8Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
693921a55d8Smrg                                 DST_REL(ABSOLUTE),
694921a55d8Smrg                                 DST_SEL_X(SQ_SEL_X),
695921a55d8Smrg                                 DST_SEL_Y(SQ_SEL_Y),
696921a55d8Smrg                                 DST_SEL_Z(SQ_SEL_0),
697921a55d8Smrg                                 DST_SEL_W(SQ_SEL_1),
698921a55d8Smrg                                 USE_CONST_FIELDS(0),
699921a55d8Smrg                                 DATA_FORMAT(FMT_32_32_FLOAT),
700921a55d8Smrg                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
701921a55d8Smrg                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
702921a55d8Smrg                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
703921a55d8Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
704b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
705b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
706b13dfe66Smrg#else
707b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
708b13dfe66Smrg#endif
709921a55d8Smrg                             CONST_BUF_NO_STRIDE(0),
710921a55d8Smrg                             MEGA_FETCH(1),
711921a55d8Smrg			     ALT_CONST(0),
712921a55d8Smrg			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
713921a55d8Smrg    shader[i++] = VTX_DWORD_PAD;
714921a55d8Smrg    /* 8/9 */
715921a55d8Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
716921a55d8Smrg                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
717921a55d8Smrg                             FETCH_WHOLE_QUAD(0),
718921a55d8Smrg                             BUFFER_ID(0),
719921a55d8Smrg                             SRC_GPR(0),
720921a55d8Smrg                             SRC_REL(ABSOLUTE),
721921a55d8Smrg                             SRC_SEL_X(SQ_SEL_X),
722921a55d8Smrg                             MEGA_FETCH_COUNT(8));
723921a55d8Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
724921a55d8Smrg                                 DST_REL(ABSOLUTE),
725921a55d8Smrg                                 DST_SEL_X(SQ_SEL_X),
726921a55d8Smrg                                 DST_SEL_Y(SQ_SEL_Y),
727921a55d8Smrg                                 DST_SEL_Z(SQ_SEL_0),
728921a55d8Smrg                                 DST_SEL_W(SQ_SEL_1),
729921a55d8Smrg                                 USE_CONST_FIELDS(0),
730921a55d8Smrg                                 DATA_FORMAT(FMT_32_32_FLOAT),
731921a55d8Smrg                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
732921a55d8Smrg                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
733921a55d8Smrg                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
734921a55d8Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
735b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
736b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
737b13dfe66Smrg#else
738b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
739b13dfe66Smrg#endif
740921a55d8Smrg                             CONST_BUF_NO_STRIDE(0),
741921a55d8Smrg                             MEGA_FETCH(0),
742921a55d8Smrg			     ALT_CONST(0),
743921a55d8Smrg			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
744921a55d8Smrg    shader[i++] = VTX_DWORD_PAD;
745921a55d8Smrg
746921a55d8Smrg    return i;
747921a55d8Smrg}
748921a55d8Smrg
749921a55d8Smrgint evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
750921a55d8Smrg{
751921a55d8Smrg    int i = 0;
752921a55d8Smrg
753921a55d8Smrg    /* 0 */
754921a55d8Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(5),
755921a55d8Smrg				KCACHE_BANK0(0),
756921a55d8Smrg				KCACHE_BANK1(0),
757921a55d8Smrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
758921a55d8Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
759921a55d8Smrg				KCACHE_ADDR0(0),
760921a55d8Smrg				KCACHE_ADDR1(0),
761921a55d8Smrg				I_COUNT(4),
762921a55d8Smrg				ALT_CONST(0),
763921a55d8Smrg				CF_INST(SQ_CF_INST_ALU),
764921a55d8Smrg				WHOLE_QUAD_MODE(0),
765921a55d8Smrg				BARRIER(1));
766921a55d8Smrg    /* 1 */
767921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(21),
768921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
769921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
770921a55d8Smrg                            CF_CONST(0),
771921a55d8Smrg                            COND(SQ_CF_COND_BOOL),
772921a55d8Smrg                            I_COUNT(0),
773921a55d8Smrg                            VALID_PIXEL_MODE(0),
774921a55d8Smrg                            END_OF_PROGRAM(0),
775921a55d8Smrg                            CF_INST(SQ_CF_INST_CALL),
776921a55d8Smrg                            WHOLE_QUAD_MODE(0),
777921a55d8Smrg                            BARRIER(0));
778921a55d8Smrg    /* 2 */
779921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(30),
780921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
781921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
782921a55d8Smrg                            CF_CONST(0),
783921a55d8Smrg                            COND(SQ_CF_COND_NOT_BOOL),
784921a55d8Smrg                            I_COUNT(0),
785921a55d8Smrg                            VALID_PIXEL_MODE(0),
786921a55d8Smrg                            END_OF_PROGRAM(0),
787921a55d8Smrg                            CF_INST(SQ_CF_INST_CALL),
788921a55d8Smrg                            WHOLE_QUAD_MODE(0),
789921a55d8Smrg                            BARRIER(0));
790921a55d8Smrg    /* 3 */
791921a55d8Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(9),
792921a55d8Smrg                                KCACHE_BANK0(0),
793921a55d8Smrg                                KCACHE_BANK1(0),
794921a55d8Smrg                                KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
795921a55d8Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
796921a55d8Smrg                                KCACHE_ADDR0(0),
797921a55d8Smrg                                KCACHE_ADDR1(0),
798921a55d8Smrg                                I_COUNT(12),
799921a55d8Smrg                                ALT_CONST(0),
800921a55d8Smrg                                CF_INST(SQ_CF_INST_ALU),
801921a55d8Smrg                                WHOLE_QUAD_MODE(0),
802921a55d8Smrg                                BARRIER(1));
803921a55d8Smrg    /* 4 */
804921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
805921a55d8Smrg                                          TYPE(SQ_EXPORT_PIXEL),
806921a55d8Smrg                                          RW_GPR(2),
807921a55d8Smrg                                          RW_REL(ABSOLUTE),
808921a55d8Smrg                                          INDEX_GPR(0),
809921a55d8Smrg                                          ELEM_SIZE(3));
810921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
811921a55d8Smrg                                               SRC_SEL_Y(SQ_SEL_Y),
812921a55d8Smrg                                               SRC_SEL_Z(SQ_SEL_Z),
813921a55d8Smrg                                               SRC_SEL_W(SQ_SEL_W),
814921a55d8Smrg                                               BURST_COUNT(1),
815921a55d8Smrg                                               VALID_PIXEL_MODE(0),
816921a55d8Smrg                                               END_OF_PROGRAM(1),
817921a55d8Smrg                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
818921a55d8Smrg                                               MARK(0),
819921a55d8Smrg                                               BARRIER(1));
820921a55d8Smrg    /* 5 interpolate tex coords */
821921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
822921a55d8Smrg			     SRC0_REL(ABSOLUTE),
823921a55d8Smrg			     SRC0_ELEM(ELEM_Y),
824921a55d8Smrg			     SRC0_NEG(0),
825921a55d8Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
826921a55d8Smrg			     SRC1_REL(ABSOLUTE),
827921a55d8Smrg			     SRC1_ELEM(ELEM_X),
828921a55d8Smrg			     SRC1_NEG(0),
829921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
830921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
831921a55d8Smrg			     LAST(0));
832921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
833921a55d8Smrg				 SRC1_ABS(0),
834921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
835921a55d8Smrg				 UPDATE_PRED(0),
836921a55d8Smrg				 WRITE_MASK(1),
837921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
838921a55d8Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
839921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
840921a55d8Smrg				 DST_GPR(0),
841921a55d8Smrg				 DST_REL(ABSOLUTE),
842921a55d8Smrg				 DST_ELEM(ELEM_X),
843921a55d8Smrg				 CLAMP(0));
844921a55d8Smrg    /* 6 */
845921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
846921a55d8Smrg			     SRC0_REL(ABSOLUTE),
847921a55d8Smrg			     SRC0_ELEM(ELEM_X),
848921a55d8Smrg			     SRC0_NEG(0),
849921a55d8Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
850921a55d8Smrg			     SRC1_REL(ABSOLUTE),
851921a55d8Smrg			     SRC1_ELEM(ELEM_X),
852921a55d8Smrg			     SRC1_NEG(0),
853921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
854921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
855921a55d8Smrg			     LAST(0));
856921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
857921a55d8Smrg				 SRC1_ABS(0),
858921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
859921a55d8Smrg				 UPDATE_PRED(0),
860921a55d8Smrg				 WRITE_MASK(1),
861921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
862921a55d8Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
863921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
864921a55d8Smrg				 DST_GPR(0),
865921a55d8Smrg				 DST_REL(ABSOLUTE),
866921a55d8Smrg				 DST_ELEM(ELEM_Y),
867921a55d8Smrg				 CLAMP(0));
868921a55d8Smrg    /* 7 */
869921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
870921a55d8Smrg			     SRC0_REL(ABSOLUTE),
871921a55d8Smrg			     SRC0_ELEM(ELEM_Y),
872921a55d8Smrg			     SRC0_NEG(0),
873921a55d8Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
874921a55d8Smrg			     SRC1_REL(ABSOLUTE),
875921a55d8Smrg			     SRC1_ELEM(ELEM_X),
876921a55d8Smrg			     SRC1_NEG(0),
877921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
878921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
879921a55d8Smrg			     LAST(0));
880921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
881921a55d8Smrg				 SRC1_ABS(0),
882921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
883921a55d8Smrg				 UPDATE_PRED(0),
884921a55d8Smrg				 WRITE_MASK(0),
885921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
886921a55d8Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
887921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
888921a55d8Smrg				 DST_GPR(0),
889921a55d8Smrg				 DST_REL(ABSOLUTE),
890921a55d8Smrg				 DST_ELEM(ELEM_Z),
891921a55d8Smrg				 CLAMP(0));
892921a55d8Smrg    /* 8 */
893921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
894921a55d8Smrg			     SRC0_REL(ABSOLUTE),
895921a55d8Smrg			     SRC0_ELEM(ELEM_X),
896921a55d8Smrg			     SRC0_NEG(0),
897921a55d8Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
898921a55d8Smrg			     SRC1_REL(ABSOLUTE),
899921a55d8Smrg			     SRC1_ELEM(ELEM_X),
900921a55d8Smrg			     SRC1_NEG(0),
901921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
902921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
903921a55d8Smrg			     LAST(1));
904921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
905921a55d8Smrg				 SRC1_ABS(0),
906921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
907921a55d8Smrg				 UPDATE_PRED(0),
908921a55d8Smrg				 WRITE_MASK(0),
909921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
910921a55d8Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
911921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
912921a55d8Smrg				 DST_GPR(0),
913921a55d8Smrg				 DST_REL(ABSOLUTE),
914921a55d8Smrg				 DST_ELEM(ELEM_W),
915921a55d8Smrg				 CLAMP(0));
916921a55d8Smrg
917921a55d8Smrg    /* 9,10,11,12 */
918921a55d8Smrg    /* r2.x = MAD(c0.w, r1.x, c0.x) */
919921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
920921a55d8Smrg                             SRC0_REL(ABSOLUTE),
921921a55d8Smrg                             SRC0_ELEM(ELEM_W),
922921a55d8Smrg                             SRC0_NEG(0),
923921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
924921a55d8Smrg                             SRC1_REL(ABSOLUTE),
925921a55d8Smrg                             SRC1_ELEM(ELEM_X),
926921a55d8Smrg                             SRC1_NEG(0),
927921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
928921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
929921a55d8Smrg                             LAST(0));
930921a55d8Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
931921a55d8Smrg                                 SRC2_REL(ABSOLUTE),
932921a55d8Smrg                                 SRC2_ELEM(ELEM_X),
933921a55d8Smrg                                 SRC2_NEG(0),
934921a55d8Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
935921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
936921a55d8Smrg                                 DST_GPR(2),
937921a55d8Smrg                                 DST_REL(ABSOLUTE),
938921a55d8Smrg                                 DST_ELEM(ELEM_X),
939921a55d8Smrg                                 CLAMP(0));
940921a55d8Smrg    /* r2.y = MAD(c0.w, r1.x, c0.y) */
941921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
942921a55d8Smrg                             SRC0_REL(ABSOLUTE),
943921a55d8Smrg                             SRC0_ELEM(ELEM_W),
944921a55d8Smrg                             SRC0_NEG(0),
945921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
946921a55d8Smrg                             SRC1_REL(ABSOLUTE),
947921a55d8Smrg                             SRC1_ELEM(ELEM_X),
948921a55d8Smrg                             SRC1_NEG(0),
949921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
950921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
951921a55d8Smrg                             LAST(0));
952921a55d8Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
953921a55d8Smrg                                 SRC2_REL(ABSOLUTE),
954921a55d8Smrg                                 SRC2_ELEM(ELEM_Y),
955921a55d8Smrg                                 SRC2_NEG(0),
956921a55d8Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
957921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
958921a55d8Smrg                                 DST_GPR(2),
959921a55d8Smrg                                 DST_REL(ABSOLUTE),
960921a55d8Smrg                                 DST_ELEM(ELEM_Y),
961921a55d8Smrg                                 CLAMP(0));
962921a55d8Smrg    /* r2.z = MAD(c0.w, r1.x, c0.z) */
963921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
964921a55d8Smrg                             SRC0_REL(ABSOLUTE),
965921a55d8Smrg                             SRC0_ELEM(ELEM_W),
966921a55d8Smrg                             SRC0_NEG(0),
967921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
968921a55d8Smrg                             SRC1_REL(ABSOLUTE),
969921a55d8Smrg                             SRC1_ELEM(ELEM_X),
970921a55d8Smrg                             SRC1_NEG(0),
971921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
972921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
973921a55d8Smrg                             LAST(0));
974921a55d8Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
975921a55d8Smrg                                 SRC2_REL(ABSOLUTE),
976921a55d8Smrg                                 SRC2_ELEM(ELEM_Z),
977921a55d8Smrg                                 SRC2_NEG(0),
978921a55d8Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
979921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
980921a55d8Smrg                                 DST_GPR(2),
981921a55d8Smrg                                 DST_REL(ABSOLUTE),
982921a55d8Smrg                                 DST_ELEM(ELEM_Z),
983921a55d8Smrg                                 CLAMP(0));
984921a55d8Smrg    /* r2.w = MAD(0, 0, 1) */
985921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
986921a55d8Smrg                             SRC0_REL(ABSOLUTE),
987921a55d8Smrg                             SRC0_ELEM(ELEM_X),
988921a55d8Smrg                             SRC0_NEG(0),
989921a55d8Smrg                             SRC1_SEL(SQ_ALU_SRC_0),
990921a55d8Smrg                             SRC1_REL(ABSOLUTE),
991921a55d8Smrg                             SRC1_ELEM(ELEM_X),
992921a55d8Smrg                             SRC1_NEG(0),
993921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
994921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
995921a55d8Smrg                             LAST(1));
996921a55d8Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
997921a55d8Smrg                                 SRC2_REL(ABSOLUTE),
998921a55d8Smrg                                 SRC2_ELEM(ELEM_X),
999921a55d8Smrg                                 SRC2_NEG(0),
1000921a55d8Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1001921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1002921a55d8Smrg                                 DST_GPR(2),
1003921a55d8Smrg                                 DST_REL(ABSOLUTE),
1004921a55d8Smrg                                 DST_ELEM(ELEM_W),
1005921a55d8Smrg                                 CLAMP(0));
1006921a55d8Smrg
1007921a55d8Smrg    /* 13,14,15,16 */
1008921a55d8Smrg    /* r2.x = MAD(c1.x, r1.y, pv.x) */
1009921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
1010921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1011921a55d8Smrg                             SRC0_ELEM(ELEM_X),
1012921a55d8Smrg                             SRC0_NEG(0),
1013921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1014921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1015921a55d8Smrg                             SRC1_ELEM(ELEM_Y),
1016921a55d8Smrg                             SRC1_NEG(0),
1017921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1018921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1019921a55d8Smrg                             LAST(0));
1020921a55d8Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1021921a55d8Smrg                                 SRC2_REL(ABSOLUTE),
1022921a55d8Smrg                                 SRC2_ELEM(ELEM_X),
1023921a55d8Smrg                                 SRC2_NEG(0),
1024921a55d8Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1025921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1026921a55d8Smrg                                 DST_GPR(2),
1027921a55d8Smrg                                 DST_REL(ABSOLUTE),
1028921a55d8Smrg                                 DST_ELEM(ELEM_X),
1029921a55d8Smrg                                 CLAMP(0));
1030921a55d8Smrg    /* r2.y = MAD(c1.y, r1.y, pv.y) */
1031921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
1032921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1033921a55d8Smrg                             SRC0_ELEM(ELEM_Y),
1034921a55d8Smrg                             SRC0_NEG(0),
1035921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1036921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1037921a55d8Smrg                             SRC1_ELEM(ELEM_Y),
1038921a55d8Smrg                             SRC1_NEG(0),
1039921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1040921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1041921a55d8Smrg                             LAST(0));
1042921a55d8Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1043921a55d8Smrg                                 SRC2_REL(ABSOLUTE),
1044921a55d8Smrg                                 SRC2_ELEM(ELEM_Y),
1045921a55d8Smrg                                 SRC2_NEG(0),
1046921a55d8Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1047921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1048921a55d8Smrg                                 DST_GPR(2),
1049921a55d8Smrg                                 DST_REL(ABSOLUTE),
1050921a55d8Smrg                                 DST_ELEM(ELEM_Y),
1051921a55d8Smrg                                 CLAMP(0));
1052921a55d8Smrg    /* r2.z = MAD(c1.z, r1.y, pv.z) */
1053921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
1054921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1055921a55d8Smrg                             SRC0_ELEM(ELEM_Z),
1056921a55d8Smrg                             SRC0_NEG(0),
1057921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1058921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1059921a55d8Smrg                             SRC1_ELEM(ELEM_Y),
1060921a55d8Smrg                             SRC1_NEG(0),
1061921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1062921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1063921a55d8Smrg                             LAST(0));
1064921a55d8Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1065921a55d8Smrg                                 SRC2_REL(ABSOLUTE),
1066921a55d8Smrg                                 SRC2_ELEM(ELEM_Z),
1067921a55d8Smrg                                 SRC2_NEG(0),
1068921a55d8Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1069921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1070921a55d8Smrg                                 DST_GPR(2),
1071921a55d8Smrg                                 DST_REL(ABSOLUTE),
1072921a55d8Smrg                                 DST_ELEM(ELEM_Z),
1073921a55d8Smrg                                 CLAMP(0));
1074921a55d8Smrg    /* r2.w = MAD(0, 0, 1) */
1075921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
1076921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1077921a55d8Smrg                             SRC0_ELEM(ELEM_X),
1078921a55d8Smrg                             SRC0_NEG(0),
1079921a55d8Smrg                             SRC1_SEL(SQ_ALU_SRC_0),
1080921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1081921a55d8Smrg                             SRC1_ELEM(ELEM_X),
1082921a55d8Smrg                             SRC1_NEG(0),
1083921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1084921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1085921a55d8Smrg                             LAST(1));
1086921a55d8Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
1087921a55d8Smrg                                 SRC2_REL(ABSOLUTE),
1088921a55d8Smrg                                 SRC2_ELEM(ELEM_W),
1089921a55d8Smrg                                 SRC2_NEG(0),
1090921a55d8Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1091921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1092921a55d8Smrg                                 DST_GPR(2),
1093921a55d8Smrg                                 DST_REL(ABSOLUTE),
1094921a55d8Smrg                                 DST_ELEM(ELEM_W),
1095921a55d8Smrg                                 CLAMP(0));
1096921a55d8Smrg    /* 17,18,19,20 */
1097921a55d8Smrg    /* r2.x = MAD(c2.x, r1.z, pv.x) */
1098921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
1099921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1100921a55d8Smrg                             SRC0_ELEM(ELEM_X),
1101921a55d8Smrg                             SRC0_NEG(0),
1102921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1103921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1104921a55d8Smrg                             SRC1_ELEM(ELEM_Z),
1105921a55d8Smrg                             SRC1_NEG(0),
1106921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1107921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1108921a55d8Smrg                             LAST(0));
1109921a55d8Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1110921a55d8Smrg                                 SRC2_REL(ABSOLUTE),
1111921a55d8Smrg                                 SRC2_ELEM(ELEM_X),
1112921a55d8Smrg                                 SRC2_NEG(0),
1113921a55d8Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1114921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1115921a55d8Smrg                                 DST_GPR(2),
1116921a55d8Smrg                                 DST_REL(ABSOLUTE),
1117921a55d8Smrg                                 DST_ELEM(ELEM_X),
1118921a55d8Smrg                                 CLAMP(1));
1119921a55d8Smrg    /* r2.y = MAD(c2.y, r1.z, pv.y) */
1120921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
1121921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1122921a55d8Smrg                             SRC0_ELEM(ELEM_Y),
1123921a55d8Smrg                             SRC0_NEG(0),
1124921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1125921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1126921a55d8Smrg                             SRC1_ELEM(ELEM_Z),
1127921a55d8Smrg                             SRC1_NEG(0),
1128921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1129921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1130921a55d8Smrg                             LAST(0));
1131921a55d8Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1132921a55d8Smrg                                 SRC2_REL(ABSOLUTE),
1133921a55d8Smrg                                 SRC2_ELEM(ELEM_Y),
1134921a55d8Smrg                                 SRC2_NEG(0),
1135921a55d8Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1136921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1137921a55d8Smrg                                 DST_GPR(2),
1138921a55d8Smrg                                 DST_REL(ABSOLUTE),
1139921a55d8Smrg                                 DST_ELEM(ELEM_Y),
1140921a55d8Smrg                                 CLAMP(1));
1141921a55d8Smrg    /* r2.z = MAD(c2.z, r1.z, pv.z) */
1142921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
1143921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1144921a55d8Smrg                             SRC0_ELEM(ELEM_Z),
1145921a55d8Smrg                             SRC0_NEG(0),
1146921a55d8Smrg                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1147921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1148921a55d8Smrg                             SRC1_ELEM(ELEM_Z),
1149921a55d8Smrg                             SRC1_NEG(0),
1150921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1151921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1152921a55d8Smrg                             LAST(0));
1153921a55d8Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1154921a55d8Smrg                                 SRC2_REL(ABSOLUTE),
1155921a55d8Smrg                                 SRC2_ELEM(ELEM_Z),
1156921a55d8Smrg                                 SRC2_NEG(0),
1157921a55d8Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1158921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1159921a55d8Smrg                                 DST_GPR(2),
1160921a55d8Smrg                                 DST_REL(ABSOLUTE),
1161921a55d8Smrg                                 DST_ELEM(ELEM_Z),
1162921a55d8Smrg                                 CLAMP(1));
1163921a55d8Smrg    /* r2.w = MAD(0, 0, 1) */
1164921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
1165921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1166921a55d8Smrg                             SRC0_ELEM(ELEM_X),
1167921a55d8Smrg                             SRC0_NEG(0),
1168921a55d8Smrg                             SRC1_SEL(SQ_ALU_SRC_0),
1169921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1170921a55d8Smrg                             SRC1_ELEM(ELEM_X),
1171921a55d8Smrg                             SRC1_NEG(0),
1172921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1173921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1174921a55d8Smrg                             LAST(1));
1175921a55d8Smrg    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
1176921a55d8Smrg                                 SRC2_REL(ABSOLUTE),
1177921a55d8Smrg                                 SRC2_ELEM(ELEM_X),
1178921a55d8Smrg                                 SRC2_NEG(0),
1179921a55d8Smrg                                 ALU_INST(SQ_OP3_INST_MULADD),
1180921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1181921a55d8Smrg                                 DST_GPR(2),
1182921a55d8Smrg                                 DST_REL(ABSOLUTE),
1183921a55d8Smrg                                 DST_ELEM(ELEM_W),
1184921a55d8Smrg                                 CLAMP(1));
1185921a55d8Smrg
1186921a55d8Smrg    /* 21 */
1187921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(24),
1188921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1189921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1190921a55d8Smrg                            CF_CONST(0),
1191921a55d8Smrg                            COND(SQ_CF_COND_ACTIVE),
1192921a55d8Smrg                            I_COUNT(3),
1193921a55d8Smrg                            VALID_PIXEL_MODE(0),
1194921a55d8Smrg                            END_OF_PROGRAM(0),
1195921a55d8Smrg                            CF_INST(SQ_CF_INST_TC),
1196921a55d8Smrg                            WHOLE_QUAD_MODE(0),
1197921a55d8Smrg                            BARRIER(1));
1198921a55d8Smrg    /* 22 */
1199921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(0),
1200921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1201921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1202921a55d8Smrg			    CF_CONST(0),
1203921a55d8Smrg			    COND(SQ_CF_COND_ACTIVE),
1204921a55d8Smrg			    I_COUNT(0),
1205921a55d8Smrg			    VALID_PIXEL_MODE(0),
1206921a55d8Smrg			    END_OF_PROGRAM(0),
1207921a55d8Smrg			    CF_INST(SQ_CF_INST_RETURN),
1208921a55d8Smrg			    WHOLE_QUAD_MODE(0),
1209921a55d8Smrg			    BARRIER(1));
1210921a55d8Smrg    /* 23 */
1211921a55d8Smrg    shader[i++] = 0x00000000;
1212921a55d8Smrg    shader[i++] = 0x00000000;
1213921a55d8Smrg    /* 24/25 */
1214921a55d8Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1215921a55d8Smrg                             INST_MOD(0),
1216921a55d8Smrg                             FETCH_WHOLE_QUAD(0),
1217921a55d8Smrg                             RESOURCE_ID(0),
1218921a55d8Smrg                             SRC_GPR(0),
1219921a55d8Smrg                             SRC_REL(ABSOLUTE),
1220921a55d8Smrg                             ALT_CONST(0),
1221921a55d8Smrg			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1222921a55d8Smrg			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1223921a55d8Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1224921a55d8Smrg                             DST_REL(ABSOLUTE),
1225921a55d8Smrg                             DST_SEL_X(SQ_SEL_X),
1226921a55d8Smrg                             DST_SEL_Y(SQ_SEL_MASK),
1227921a55d8Smrg                             DST_SEL_Z(SQ_SEL_MASK),
1228921a55d8Smrg                             DST_SEL_W(SQ_SEL_1),
1229921a55d8Smrg                             LOD_BIAS(0),
1230921a55d8Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1231921a55d8Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1232921a55d8Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1233921a55d8Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1234921a55d8Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1235921a55d8Smrg                             OFFSET_Y(0),
1236921a55d8Smrg                             OFFSET_Z(0),
1237921a55d8Smrg                             SAMPLER_ID(0),
1238921a55d8Smrg                             SRC_SEL_X(SQ_SEL_X),
1239921a55d8Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1240921a55d8Smrg                             SRC_SEL_Z(SQ_SEL_0),
1241921a55d8Smrg                             SRC_SEL_W(SQ_SEL_1));
1242921a55d8Smrg    shader[i++] = TEX_DWORD_PAD;
1243921a55d8Smrg    /* 26/27 */
1244921a55d8Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1245921a55d8Smrg                             INST_MOD(0),
1246921a55d8Smrg                             FETCH_WHOLE_QUAD(0),
1247921a55d8Smrg                             RESOURCE_ID(1),
1248921a55d8Smrg                             SRC_GPR(0),
1249921a55d8Smrg                             SRC_REL(ABSOLUTE),
1250921a55d8Smrg                             ALT_CONST(0),
1251921a55d8Smrg			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1252921a55d8Smrg			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1253921a55d8Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1254921a55d8Smrg                             DST_REL(ABSOLUTE),
1255921a55d8Smrg                             DST_SEL_X(SQ_SEL_MASK),
1256921a55d8Smrg                             DST_SEL_Y(SQ_SEL_MASK),
1257921a55d8Smrg                             DST_SEL_Z(SQ_SEL_X),
1258921a55d8Smrg                             DST_SEL_W(SQ_SEL_MASK),
1259921a55d8Smrg                             LOD_BIAS(0),
1260921a55d8Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1261921a55d8Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1262921a55d8Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1263921a55d8Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1264921a55d8Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1265921a55d8Smrg                             OFFSET_Y(0),
1266921a55d8Smrg                             OFFSET_Z(0),
1267921a55d8Smrg                             SAMPLER_ID(1),
1268921a55d8Smrg                             SRC_SEL_X(SQ_SEL_X),
1269921a55d8Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1270921a55d8Smrg                             SRC_SEL_Z(SQ_SEL_0),
1271921a55d8Smrg                             SRC_SEL_W(SQ_SEL_1));
1272921a55d8Smrg    shader[i++] = TEX_DWORD_PAD;
1273921a55d8Smrg    /* 28/29 */
1274921a55d8Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1275921a55d8Smrg                             INST_MOD(0),
1276921a55d8Smrg                             FETCH_WHOLE_QUAD(0),
1277921a55d8Smrg                             RESOURCE_ID(2),
1278921a55d8Smrg                             SRC_GPR(0),
1279921a55d8Smrg                             SRC_REL(ABSOLUTE),
1280921a55d8Smrg                             ALT_CONST(0),
1281921a55d8Smrg			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1282921a55d8Smrg			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1283921a55d8Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1284921a55d8Smrg                             DST_REL(ABSOLUTE),
1285921a55d8Smrg                             DST_SEL_X(SQ_SEL_MASK),
1286921a55d8Smrg                             DST_SEL_Y(SQ_SEL_X),
1287921a55d8Smrg                             DST_SEL_Z(SQ_SEL_MASK),
1288921a55d8Smrg                             DST_SEL_W(SQ_SEL_MASK),
1289921a55d8Smrg                             LOD_BIAS(0),
1290921a55d8Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1291921a55d8Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1292921a55d8Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1293921a55d8Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1294921a55d8Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1295921a55d8Smrg                             OFFSET_Y(0),
1296921a55d8Smrg                             OFFSET_Z(0),
1297921a55d8Smrg                             SAMPLER_ID(2),
1298921a55d8Smrg                             SRC_SEL_X(SQ_SEL_X),
1299921a55d8Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1300921a55d8Smrg                             SRC_SEL_Z(SQ_SEL_0),
1301921a55d8Smrg                             SRC_SEL_W(SQ_SEL_1));
1302921a55d8Smrg    shader[i++] = TEX_DWORD_PAD;
1303921a55d8Smrg    /* 30 */
1304921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(32),
1305921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1306921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1307921a55d8Smrg                            CF_CONST(0),
1308921a55d8Smrg                            COND(SQ_CF_COND_ACTIVE),
130968105dcbSveego                            I_COUNT(1),
1310921a55d8Smrg                            VALID_PIXEL_MODE(0),
1311921a55d8Smrg                            END_OF_PROGRAM(0),
1312921a55d8Smrg                            CF_INST(SQ_CF_INST_TC),
1313921a55d8Smrg                            WHOLE_QUAD_MODE(0),
1314921a55d8Smrg                            BARRIER(1));
1315921a55d8Smrg    /* 31 */
1316921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(0),
1317921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1318921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1319921a55d8Smrg			    CF_CONST(0),
1320921a55d8Smrg			    COND(SQ_CF_COND_ACTIVE),
1321921a55d8Smrg			    I_COUNT(0),
1322921a55d8Smrg			    VALID_PIXEL_MODE(0),
1323921a55d8Smrg			    END_OF_PROGRAM(0),
1324921a55d8Smrg			    CF_INST(SQ_CF_INST_RETURN),
1325921a55d8Smrg			    WHOLE_QUAD_MODE(0),
1326921a55d8Smrg			    BARRIER(1));
1327921a55d8Smrg    /* 32/33 */
1328921a55d8Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1329921a55d8Smrg                             INST_MOD(0),
1330921a55d8Smrg                             FETCH_WHOLE_QUAD(0),
1331921a55d8Smrg                             RESOURCE_ID(0),
1332921a55d8Smrg                             SRC_GPR(0),
1333921a55d8Smrg                             SRC_REL(ABSOLUTE),
1334921a55d8Smrg                             ALT_CONST(0),
1335921a55d8Smrg                             RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1336921a55d8Smrg                             SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1337921a55d8Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
1338921a55d8Smrg                             DST_REL(ABSOLUTE),
1339921a55d8Smrg                             DST_SEL_X(SQ_SEL_X),
134068105dcbSveego                             DST_SEL_Y(SQ_SEL_Y),
134168105dcbSveego                             DST_SEL_Z(SQ_SEL_Z),
1342921a55d8Smrg                             DST_SEL_W(SQ_SEL_1),
1343921a55d8Smrg                             LOD_BIAS(0),
1344921a55d8Smrg                             COORD_TYPE_X(TEX_NORMALIZED),
1345921a55d8Smrg                             COORD_TYPE_Y(TEX_NORMALIZED),
1346921a55d8Smrg                             COORD_TYPE_Z(TEX_NORMALIZED),
1347921a55d8Smrg                             COORD_TYPE_W(TEX_NORMALIZED));
1348921a55d8Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1349921a55d8Smrg                             OFFSET_Y(0),
1350921a55d8Smrg                             OFFSET_Z(0),
1351921a55d8Smrg                             SAMPLER_ID(0),
1352921a55d8Smrg                             SRC_SEL_X(SQ_SEL_X),
1353921a55d8Smrg                             SRC_SEL_Y(SQ_SEL_Y),
1354921a55d8Smrg                             SRC_SEL_Z(SQ_SEL_0),
1355921a55d8Smrg                             SRC_SEL_W(SQ_SEL_1));
1356921a55d8Smrg    shader[i++] = TEX_DWORD_PAD;
1357921a55d8Smrg
1358921a55d8Smrg    return i;
1359921a55d8Smrg}
1360921a55d8Smrg
1361921a55d8Smrg/* comp vs --------------------------------------- */
1362921a55d8Smrgint evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
1363921a55d8Smrg{
1364921a55d8Smrg    int i = 0;
1365921a55d8Smrg
1366921a55d8Smrg    /* 0 */
1367921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(3),
1368921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1369921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1370921a55d8Smrg                            CF_CONST(0),
1371921a55d8Smrg                            COND(SQ_CF_COND_BOOL),
1372921a55d8Smrg                            I_COUNT(0),
1373921a55d8Smrg                            VALID_PIXEL_MODE(0),
1374921a55d8Smrg                            END_OF_PROGRAM(0),
1375921a55d8Smrg                            CF_INST(SQ_CF_INST_CALL),
1376921a55d8Smrg                            WHOLE_QUAD_MODE(0),
1377921a55d8Smrg                            BARRIER(0));
1378921a55d8Smrg    /* 1 */
1379921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(9),
1380921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1381921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1382921a55d8Smrg                            CF_CONST(0),
1383921a55d8Smrg                            COND(SQ_CF_COND_NOT_BOOL),
1384921a55d8Smrg                            I_COUNT(0),
1385921a55d8Smrg                            VALID_PIXEL_MODE(0),
1386921a55d8Smrg                            END_OF_PROGRAM(0),
1387921a55d8Smrg                            CF_INST(SQ_CF_INST_CALL),
1388921a55d8Smrg                            WHOLE_QUAD_MODE(0),
1389921a55d8Smrg                            BARRIER(0));
1390921a55d8Smrg    /* 2 */
1391921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(0),
1392921a55d8Smrg                            JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1393921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1394921a55d8Smrg                            CF_CONST(0),
1395921a55d8Smrg                            COND(SQ_CF_COND_ACTIVE),
1396921a55d8Smrg                            I_COUNT(0),
1397921a55d8Smrg                            VALID_PIXEL_MODE(0),
1398921a55d8Smrg                            END_OF_PROGRAM(1),
1399921a55d8Smrg                            CF_INST(SQ_CF_INST_NOP),
1400921a55d8Smrg                            WHOLE_QUAD_MODE(0),
1401921a55d8Smrg                            BARRIER(1));
1402921a55d8Smrg    /* 3 - mask sub */
1403921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(44),
1404921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1405921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1406921a55d8Smrg			    CF_CONST(0),
1407921a55d8Smrg			    COND(SQ_CF_COND_ACTIVE),
1408921a55d8Smrg			    I_COUNT(3),
1409921a55d8Smrg			    VALID_PIXEL_MODE(0),
1410921a55d8Smrg			    END_OF_PROGRAM(0),
1411921a55d8Smrg			    CF_INST(SQ_CF_INST_VC),
1412921a55d8Smrg			    WHOLE_QUAD_MODE(0),
1413921a55d8Smrg			    BARRIER(1));
1414921a55d8Smrg
1415921a55d8Smrg    /* 4 - ALU */
1416921a55d8Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(14),
1417921a55d8Smrg				KCACHE_BANK0(0),
1418921a55d8Smrg				KCACHE_BANK1(0),
1419921a55d8Smrg				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
1420921a55d8Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1421921a55d8Smrg				KCACHE_ADDR0(0),
1422921a55d8Smrg				KCACHE_ADDR1(0),
1423921a55d8Smrg				I_COUNT(20),
1424921a55d8Smrg				ALT_CONST(0),
1425921a55d8Smrg				CF_INST(SQ_CF_INST_ALU),
1426921a55d8Smrg				WHOLE_QUAD_MODE(0),
1427921a55d8Smrg				BARRIER(1));
1428921a55d8Smrg
1429921a55d8Smrg    /* 5 - dst */
1430921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1431921a55d8Smrg					  TYPE(SQ_EXPORT_POS),
1432921a55d8Smrg					  RW_GPR(2),
1433921a55d8Smrg					  RW_REL(ABSOLUTE),
1434921a55d8Smrg					  INDEX_GPR(0),
1435921a55d8Smrg					  ELEM_SIZE(0));
1436921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1437921a55d8Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1438921a55d8Smrg					       SRC_SEL_Z(SQ_SEL_0),
1439921a55d8Smrg					       SRC_SEL_W(SQ_SEL_1),
1440921a55d8Smrg					       BURST_COUNT(1),
1441921a55d8Smrg					       VALID_PIXEL_MODE(0),
1442921a55d8Smrg					       END_OF_PROGRAM(0),
1443921a55d8Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1444921a55d8Smrg					       MARK(0),
1445921a55d8Smrg					       BARRIER(1));
1446921a55d8Smrg    /* 6 - src */
1447921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1448921a55d8Smrg					  TYPE(SQ_EXPORT_PARAM),
1449921a55d8Smrg					  RW_GPR(1),
1450921a55d8Smrg					  RW_REL(ABSOLUTE),
1451921a55d8Smrg					  INDEX_GPR(0),
1452921a55d8Smrg					  ELEM_SIZE(0));
1453921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1454921a55d8Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1455921a55d8Smrg					       SRC_SEL_Z(SQ_SEL_0),
1456921a55d8Smrg					       SRC_SEL_W(SQ_SEL_1),
1457921a55d8Smrg					       BURST_COUNT(1),
1458921a55d8Smrg					       VALID_PIXEL_MODE(0),
1459921a55d8Smrg					       END_OF_PROGRAM(0),
1460921a55d8Smrg					       CF_INST(SQ_CF_INST_EXPORT),
1461921a55d8Smrg					       MARK(0),
1462921a55d8Smrg					       BARRIER(0));
1463921a55d8Smrg    /* 7 - mask */
1464921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
1465921a55d8Smrg					  TYPE(SQ_EXPORT_PARAM),
1466921a55d8Smrg					  RW_GPR(0),
1467921a55d8Smrg					  RW_REL(ABSOLUTE),
1468921a55d8Smrg					  INDEX_GPR(0),
1469921a55d8Smrg					  ELEM_SIZE(0));
1470921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1471921a55d8Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1472921a55d8Smrg					       SRC_SEL_Z(SQ_SEL_0),
1473921a55d8Smrg					       SRC_SEL_W(SQ_SEL_1),
1474921a55d8Smrg					       BURST_COUNT(1),
1475921a55d8Smrg					       VALID_PIXEL_MODE(0),
1476921a55d8Smrg					       END_OF_PROGRAM(0),
1477921a55d8Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1478921a55d8Smrg					       WHOLE_QUAD_MODE(0),
1479921a55d8Smrg					       BARRIER(0));
1480921a55d8Smrg    /* 8 */
1481921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(0),
1482921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1483921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1484921a55d8Smrg			    CF_CONST(0),
1485921a55d8Smrg			    COND(SQ_CF_COND_ACTIVE),
1486921a55d8Smrg			    I_COUNT(0),
1487921a55d8Smrg			    VALID_PIXEL_MODE(0),
1488921a55d8Smrg			    END_OF_PROGRAM(0),
1489921a55d8Smrg			    CF_INST(SQ_CF_INST_RETURN),
1490921a55d8Smrg			    WHOLE_QUAD_MODE(0),
1491921a55d8Smrg			    BARRIER(1));
1492921a55d8Smrg    /* 9 - non-mask sub */
1493921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(50),
1494921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1495921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1496921a55d8Smrg			    CF_CONST(0),
1497921a55d8Smrg			    COND(SQ_CF_COND_ACTIVE),
1498921a55d8Smrg			    I_COUNT(2),
1499921a55d8Smrg			    VALID_PIXEL_MODE(0),
1500921a55d8Smrg			    END_OF_PROGRAM(0),
1501921a55d8Smrg			    CF_INST(SQ_CF_INST_VC),
1502921a55d8Smrg			    WHOLE_QUAD_MODE(0),
1503921a55d8Smrg			    BARRIER(1));
1504921a55d8Smrg
1505921a55d8Smrg    /* 10 - ALU */
1506921a55d8Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(34),
1507921a55d8Smrg				KCACHE_BANK0(0),
1508921a55d8Smrg				KCACHE_BANK1(0),
1509921a55d8Smrg				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
1510921a55d8Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1511921a55d8Smrg				KCACHE_ADDR0(0),
1512921a55d8Smrg				KCACHE_ADDR1(0),
1513921a55d8Smrg				I_COUNT(10),
1514921a55d8Smrg				ALT_CONST(0),
1515921a55d8Smrg				CF_INST(SQ_CF_INST_ALU),
1516921a55d8Smrg				WHOLE_QUAD_MODE(0),
1517921a55d8Smrg				BARRIER(1));
1518921a55d8Smrg
1519921a55d8Smrg    /* 11 - dst */
1520921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1521921a55d8Smrg					  TYPE(SQ_EXPORT_POS),
1522921a55d8Smrg					  RW_GPR(1),
1523921a55d8Smrg					  RW_REL(ABSOLUTE),
1524921a55d8Smrg					  INDEX_GPR(0),
1525921a55d8Smrg					  ELEM_SIZE(0));
1526921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1527921a55d8Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1528921a55d8Smrg					       SRC_SEL_Z(SQ_SEL_0),
1529921a55d8Smrg					       SRC_SEL_W(SQ_SEL_1),
1530921a55d8Smrg					       BURST_COUNT(0),
1531921a55d8Smrg					       VALID_PIXEL_MODE(0),
1532921a55d8Smrg					       END_OF_PROGRAM(0),
1533921a55d8Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1534921a55d8Smrg					       MARK(0),
1535921a55d8Smrg					       BARRIER(1));
1536921a55d8Smrg    /* 12 - src */
1537921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1538921a55d8Smrg					  TYPE(SQ_EXPORT_PARAM),
1539921a55d8Smrg					  RW_GPR(0),
1540921a55d8Smrg					  RW_REL(ABSOLUTE),
1541921a55d8Smrg					  INDEX_GPR(0),
1542921a55d8Smrg					  ELEM_SIZE(0));
1543921a55d8Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1544921a55d8Smrg					       SRC_SEL_Y(SQ_SEL_Y),
1545921a55d8Smrg					       SRC_SEL_Z(SQ_SEL_0),
1546921a55d8Smrg					       SRC_SEL_W(SQ_SEL_1),
1547921a55d8Smrg					       BURST_COUNT(0),
1548921a55d8Smrg					       VALID_PIXEL_MODE(0),
1549921a55d8Smrg					       END_OF_PROGRAM(0),
1550921a55d8Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1551921a55d8Smrg					       MARK(0),
1552921a55d8Smrg					       BARRIER(0));
1553921a55d8Smrg    /* 13 */
1554921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(0),
1555921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1556921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
1557921a55d8Smrg			    CF_CONST(0),
1558921a55d8Smrg			    COND(SQ_CF_COND_ACTIVE),
1559921a55d8Smrg			    I_COUNT(0),
1560921a55d8Smrg			    VALID_PIXEL_MODE(0),
1561921a55d8Smrg			    END_OF_PROGRAM(0),
1562921a55d8Smrg			    CF_INST(SQ_CF_INST_RETURN),
1563921a55d8Smrg			    WHOLE_QUAD_MODE(0),
1564921a55d8Smrg			    BARRIER(1));
1565921a55d8Smrg
1566921a55d8Smrg    /* 14 srcX.x DOT4 - mask */
1567921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1568921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1569921a55d8Smrg                             SRC0_ELEM(ELEM_X),
1570921a55d8Smrg                             SRC0_NEG(0),
1571921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1572921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1573921a55d8Smrg                             SRC1_ELEM(ELEM_X),
1574921a55d8Smrg                             SRC1_NEG(0),
1575921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1576921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1577921a55d8Smrg                             LAST(0));
1578921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1579921a55d8Smrg                                 SRC1_ABS(0),
1580921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1581921a55d8Smrg                                 UPDATE_PRED(0),
1582921a55d8Smrg                                 WRITE_MASK(1),
1583921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1584921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1585921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1586921a55d8Smrg                                 DST_GPR(3),
1587921a55d8Smrg                                 DST_REL(ABSOLUTE),
1588921a55d8Smrg                                 DST_ELEM(ELEM_X),
1589921a55d8Smrg                                 CLAMP(0));
1590921a55d8Smrg
1591921a55d8Smrg    /* 15 srcX.y DOT4 - mask */
1592921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1593921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1594921a55d8Smrg                             SRC0_ELEM(ELEM_Y),
1595921a55d8Smrg                             SRC0_NEG(0),
1596921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1597921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1598921a55d8Smrg                             SRC1_ELEM(ELEM_Y),
1599921a55d8Smrg                             SRC1_NEG(0),
1600921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1601921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1602921a55d8Smrg                             LAST(0));
1603921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1604921a55d8Smrg                                 SRC1_ABS(0),
1605921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1606921a55d8Smrg                                 UPDATE_PRED(0),
1607921a55d8Smrg                                 WRITE_MASK(0),
1608921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1609921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1610921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1611921a55d8Smrg                                 DST_GPR(3),
1612921a55d8Smrg                                 DST_REL(ABSOLUTE),
1613921a55d8Smrg                                 DST_ELEM(ELEM_Y),
1614921a55d8Smrg                                 CLAMP(0));
1615921a55d8Smrg
1616921a55d8Smrg    /* 16 srcX.z DOT4 - mask */
1617921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1618921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1619921a55d8Smrg                             SRC0_ELEM(ELEM_Z),
1620921a55d8Smrg                             SRC0_NEG(0),
1621921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1622921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1623921a55d8Smrg                             SRC1_ELEM(ELEM_Z),
1624921a55d8Smrg                             SRC1_NEG(0),
1625921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1626921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1627921a55d8Smrg                             LAST(0));
1628921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1629921a55d8Smrg                                 SRC1_ABS(0),
1630921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1631921a55d8Smrg                                 UPDATE_PRED(0),
1632921a55d8Smrg                                 WRITE_MASK(0),
1633921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1634921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1635921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1636921a55d8Smrg                                 DST_GPR(3),
1637921a55d8Smrg                                 DST_REL(ABSOLUTE),
1638921a55d8Smrg                                 DST_ELEM(ELEM_Z),
1639921a55d8Smrg                                 CLAMP(0));
1640921a55d8Smrg
1641921a55d8Smrg    /* 17 srcX.w DOT4 - mask */
1642921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1643921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1644921a55d8Smrg                             SRC0_ELEM(ELEM_W),
1645921a55d8Smrg                             SRC0_NEG(0),
1646921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1647921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1648921a55d8Smrg                             SRC1_ELEM(ELEM_W),
1649921a55d8Smrg                             SRC1_NEG(0),
1650921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1651921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1652921a55d8Smrg                             LAST(1));
1653921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1654921a55d8Smrg                                 SRC1_ABS(0),
1655921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1656921a55d8Smrg                                 UPDATE_PRED(0),
1657921a55d8Smrg                                 WRITE_MASK(0),
1658921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1659921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1660921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1661921a55d8Smrg                                 DST_GPR(3),
1662921a55d8Smrg                                 DST_REL(ABSOLUTE),
1663921a55d8Smrg                                 DST_ELEM(ELEM_W),
1664921a55d8Smrg                                 CLAMP(0));
1665921a55d8Smrg
1666921a55d8Smrg    /* 18 srcY.x DOT4 - mask */
1667921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1668921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1669921a55d8Smrg                             SRC0_ELEM(ELEM_X),
1670921a55d8Smrg                             SRC0_NEG(0),
1671921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1672921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1673921a55d8Smrg                             SRC1_ELEM(ELEM_X),
1674921a55d8Smrg                             SRC1_NEG(0),
1675921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1676921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1677921a55d8Smrg                             LAST(0));
1678921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1679921a55d8Smrg                                 SRC1_ABS(0),
1680921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1681921a55d8Smrg                                 UPDATE_PRED(0),
1682921a55d8Smrg                                 WRITE_MASK(0),
1683921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1684921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1685921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1686921a55d8Smrg                                 DST_GPR(3),
1687921a55d8Smrg                                 DST_REL(ABSOLUTE),
1688921a55d8Smrg                                 DST_ELEM(ELEM_X),
1689921a55d8Smrg                                 CLAMP(0));
1690921a55d8Smrg
1691921a55d8Smrg    /* 19 srcY.y DOT4 - mask */
1692921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1693921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1694921a55d8Smrg                             SRC0_ELEM(ELEM_Y),
1695921a55d8Smrg                             SRC0_NEG(0),
1696921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1697921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1698921a55d8Smrg                             SRC1_ELEM(ELEM_Y),
1699921a55d8Smrg                             SRC1_NEG(0),
1700921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1701921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1702921a55d8Smrg                             LAST(0));
1703921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1704921a55d8Smrg                                 SRC1_ABS(0),
1705921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1706921a55d8Smrg                                 UPDATE_PRED(0),
1707921a55d8Smrg                                 WRITE_MASK(1),
1708921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1709921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1710921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1711921a55d8Smrg                                 DST_GPR(3),
1712921a55d8Smrg                                 DST_REL(ABSOLUTE),
1713921a55d8Smrg                                 DST_ELEM(ELEM_Y),
1714921a55d8Smrg                                 CLAMP(0));
1715921a55d8Smrg
1716921a55d8Smrg    /* 20 srcY.z DOT4 - mask */
1717921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1718921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1719921a55d8Smrg                             SRC0_ELEM(ELEM_Z),
1720921a55d8Smrg                             SRC0_NEG(0),
1721921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1722921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1723921a55d8Smrg                             SRC1_ELEM(ELEM_Z),
1724921a55d8Smrg                             SRC1_NEG(0),
1725921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1726921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1727921a55d8Smrg                             LAST(0));
1728921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1729921a55d8Smrg                                 SRC1_ABS(0),
1730921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1731921a55d8Smrg                                 UPDATE_PRED(0),
1732921a55d8Smrg                                 WRITE_MASK(0),
1733921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1734921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1735921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1736921a55d8Smrg                                 DST_GPR(3),
1737921a55d8Smrg                                 DST_REL(ABSOLUTE),
1738921a55d8Smrg                                 DST_ELEM(ELEM_Z),
1739921a55d8Smrg                                 CLAMP(0));
1740921a55d8Smrg
1741921a55d8Smrg    /* 21 srcY.w DOT4 - mask */
1742921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1743921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1744921a55d8Smrg                             SRC0_ELEM(ELEM_W),
1745921a55d8Smrg                             SRC0_NEG(0),
1746921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1747921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1748921a55d8Smrg                             SRC1_ELEM(ELEM_W),
1749921a55d8Smrg                             SRC1_NEG(0),
1750921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1751921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1752921a55d8Smrg                             LAST(1));
1753921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1754921a55d8Smrg                                 SRC1_ABS(0),
1755921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1756921a55d8Smrg                                 UPDATE_PRED(0),
1757921a55d8Smrg                                 WRITE_MASK(0),
1758921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1759921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1760921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1761921a55d8Smrg                                 DST_GPR(3),
1762921a55d8Smrg                                 DST_REL(ABSOLUTE),
1763921a55d8Smrg                                 DST_ELEM(ELEM_W),
1764921a55d8Smrg                                 CLAMP(0));
1765921a55d8Smrg
1766921a55d8Smrg    /* 22 maskX.x DOT4 - mask */
1767921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1768921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1769921a55d8Smrg                             SRC0_ELEM(ELEM_X),
1770921a55d8Smrg                             SRC0_NEG(0),
1771921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1772921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1773921a55d8Smrg                             SRC1_ELEM(ELEM_X),
1774921a55d8Smrg                             SRC1_NEG(0),
1775921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1776921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1777921a55d8Smrg                             LAST(0));
1778921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1779921a55d8Smrg                                 SRC1_ABS(0),
1780921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1781921a55d8Smrg                                 UPDATE_PRED(0),
1782921a55d8Smrg                                 WRITE_MASK(1),
1783921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1784921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1785921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1786921a55d8Smrg                                 DST_GPR(4),
1787921a55d8Smrg                                 DST_REL(ABSOLUTE),
1788921a55d8Smrg                                 DST_ELEM(ELEM_X),
1789921a55d8Smrg                                 CLAMP(0));
1790921a55d8Smrg
1791921a55d8Smrg    /* 23 maskX.y DOT4 - mask */
1792921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1793921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1794921a55d8Smrg                             SRC0_ELEM(ELEM_Y),
1795921a55d8Smrg                             SRC0_NEG(0),
1796921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1797921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1798921a55d8Smrg                             SRC1_ELEM(ELEM_Y),
1799921a55d8Smrg                             SRC1_NEG(0),
1800921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1801921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1802921a55d8Smrg                             LAST(0));
1803921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1804921a55d8Smrg                                 SRC1_ABS(0),
1805921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1806921a55d8Smrg                                 UPDATE_PRED(0),
1807921a55d8Smrg                                 WRITE_MASK(0),
1808921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1809921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1810921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1811921a55d8Smrg                                 DST_GPR(4),
1812921a55d8Smrg                                 DST_REL(ABSOLUTE),
1813921a55d8Smrg                                 DST_ELEM(ELEM_Y),
1814921a55d8Smrg                                 CLAMP(0));
1815921a55d8Smrg
1816921a55d8Smrg    /* 24 maskX.z DOT4 - mask */
1817921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1818921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1819921a55d8Smrg                             SRC0_ELEM(ELEM_Z),
1820921a55d8Smrg                             SRC0_NEG(0),
1821921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1822921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1823921a55d8Smrg                             SRC1_ELEM(ELEM_Z),
1824921a55d8Smrg                             SRC1_NEG(0),
1825921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1826921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1827921a55d8Smrg                             LAST(0));
1828921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1829921a55d8Smrg                                 SRC1_ABS(0),
1830921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1831921a55d8Smrg                                 UPDATE_PRED(0),
1832921a55d8Smrg                                 WRITE_MASK(0),
1833921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1834921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1835921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1836921a55d8Smrg                                 DST_GPR(4),
1837921a55d8Smrg                                 DST_REL(ABSOLUTE),
1838921a55d8Smrg                                 DST_ELEM(ELEM_Z),
1839921a55d8Smrg                                 CLAMP(0));
1840921a55d8Smrg
1841921a55d8Smrg    /* 25 maskX.w DOT4 - mask */
1842921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1843921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1844921a55d8Smrg                             SRC0_ELEM(ELEM_W),
1845921a55d8Smrg                             SRC0_NEG(0),
1846921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1847921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1848921a55d8Smrg                             SRC1_ELEM(ELEM_W),
1849921a55d8Smrg                             SRC1_NEG(0),
1850921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1851921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1852921a55d8Smrg                             LAST(1));
1853921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1854921a55d8Smrg                                 SRC1_ABS(0),
1855921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1856921a55d8Smrg                                 UPDATE_PRED(0),
1857921a55d8Smrg                                 WRITE_MASK(0),
1858921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1859921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1860921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1861921a55d8Smrg                                 DST_GPR(4),
1862921a55d8Smrg                                 DST_REL(ABSOLUTE),
1863921a55d8Smrg                                 DST_ELEM(ELEM_W),
1864921a55d8Smrg                                 CLAMP(0));
1865921a55d8Smrg
1866921a55d8Smrg    /* 26 maskY.x DOT4 - mask */
1867921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1868921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1869921a55d8Smrg                             SRC0_ELEM(ELEM_X),
1870921a55d8Smrg                             SRC0_NEG(0),
1871921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1872921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1873921a55d8Smrg                             SRC1_ELEM(ELEM_X),
1874921a55d8Smrg                             SRC1_NEG(0),
1875921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1876921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1877921a55d8Smrg                             LAST(0));
1878921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1879921a55d8Smrg                                 SRC1_ABS(0),
1880921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1881921a55d8Smrg                                 UPDATE_PRED(0),
1882921a55d8Smrg                                 WRITE_MASK(0),
1883921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1884921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1885921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1886921a55d8Smrg                                 DST_GPR(4),
1887921a55d8Smrg                                 DST_REL(ABSOLUTE),
1888921a55d8Smrg                                 DST_ELEM(ELEM_X),
1889921a55d8Smrg                                 CLAMP(0));
1890921a55d8Smrg
1891921a55d8Smrg    /* 27 maskY.y DOT4 - mask */
1892921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1893921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1894921a55d8Smrg                             SRC0_ELEM(ELEM_Y),
1895921a55d8Smrg                             SRC0_NEG(0),
1896921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1897921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1898921a55d8Smrg                             SRC1_ELEM(ELEM_Y),
1899921a55d8Smrg                             SRC1_NEG(0),
1900921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1901921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1902921a55d8Smrg                             LAST(0));
1903921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1904921a55d8Smrg                                 SRC1_ABS(0),
1905921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1906921a55d8Smrg                                 UPDATE_PRED(0),
1907921a55d8Smrg                                 WRITE_MASK(1),
1908921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1909921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1910921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1911921a55d8Smrg                                 DST_GPR(4),
1912921a55d8Smrg                                 DST_REL(ABSOLUTE),
1913921a55d8Smrg                                 DST_ELEM(ELEM_Y),
1914921a55d8Smrg                                 CLAMP(0));
1915921a55d8Smrg
1916921a55d8Smrg    /* 28 maskY.z DOT4 - mask */
1917921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1918921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1919921a55d8Smrg                             SRC0_ELEM(ELEM_Z),
1920921a55d8Smrg                             SRC0_NEG(0),
1921921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1922921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1923921a55d8Smrg                             SRC1_ELEM(ELEM_Z),
1924921a55d8Smrg                             SRC1_NEG(0),
1925921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1926921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1927921a55d8Smrg                             LAST(0));
1928921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1929921a55d8Smrg                                 SRC1_ABS(0),
1930921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1931921a55d8Smrg                                 UPDATE_PRED(0),
1932921a55d8Smrg                                 WRITE_MASK(0),
1933921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1934921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1935921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1936921a55d8Smrg                                 DST_GPR(4),
1937921a55d8Smrg                                 DST_REL(ABSOLUTE),
1938921a55d8Smrg                                 DST_ELEM(ELEM_Z),
1939921a55d8Smrg                                 CLAMP(0));
1940921a55d8Smrg
1941921a55d8Smrg    /* 29 maskY.w DOT4 - mask */
1942921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1943921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1944921a55d8Smrg                             SRC0_ELEM(ELEM_W),
1945921a55d8Smrg                             SRC0_NEG(0),
1946921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1947921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1948921a55d8Smrg                             SRC1_ELEM(ELEM_W),
1949921a55d8Smrg                             SRC1_NEG(0),
1950921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
1951921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1952921a55d8Smrg                             LAST(1));
1953921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1954921a55d8Smrg                                 SRC1_ABS(0),
1955921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1956921a55d8Smrg                                 UPDATE_PRED(0),
1957921a55d8Smrg                                 WRITE_MASK(0),
1958921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1959921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
1960921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1961921a55d8Smrg                                 DST_GPR(4),
1962921a55d8Smrg                                 DST_REL(ABSOLUTE),
1963921a55d8Smrg                                 DST_ELEM(ELEM_W),
1964921a55d8Smrg                                 CLAMP(0));
1965921a55d8Smrg
1966921a55d8Smrg    /* 30 srcX / w */
1967921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
1968921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1969921a55d8Smrg                             SRC0_ELEM(ELEM_X),
1970921a55d8Smrg                             SRC0_NEG(0),
1971921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1972921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1973921a55d8Smrg                             SRC1_ELEM(ELEM_W),
1974921a55d8Smrg                             SRC1_NEG(0),
1975921a55d8Smrg                             INDEX_MODE(SQ_INDEX_AR_X),
1976921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
1977921a55d8Smrg                             LAST(1));
1978921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1979921a55d8Smrg                                 SRC1_ABS(0),
1980921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
1981921a55d8Smrg                                 UPDATE_PRED(0),
1982921a55d8Smrg                                 WRITE_MASK(1),
1983921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
1984921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_MUL),
1985921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1986921a55d8Smrg                                 DST_GPR(1),
1987921a55d8Smrg                                 DST_REL(ABSOLUTE),
1988921a55d8Smrg                                 DST_ELEM(ELEM_X),
1989921a55d8Smrg                                 CLAMP(0));
1990921a55d8Smrg
1991921a55d8Smrg    /* 31 srcY / h */
1992921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
1993921a55d8Smrg                             SRC0_REL(ABSOLUTE),
1994921a55d8Smrg                             SRC0_ELEM(ELEM_Y),
1995921a55d8Smrg                             SRC0_NEG(0),
1996921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1997921a55d8Smrg                             SRC1_REL(ABSOLUTE),
1998921a55d8Smrg                             SRC1_ELEM(ELEM_W),
1999921a55d8Smrg                             SRC1_NEG(0),
2000921a55d8Smrg                             INDEX_MODE(SQ_INDEX_AR_X),
2001921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2002921a55d8Smrg                             LAST(1));
2003921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2004921a55d8Smrg                                 SRC1_ABS(0),
2005921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2006921a55d8Smrg                                 UPDATE_PRED(0),
2007921a55d8Smrg                                 WRITE_MASK(1),
2008921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2009921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_MUL),
2010921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2011921a55d8Smrg                                 DST_GPR(1),
2012921a55d8Smrg                                 DST_REL(ABSOLUTE),
2013921a55d8Smrg                                 DST_ELEM(ELEM_Y),
2014921a55d8Smrg                                 CLAMP(0));
2015921a55d8Smrg
2016921a55d8Smrg    /* 32 maskX / w */
2017921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
2018921a55d8Smrg                             SRC0_REL(ABSOLUTE),
2019921a55d8Smrg                             SRC0_ELEM(ELEM_X),
2020921a55d8Smrg                             SRC0_NEG(0),
2021921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
2022921a55d8Smrg                             SRC1_REL(ABSOLUTE),
2023921a55d8Smrg                             SRC1_ELEM(ELEM_W),
2024921a55d8Smrg                             SRC1_NEG(0),
2025921a55d8Smrg                             INDEX_MODE(SQ_INDEX_AR_X),
2026921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2027921a55d8Smrg                             LAST(1));
2028921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2029921a55d8Smrg                                 SRC1_ABS(0),
2030921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2031921a55d8Smrg                                 UPDATE_PRED(0),
2032921a55d8Smrg                                 WRITE_MASK(1),
2033921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2034921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_MUL),
2035921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2036921a55d8Smrg                                 DST_GPR(0),
2037921a55d8Smrg                                 DST_REL(ABSOLUTE),
2038921a55d8Smrg                                 DST_ELEM(ELEM_X),
2039921a55d8Smrg                                 CLAMP(0));
2040921a55d8Smrg
2041921a55d8Smrg    /* 33 maskY / h */
2042921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
2043921a55d8Smrg                             SRC0_REL(ABSOLUTE),
2044921a55d8Smrg                             SRC0_ELEM(ELEM_Y),
2045921a55d8Smrg                             SRC0_NEG(0),
2046921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
2047921a55d8Smrg                             SRC1_REL(ABSOLUTE),
2048921a55d8Smrg                             SRC1_ELEM(ELEM_W),
2049921a55d8Smrg                             SRC1_NEG(0),
2050921a55d8Smrg                             INDEX_MODE(SQ_INDEX_AR_X),
2051921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2052921a55d8Smrg                             LAST(1));
2053921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2054921a55d8Smrg                                 SRC1_ABS(0),
2055921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2056921a55d8Smrg                                 UPDATE_PRED(0),
2057921a55d8Smrg                                 WRITE_MASK(1),
2058921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2059921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_MUL),
2060921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2061921a55d8Smrg                                 DST_GPR(0),
2062921a55d8Smrg                                 DST_REL(ABSOLUTE),
2063921a55d8Smrg                                 DST_ELEM(ELEM_Y),
2064921a55d8Smrg                                 CLAMP(0));
2065921a55d8Smrg
2066921a55d8Smrg    /* 34 srcX.x DOT4 - non-mask */
2067921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2068921a55d8Smrg                             SRC0_REL(ABSOLUTE),
2069921a55d8Smrg                             SRC0_ELEM(ELEM_X),
2070921a55d8Smrg                             SRC0_NEG(0),
2071921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2072921a55d8Smrg                             SRC1_REL(ABSOLUTE),
2073921a55d8Smrg                             SRC1_ELEM(ELEM_X),
2074921a55d8Smrg                             SRC1_NEG(0),
2075921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
2076921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2077921a55d8Smrg                             LAST(0));
2078921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2079921a55d8Smrg                                 SRC1_ABS(0),
2080921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2081921a55d8Smrg                                 UPDATE_PRED(0),
2082921a55d8Smrg                                 WRITE_MASK(1),
2083921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2084921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2085921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2086921a55d8Smrg                                 DST_GPR(2),
2087921a55d8Smrg                                 DST_REL(ABSOLUTE),
2088921a55d8Smrg                                 DST_ELEM(ELEM_X),
2089921a55d8Smrg                                 CLAMP(0));
2090921a55d8Smrg
2091921a55d8Smrg    /* 35 srcX.y DOT4 - non-mask */
2092921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2093921a55d8Smrg                             SRC0_REL(ABSOLUTE),
2094921a55d8Smrg                             SRC0_ELEM(ELEM_Y),
2095921a55d8Smrg                             SRC0_NEG(0),
2096921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2097921a55d8Smrg                             SRC1_REL(ABSOLUTE),
2098921a55d8Smrg                             SRC1_ELEM(ELEM_Y),
2099921a55d8Smrg                             SRC1_NEG(0),
2100921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
2101921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2102921a55d8Smrg                             LAST(0));
2103921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2104921a55d8Smrg                                 SRC1_ABS(0),
2105921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2106921a55d8Smrg                                 UPDATE_PRED(0),
2107921a55d8Smrg                                 WRITE_MASK(0),
2108921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2109921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2110921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2111921a55d8Smrg                                 DST_GPR(2),
2112921a55d8Smrg                                 DST_REL(ABSOLUTE),
2113921a55d8Smrg                                 DST_ELEM(ELEM_Y),
2114921a55d8Smrg                                 CLAMP(0));
2115921a55d8Smrg
2116921a55d8Smrg    /* 36 srcX.z DOT4 - non-mask */
2117921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2118921a55d8Smrg                             SRC0_REL(ABSOLUTE),
2119921a55d8Smrg                             SRC0_ELEM(ELEM_Z),
2120921a55d8Smrg                             SRC0_NEG(0),
2121921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2122921a55d8Smrg                             SRC1_REL(ABSOLUTE),
2123921a55d8Smrg                             SRC1_ELEM(ELEM_Z),
2124921a55d8Smrg                             SRC1_NEG(0),
2125921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
2126921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2127921a55d8Smrg                             LAST(0));
2128921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2129921a55d8Smrg                                 SRC1_ABS(0),
2130921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2131921a55d8Smrg                                 UPDATE_PRED(0),
2132921a55d8Smrg                                 WRITE_MASK(0),
2133921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2134921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2135921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2136921a55d8Smrg                                 DST_GPR(2),
2137921a55d8Smrg                                 DST_REL(ABSOLUTE),
2138921a55d8Smrg                                 DST_ELEM(ELEM_Z),
2139921a55d8Smrg                                 CLAMP(0));
2140921a55d8Smrg
2141921a55d8Smrg    /* 37 srcX.w DOT4 - non-mask */
2142921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2143921a55d8Smrg                             SRC0_REL(ABSOLUTE),
2144921a55d8Smrg                             SRC0_ELEM(ELEM_W),
2145921a55d8Smrg                             SRC0_NEG(0),
2146921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2147921a55d8Smrg                             SRC1_REL(ABSOLUTE),
2148921a55d8Smrg                             SRC1_ELEM(ELEM_W),
2149921a55d8Smrg                             SRC1_NEG(0),
2150921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
2151921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2152921a55d8Smrg                             LAST(1));
2153921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2154921a55d8Smrg                                 SRC1_ABS(0),
2155921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2156921a55d8Smrg                                 UPDATE_PRED(0),
2157921a55d8Smrg                                 WRITE_MASK(0),
2158921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2159921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2160921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2161921a55d8Smrg                                 DST_GPR(2),
2162921a55d8Smrg                                 DST_REL(ABSOLUTE),
2163921a55d8Smrg                                 DST_ELEM(ELEM_W),
2164921a55d8Smrg                                 CLAMP(0));
2165921a55d8Smrg
2166921a55d8Smrg    /* 38 srcY.x DOT4 - non-mask */
2167921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2168921a55d8Smrg                             SRC0_REL(ABSOLUTE),
2169921a55d8Smrg                             SRC0_ELEM(ELEM_X),
2170921a55d8Smrg                             SRC0_NEG(0),
2171921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2172921a55d8Smrg                             SRC1_REL(ABSOLUTE),
2173921a55d8Smrg                             SRC1_ELEM(ELEM_X),
2174921a55d8Smrg                             SRC1_NEG(0),
2175921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
2176921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2177921a55d8Smrg                             LAST(0));
2178921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2179921a55d8Smrg                                 SRC1_ABS(0),
2180921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2181921a55d8Smrg                                 UPDATE_PRED(0),
2182921a55d8Smrg                                 WRITE_MASK(0),
2183921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2184921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2185921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2186921a55d8Smrg                                 DST_GPR(2),
2187921a55d8Smrg                                 DST_REL(ABSOLUTE),
2188921a55d8Smrg                                 DST_ELEM(ELEM_X),
2189921a55d8Smrg                                 CLAMP(0));
2190921a55d8Smrg
2191921a55d8Smrg    /* 39 srcY.y DOT4 - non-mask */
2192921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2193921a55d8Smrg                             SRC0_REL(ABSOLUTE),
2194921a55d8Smrg                             SRC0_ELEM(ELEM_Y),
2195921a55d8Smrg                             SRC0_NEG(0),
2196921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2197921a55d8Smrg                             SRC1_REL(ABSOLUTE),
2198921a55d8Smrg                             SRC1_ELEM(ELEM_Y),
2199921a55d8Smrg                             SRC1_NEG(0),
2200921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
2201921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2202921a55d8Smrg                             LAST(0));
2203921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2204921a55d8Smrg                                 SRC1_ABS(0),
2205921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2206921a55d8Smrg                                 UPDATE_PRED(0),
2207921a55d8Smrg                                 WRITE_MASK(1),
2208921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2209921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2210921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2211921a55d8Smrg                                 DST_GPR(2),
2212921a55d8Smrg                                 DST_REL(ABSOLUTE),
2213921a55d8Smrg                                 DST_ELEM(ELEM_Y),
2214921a55d8Smrg                                 CLAMP(0));
2215921a55d8Smrg
2216921a55d8Smrg    /* 40 srcY.z DOT4 - non-mask */
2217921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2218921a55d8Smrg                             SRC0_REL(ABSOLUTE),
2219921a55d8Smrg                             SRC0_ELEM(ELEM_Z),
2220921a55d8Smrg                             SRC0_NEG(0),
2221921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2222921a55d8Smrg                             SRC1_REL(ABSOLUTE),
2223921a55d8Smrg                             SRC1_ELEM(ELEM_Z),
2224921a55d8Smrg                             SRC1_NEG(0),
2225921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
2226921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2227921a55d8Smrg                             LAST(0));
2228921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2229921a55d8Smrg                                 SRC1_ABS(0),
2230921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2231921a55d8Smrg                                 UPDATE_PRED(0),
2232921a55d8Smrg                                 WRITE_MASK(0),
2233921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2234921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2235921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2236921a55d8Smrg                                 DST_GPR(2),
2237921a55d8Smrg                                 DST_REL(ABSOLUTE),
2238921a55d8Smrg                                 DST_ELEM(ELEM_Z),
2239921a55d8Smrg                                 CLAMP(0));
2240921a55d8Smrg
2241921a55d8Smrg    /* 41 srcY.w DOT4 - non-mask */
2242921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2243921a55d8Smrg                             SRC0_REL(ABSOLUTE),
2244921a55d8Smrg                             SRC0_ELEM(ELEM_W),
2245921a55d8Smrg                             SRC0_NEG(0),
2246921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2247921a55d8Smrg                             SRC1_REL(ABSOLUTE),
2248921a55d8Smrg                             SRC1_ELEM(ELEM_W),
2249921a55d8Smrg                             SRC1_NEG(0),
2250921a55d8Smrg                             INDEX_MODE(SQ_INDEX_LOOP),
2251921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2252921a55d8Smrg                             LAST(1));
2253921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2254921a55d8Smrg                                 SRC1_ABS(0),
2255921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2256921a55d8Smrg                                 UPDATE_PRED(0),
2257921a55d8Smrg                                 WRITE_MASK(0),
2258921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2259921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_DOT4),
2260921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2261921a55d8Smrg                                 DST_GPR(2),
2262921a55d8Smrg                                 DST_REL(ABSOLUTE),
2263921a55d8Smrg                                 DST_ELEM(ELEM_W),
2264921a55d8Smrg                                 CLAMP(0));
2265921a55d8Smrg
2266921a55d8Smrg    /* 42 srcX / w */
2267921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
2268921a55d8Smrg                             SRC0_REL(ABSOLUTE),
2269921a55d8Smrg                             SRC0_ELEM(ELEM_X),
2270921a55d8Smrg                             SRC0_NEG(0),
2271921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2272921a55d8Smrg                             SRC1_REL(ABSOLUTE),
2273921a55d8Smrg                             SRC1_ELEM(ELEM_W),
2274921a55d8Smrg                             SRC1_NEG(0),
2275921a55d8Smrg                             INDEX_MODE(SQ_INDEX_AR_X),
2276921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2277921a55d8Smrg                             LAST(1));
2278921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2279921a55d8Smrg                                 SRC1_ABS(0),
2280921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2281921a55d8Smrg                                 UPDATE_PRED(0),
2282921a55d8Smrg                                 WRITE_MASK(1),
2283921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2284921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_MUL),
2285921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2286921a55d8Smrg                                 DST_GPR(0),
2287921a55d8Smrg                                 DST_REL(ABSOLUTE),
2288921a55d8Smrg                                 DST_ELEM(ELEM_X),
2289921a55d8Smrg                                 CLAMP(0));
2290921a55d8Smrg
2291921a55d8Smrg    /* 43 srcY / h */
2292921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
2293921a55d8Smrg                             SRC0_REL(ABSOLUTE),
2294921a55d8Smrg                             SRC0_ELEM(ELEM_Y),
2295921a55d8Smrg                             SRC0_NEG(0),
2296921a55d8Smrg                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2297921a55d8Smrg                             SRC1_REL(ABSOLUTE),
2298921a55d8Smrg                             SRC1_ELEM(ELEM_W),
2299921a55d8Smrg                             SRC1_NEG(0),
2300921a55d8Smrg                             INDEX_MODE(SQ_INDEX_AR_X),
2301921a55d8Smrg                             PRED_SEL(SQ_PRED_SEL_OFF),
2302921a55d8Smrg                             LAST(1));
2303921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2304921a55d8Smrg                                 SRC1_ABS(0),
2305921a55d8Smrg                                 UPDATE_EXECUTE_MASK(0),
2306921a55d8Smrg                                 UPDATE_PRED(0),
2307921a55d8Smrg                                 WRITE_MASK(1),
2308921a55d8Smrg                                 OMOD(SQ_ALU_OMOD_OFF),
2309921a55d8Smrg                                 ALU_INST(SQ_OP2_INST_MUL),
2310921a55d8Smrg                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2311921a55d8Smrg                                 DST_GPR(0),
2312921a55d8Smrg                                 DST_REL(ABSOLUTE),
2313921a55d8Smrg                                 DST_ELEM(ELEM_Y),
2314921a55d8Smrg                                 CLAMP(0));
2315921a55d8Smrg
2316921a55d8Smrg    /* mask vfetch - 44/45 - dst */
2317921a55d8Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2318921a55d8Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2319921a55d8Smrg			     FETCH_WHOLE_QUAD(0),
2320921a55d8Smrg			     BUFFER_ID(0),
2321921a55d8Smrg			     SRC_GPR(0),
2322921a55d8Smrg			     SRC_REL(ABSOLUTE),
2323921a55d8Smrg			     SRC_SEL_X(SQ_SEL_X),
2324921a55d8Smrg			     MEGA_FETCH_COUNT(24));
2325921a55d8Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(2),
2326921a55d8Smrg				 DST_REL(0),
2327921a55d8Smrg				 DST_SEL_X(SQ_SEL_X),
2328921a55d8Smrg				 DST_SEL_Y(SQ_SEL_Y),
2329921a55d8Smrg				 DST_SEL_Z(SQ_SEL_0),
2330921a55d8Smrg				 DST_SEL_W(SQ_SEL_1),
2331921a55d8Smrg				 USE_CONST_FIELDS(0),
2332921a55d8Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
2333921a55d8Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2334921a55d8Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2335921a55d8Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2336921a55d8Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
2337b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
2338b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2339b13dfe66Smrg#else
2340b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2341b13dfe66Smrg#endif
2342921a55d8Smrg			     CONST_BUF_NO_STRIDE(0),
2343921a55d8Smrg			     MEGA_FETCH(1),
2344921a55d8Smrg			     ALT_CONST(0),
2345921a55d8Smrg			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2346921a55d8Smrg    shader[i++] = VTX_DWORD_PAD;
2347921a55d8Smrg    /* 46/47 - src */
2348921a55d8Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2349921a55d8Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2350921a55d8Smrg			     FETCH_WHOLE_QUAD(0),
2351921a55d8Smrg			     BUFFER_ID(0),
2352921a55d8Smrg			     SRC_GPR(0),
2353921a55d8Smrg			     SRC_REL(ABSOLUTE),
2354921a55d8Smrg			     SRC_SEL_X(SQ_SEL_X),
2355921a55d8Smrg			     MEGA_FETCH_COUNT(8));
2356921a55d8Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
2357921a55d8Smrg				 DST_REL(0),
2358921a55d8Smrg				 DST_SEL_X(SQ_SEL_X),
2359921a55d8Smrg				 DST_SEL_Y(SQ_SEL_Y),
2360921a55d8Smrg				 DST_SEL_Z(SQ_SEL_1),
2361921a55d8Smrg				 DST_SEL_W(SQ_SEL_0),
2362921a55d8Smrg				 USE_CONST_FIELDS(0),
2363921a55d8Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
2364921a55d8Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2365921a55d8Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2366921a55d8Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2367921a55d8Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
2368b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
2369b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2370b13dfe66Smrg#else
2371b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2372b13dfe66Smrg#endif
2373921a55d8Smrg			     CONST_BUF_NO_STRIDE(0),
2374921a55d8Smrg			     MEGA_FETCH(0),
2375921a55d8Smrg			     ALT_CONST(0),
2376921a55d8Smrg			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2377921a55d8Smrg    shader[i++] = VTX_DWORD_PAD;
2378921a55d8Smrg    /* 48/49 - mask */
2379921a55d8Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2380921a55d8Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2381921a55d8Smrg			     FETCH_WHOLE_QUAD(0),
2382921a55d8Smrg			     BUFFER_ID(0),
2383921a55d8Smrg			     SRC_GPR(0),
2384921a55d8Smrg			     SRC_REL(ABSOLUTE),
2385921a55d8Smrg			     SRC_SEL_X(SQ_SEL_X),
2386921a55d8Smrg			     MEGA_FETCH_COUNT(8));
2387921a55d8Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
2388921a55d8Smrg				 DST_REL(0),
2389921a55d8Smrg				 DST_SEL_X(SQ_SEL_X),
2390921a55d8Smrg				 DST_SEL_Y(SQ_SEL_Y),
2391921a55d8Smrg				 DST_SEL_Z(SQ_SEL_1),
2392921a55d8Smrg				 DST_SEL_W(SQ_SEL_0),
2393921a55d8Smrg				 USE_CONST_FIELDS(0),
2394921a55d8Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
2395921a55d8Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2396921a55d8Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2397921a55d8Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2398921a55d8Smrg    shader[i++] = VTX_DWORD2(OFFSET(16),
2399b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
2400b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2401b13dfe66Smrg#else
2402b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2403b13dfe66Smrg#endif
2404921a55d8Smrg			     CONST_BUF_NO_STRIDE(0),
2405921a55d8Smrg			     MEGA_FETCH(0),
2406921a55d8Smrg			     ALT_CONST(0),
2407921a55d8Smrg			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2408921a55d8Smrg    shader[i++] = VTX_DWORD_PAD;
2409921a55d8Smrg
2410921a55d8Smrg    /* no mask vfetch - 50/51 - dst */
2411921a55d8Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2412921a55d8Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2413921a55d8Smrg			     FETCH_WHOLE_QUAD(0),
2414921a55d8Smrg			     BUFFER_ID(0),
2415921a55d8Smrg			     SRC_GPR(0),
2416921a55d8Smrg			     SRC_REL(ABSOLUTE),
2417921a55d8Smrg			     SRC_SEL_X(SQ_SEL_X),
2418921a55d8Smrg			     MEGA_FETCH_COUNT(16));
2419921a55d8Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
2420921a55d8Smrg				 DST_REL(0),
2421921a55d8Smrg				 DST_SEL_X(SQ_SEL_X),
2422921a55d8Smrg				 DST_SEL_Y(SQ_SEL_Y),
2423921a55d8Smrg				 DST_SEL_Z(SQ_SEL_0),
2424921a55d8Smrg				 DST_SEL_W(SQ_SEL_1),
2425921a55d8Smrg				 USE_CONST_FIELDS(0),
2426921a55d8Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
2427921a55d8Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2428921a55d8Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2429921a55d8Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2430921a55d8Smrg    shader[i++] = VTX_DWORD2(OFFSET(0),
2431b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
2432b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2433b13dfe66Smrg#else
2434b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2435b13dfe66Smrg#endif
2436921a55d8Smrg			     CONST_BUF_NO_STRIDE(0),
2437921a55d8Smrg			     MEGA_FETCH(1),
2438921a55d8Smrg			     ALT_CONST(0),
2439921a55d8Smrg			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2440921a55d8Smrg    shader[i++] = VTX_DWORD_PAD;
2441921a55d8Smrg    /* 52/53 - src */
2442921a55d8Smrg    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2443921a55d8Smrg			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2444921a55d8Smrg			     FETCH_WHOLE_QUAD(0),
2445921a55d8Smrg			     BUFFER_ID(0),
2446921a55d8Smrg			     SRC_GPR(0),
2447921a55d8Smrg			     SRC_REL(ABSOLUTE),
2448921a55d8Smrg			     SRC_SEL_X(SQ_SEL_X),
2449921a55d8Smrg			     MEGA_FETCH_COUNT(8));
2450921a55d8Smrg    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
2451921a55d8Smrg				 DST_REL(0),
2452921a55d8Smrg				 DST_SEL_X(SQ_SEL_X),
2453921a55d8Smrg				 DST_SEL_Y(SQ_SEL_Y),
2454921a55d8Smrg				 DST_SEL_Z(SQ_SEL_1),
2455921a55d8Smrg				 DST_SEL_W(SQ_SEL_0),
2456921a55d8Smrg				 USE_CONST_FIELDS(0),
2457921a55d8Smrg				 DATA_FORMAT(FMT_32_32_FLOAT),
2458921a55d8Smrg				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2459921a55d8Smrg				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2460921a55d8Smrg				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2461921a55d8Smrg    shader[i++] = VTX_DWORD2(OFFSET(8),
2462b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
2463b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2464b13dfe66Smrg#else
2465b13dfe66Smrg                             ENDIAN_SWAP(SQ_ENDIAN_NONE),
2466b13dfe66Smrg#endif
2467921a55d8Smrg			     CONST_BUF_NO_STRIDE(0),
2468921a55d8Smrg			     MEGA_FETCH(0),
2469921a55d8Smrg                             ALT_CONST(0),
2470921a55d8Smrg                             BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2471921a55d8Smrg    shader[i++] = VTX_DWORD_PAD;
2472921a55d8Smrg
2473921a55d8Smrg    return i;
2474921a55d8Smrg}
2475921a55d8Smrg
2476921a55d8Smrg/* comp ps --------------------------------------- */
247743df4709Smrgint evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
2478921a55d8Smrg{
2479921a55d8Smrg    int i = 0;
2480921a55d8Smrg
2481921a55d8Smrg    /* 0 */
248243df4709Smrg    shader[i++] = CF_DWORD0(ADDR(3),
2483921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2484921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
248543df4709Smrg                            CF_CONST(0),
2486921a55d8Smrg                            COND(SQ_CF_COND_BOOL),
2487921a55d8Smrg                            I_COUNT(0),
2488921a55d8Smrg                            VALID_PIXEL_MODE(0),
2489921a55d8Smrg                            END_OF_PROGRAM(0),
2490921a55d8Smrg                            CF_INST(SQ_CF_INST_CALL),
2491921a55d8Smrg                            WHOLE_QUAD_MODE(0),
2492921a55d8Smrg                            BARRIER(0));
2493921a55d8Smrg    /* 1 */
249443df4709Smrg    shader[i++] = CF_DWORD0(ADDR(8),
2495921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2496921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
249743df4709Smrg                            CF_CONST(0),
2498921a55d8Smrg                            COND(SQ_CF_COND_NOT_BOOL),
2499921a55d8Smrg                            I_COUNT(0),
2500921a55d8Smrg                            VALID_PIXEL_MODE(0),
2501921a55d8Smrg                            END_OF_PROGRAM(0),
2502921a55d8Smrg                            CF_INST(SQ_CF_INST_CALL),
2503921a55d8Smrg                            WHOLE_QUAD_MODE(0),
2504921a55d8Smrg                            BARRIER(0));
2505921a55d8Smrg    /* 2 */
250643df4709Smrg    shader[i++] = CF_DWORD0(ADDR(0),
250743df4709Smrg                            JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
25086322c902Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
25096322c902Smrg                            CF_CONST(0),
251043df4709Smrg                            COND(SQ_CF_COND_ACTIVE),
25116322c902Smrg                            I_COUNT(0),
25126322c902Smrg                            VALID_PIXEL_MODE(0),
251343df4709Smrg                            END_OF_PROGRAM(1),
251443df4709Smrg                            CF_INST(SQ_CF_INST_NOP),
25156322c902Smrg                            WHOLE_QUAD_MODE(0),
251643df4709Smrg                            BARRIER(1));
25176322c902Smrg
251843df4709Smrg    /* 3 - mask sub */
251943df4709Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(12),
25206322c902Smrg				KCACHE_BANK0(0),
25216322c902Smrg				KCACHE_BANK1(0),
25226322c902Smrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
25236322c902Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
25246322c902Smrg				KCACHE_ADDR0(0),
25256322c902Smrg				KCACHE_ADDR1(0),
252643df4709Smrg				I_COUNT(8),
25276322c902Smrg				ALT_CONST(0),
25286322c902Smrg				CF_INST(SQ_CF_INST_ALU),
25296322c902Smrg				WHOLE_QUAD_MODE(0),
25306322c902Smrg				BARRIER(1));
25316322c902Smrg
253243df4709Smrg    /* 4 */
253343df4709Smrg    shader[i++] = CF_DWORD0(ADDR(28),
2534921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2535921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
2536921a55d8Smrg			    CF_CONST(0),
2537921a55d8Smrg			    COND(SQ_CF_COND_ACTIVE),
253843df4709Smrg			    I_COUNT(2),
2539921a55d8Smrg			    VALID_PIXEL_MODE(0),
2540921a55d8Smrg			    END_OF_PROGRAM(0),
2541921a55d8Smrg			    CF_INST(SQ_CF_INST_TC),
2542921a55d8Smrg			    WHOLE_QUAD_MODE(0),
2543921a55d8Smrg			    BARRIER(1));
2544921a55d8Smrg
254543df4709Smrg    /* 5 */
254643df4709Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(20),
2547921a55d8Smrg				KCACHE_BANK0(0),
2548921a55d8Smrg				KCACHE_BANK1(0),
2549921a55d8Smrg				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
2550921a55d8Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2551921a55d8Smrg				KCACHE_ADDR0(0),
2552921a55d8Smrg				KCACHE_ADDR1(0),
2553921a55d8Smrg				I_COUNT(4),
255443df4709Smrg				ALT_CONST(0),
2555921a55d8Smrg				CF_INST(SQ_CF_INST_ALU),
2556921a55d8Smrg				WHOLE_QUAD_MODE(0),
2557921a55d8Smrg				BARRIER(1));
2558921a55d8Smrg
255943df4709Smrg    /* 6 */
256043df4709Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
256143df4709Smrg					  TYPE(SQ_EXPORT_PIXEL),
256243df4709Smrg					  RW_GPR(2),
256343df4709Smrg					  RW_REL(ABSOLUTE),
256443df4709Smrg					  INDEX_GPR(0),
256543df4709Smrg					  ELEM_SIZE(1));
256643df4709Smrg
256743df4709Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
256843df4709Smrg					       SRC_SEL_Y(SQ_SEL_Y),
256943df4709Smrg					       SRC_SEL_Z(SQ_SEL_Z),
257043df4709Smrg					       SRC_SEL_W(SQ_SEL_W),
257143df4709Smrg					       BURST_COUNT(1),
257243df4709Smrg					       VALID_PIXEL_MODE(0),
257343df4709Smrg					       END_OF_PROGRAM(0),
257443df4709Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
257543df4709Smrg					       MARK(0),
257643df4709Smrg					       BARRIER(1));
257743df4709Smrg    /* 7 */
2578921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(0),
2579921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2580921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
2581921a55d8Smrg			    CF_CONST(0),
2582921a55d8Smrg			    COND(SQ_CF_COND_ACTIVE),
2583921a55d8Smrg			    I_COUNT(0),
2584921a55d8Smrg			    VALID_PIXEL_MODE(0),
2585921a55d8Smrg			    END_OF_PROGRAM(0),
2586921a55d8Smrg			    CF_INST(SQ_CF_INST_RETURN),
2587921a55d8Smrg			    WHOLE_QUAD_MODE(0),
258843df4709Smrg			    BARRIER(1));
2589921a55d8Smrg
259043df4709Smrg    /* 8 - non-mask sub */
259143df4709Smrg    shader[i++] = CF_ALU_DWORD0(ADDR(24),
2592921a55d8Smrg				KCACHE_BANK0(0),
2593921a55d8Smrg				KCACHE_BANK1(0),
2594921a55d8Smrg				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
2595921a55d8Smrg    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2596921a55d8Smrg				KCACHE_ADDR0(0),
2597921a55d8Smrg				KCACHE_ADDR1(0),
2598921a55d8Smrg				I_COUNT(4),
2599921a55d8Smrg				ALT_CONST(0),
2600921a55d8Smrg				CF_INST(SQ_CF_INST_ALU),
2601921a55d8Smrg				WHOLE_QUAD_MODE(0),
2602921a55d8Smrg				BARRIER(1));
260343df4709Smrg    /* 9 */
260443df4709Smrg    shader[i++] = CF_DWORD0(ADDR(32),
2605921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2606921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
2607921a55d8Smrg			    CF_CONST(0),
2608921a55d8Smrg			    COND(SQ_CF_COND_ACTIVE),
2609921a55d8Smrg			    I_COUNT(1),
2610921a55d8Smrg			    VALID_PIXEL_MODE(0),
2611921a55d8Smrg			    END_OF_PROGRAM(0),
2612921a55d8Smrg			    CF_INST(SQ_CF_INST_TC),
2613921a55d8Smrg			    WHOLE_QUAD_MODE(0),
2614921a55d8Smrg			    BARRIER(1));
2615921a55d8Smrg
261643df4709Smrg    /* 10 */
261743df4709Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
261843df4709Smrg					  TYPE(SQ_EXPORT_PIXEL),
261943df4709Smrg					  RW_GPR(0),
262043df4709Smrg					  RW_REL(ABSOLUTE),
262143df4709Smrg					  INDEX_GPR(0),
262243df4709Smrg					  ELEM_SIZE(1));
262343df4709Smrg    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
262443df4709Smrg					       SRC_SEL_Y(SQ_SEL_Y),
262543df4709Smrg					       SRC_SEL_Z(SQ_SEL_Z),
262643df4709Smrg					       SRC_SEL_W(SQ_SEL_W),
262743df4709Smrg					       BURST_COUNT(1),
262843df4709Smrg					       VALID_PIXEL_MODE(0),
262943df4709Smrg					       END_OF_PROGRAM(0),
263043df4709Smrg					       CF_INST(SQ_CF_INST_EXPORT_DONE),
263143df4709Smrg					       MARK(0),
263243df4709Smrg					       BARRIER(1));
26336322c902Smrg
263443df4709Smrg    /* 11 */
2635921a55d8Smrg    shader[i++] = CF_DWORD0(ADDR(0),
2636921a55d8Smrg			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2637921a55d8Smrg    shader[i++] = CF_DWORD1(POP_COUNT(0),
2638921a55d8Smrg			    CF_CONST(0),
2639921a55d8Smrg			    COND(SQ_CF_COND_ACTIVE),
2640921a55d8Smrg			    I_COUNT(0),
2641921a55d8Smrg			    VALID_PIXEL_MODE(0),
2642921a55d8Smrg			    END_OF_PROGRAM(0),
2643921a55d8Smrg			    CF_INST(SQ_CF_INST_RETURN),
2644921a55d8Smrg			    WHOLE_QUAD_MODE(0),
264543df4709Smrg			    BARRIER(1));
26466322c902Smrg
264743df4709Smrg    /* 12 interpolate src tex coords - mask */
2648921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2649921a55d8Smrg			     SRC0_REL(ABSOLUTE),
265043df4709Smrg			     SRC0_ELEM(ELEM_Y),
2651921a55d8Smrg			     SRC0_NEG(0),
265243df4709Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2653921a55d8Smrg			     SRC1_REL(ABSOLUTE),
2654921a55d8Smrg			     SRC1_ELEM(ELEM_X),
2655921a55d8Smrg			     SRC1_NEG(0),
265643df4709Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
2657921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2658921a55d8Smrg			     LAST(0));
2659921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2660921a55d8Smrg				 SRC1_ABS(0),
2661921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
2662921a55d8Smrg				 UPDATE_PRED(0),
2663921a55d8Smrg				 WRITE_MASK(1),
2664921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
266543df4709Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
266643df4709Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
266743df4709Smrg				 DST_GPR(1),
2668921a55d8Smrg				 DST_REL(ABSOLUTE),
2669921a55d8Smrg				 DST_ELEM(ELEM_X),
267043df4709Smrg				 CLAMP(0));
267143df4709Smrg    /* 13 */
2672921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2673921a55d8Smrg			     SRC0_REL(ABSOLUTE),
267443df4709Smrg			     SRC0_ELEM(ELEM_X),
2675921a55d8Smrg			     SRC0_NEG(0),
267643df4709Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2677921a55d8Smrg			     SRC1_REL(ABSOLUTE),
267843df4709Smrg			     SRC1_ELEM(ELEM_X),
2679921a55d8Smrg			     SRC1_NEG(0),
268043df4709Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
2681921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2682921a55d8Smrg			     LAST(0));
2683921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2684921a55d8Smrg				 SRC1_ABS(0),
2685921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
2686921a55d8Smrg				 UPDATE_PRED(0),
2687921a55d8Smrg				 WRITE_MASK(1),
2688921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
268943df4709Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
269043df4709Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
269143df4709Smrg				 DST_GPR(1),
2692921a55d8Smrg				 DST_REL(ABSOLUTE),
2693921a55d8Smrg				 DST_ELEM(ELEM_Y),
269443df4709Smrg				 CLAMP(0));
269543df4709Smrg    /* 14 */
2696921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2697921a55d8Smrg			     SRC0_REL(ABSOLUTE),
269843df4709Smrg			     SRC0_ELEM(ELEM_Y),
2699921a55d8Smrg			     SRC0_NEG(0),
270043df4709Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2701921a55d8Smrg			     SRC1_REL(ABSOLUTE),
270243df4709Smrg			     SRC1_ELEM(ELEM_X),
2703921a55d8Smrg			     SRC1_NEG(0),
270443df4709Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
2705921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2706921a55d8Smrg			     LAST(0));
2707921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2708921a55d8Smrg				 SRC1_ABS(0),
2709921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
2710921a55d8Smrg				 UPDATE_PRED(0),
271143df4709Smrg				 WRITE_MASK(0),
2712921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
271343df4709Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
271443df4709Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
271543df4709Smrg				 DST_GPR(1),
2716921a55d8Smrg				 DST_REL(ABSOLUTE),
2717921a55d8Smrg				 DST_ELEM(ELEM_Z),
271843df4709Smrg				 CLAMP(0));
271943df4709Smrg    /* 15 */
2720921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2721921a55d8Smrg			     SRC0_REL(ABSOLUTE),
272243df4709Smrg			     SRC0_ELEM(ELEM_X),
2723921a55d8Smrg			     SRC0_NEG(0),
272443df4709Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2725921a55d8Smrg			     SRC1_REL(ABSOLUTE),
272643df4709Smrg			     SRC1_ELEM(ELEM_X),
2727921a55d8Smrg			     SRC1_NEG(0),
272843df4709Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
2729921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2730921a55d8Smrg			     LAST(1));
2731921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2732921a55d8Smrg				 SRC1_ABS(0),
2733921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
2734921a55d8Smrg				 UPDATE_PRED(0),
273543df4709Smrg				 WRITE_MASK(0),
2736921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
273743df4709Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
273843df4709Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
273943df4709Smrg				 DST_GPR(1),
2740921a55d8Smrg				 DST_REL(ABSOLUTE),
2741921a55d8Smrg				 DST_ELEM(ELEM_W),
274243df4709Smrg				 CLAMP(0));
2743921a55d8Smrg
274443df4709Smrg    /* 16 interpolate mask tex coords */
2745921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2746921a55d8Smrg			     SRC0_REL(ABSOLUTE),
2747921a55d8Smrg			     SRC0_ELEM(ELEM_Y),
2748921a55d8Smrg			     SRC0_NEG(0),
274943df4709Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
2750921a55d8Smrg			     SRC1_REL(ABSOLUTE),
2751921a55d8Smrg			     SRC1_ELEM(ELEM_X),
2752921a55d8Smrg			     SRC1_NEG(0),
2753921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
2754921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2755921a55d8Smrg			     LAST(0));
2756921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2757921a55d8Smrg				 SRC1_ABS(0),
2758921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
2759921a55d8Smrg				 UPDATE_PRED(0),
2760921a55d8Smrg				 WRITE_MASK(1),
2761921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
2762921a55d8Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2763921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2764921a55d8Smrg				 DST_GPR(0),
2765921a55d8Smrg				 DST_REL(ABSOLUTE),
2766921a55d8Smrg				 DST_ELEM(ELEM_X),
2767921a55d8Smrg				 CLAMP(0));
276843df4709Smrg    /* 17 */
2769921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2770921a55d8Smrg			     SRC0_REL(ABSOLUTE),
2771921a55d8Smrg			     SRC0_ELEM(ELEM_X),
2772921a55d8Smrg			     SRC0_NEG(0),
277343df4709Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
2774921a55d8Smrg			     SRC1_REL(ABSOLUTE),
2775921a55d8Smrg			     SRC1_ELEM(ELEM_X),
2776921a55d8Smrg			     SRC1_NEG(0),
2777921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
2778921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2779921a55d8Smrg			     LAST(0));
2780921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2781921a55d8Smrg				 SRC1_ABS(0),
2782921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
2783921a55d8Smrg				 UPDATE_PRED(0),
2784921a55d8Smrg				 WRITE_MASK(1),
2785921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
2786921a55d8Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2787921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2788921a55d8Smrg				 DST_GPR(0),
2789921a55d8Smrg				 DST_REL(ABSOLUTE),
2790921a55d8Smrg				 DST_ELEM(ELEM_Y),
2791921a55d8Smrg				 CLAMP(0));
279243df4709Smrg    /* 18 */
2793921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2794921a55d8Smrg			     SRC0_REL(ABSOLUTE),
2795921a55d8Smrg			     SRC0_ELEM(ELEM_Y),
2796921a55d8Smrg			     SRC0_NEG(0),
279743df4709Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
2798921a55d8Smrg			     SRC1_REL(ABSOLUTE),
2799921a55d8Smrg			     SRC1_ELEM(ELEM_X),
2800921a55d8Smrg			     SRC1_NEG(0),
2801921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
2802921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2803921a55d8Smrg			     LAST(0));
2804921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2805921a55d8Smrg				 SRC1_ABS(0),
2806921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
2807921a55d8Smrg				 UPDATE_PRED(0),
2808921a55d8Smrg				 WRITE_MASK(0),
2809921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
2810921a55d8Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2811921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2812921a55d8Smrg				 DST_GPR(0),
2813921a55d8Smrg				 DST_REL(ABSOLUTE),
2814921a55d8Smrg				 DST_ELEM(ELEM_Z),
2815921a55d8Smrg				 CLAMP(0));
281643df4709Smrg    /* 19 */
2817921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2818921a55d8Smrg			     SRC0_REL(ABSOLUTE),
2819921a55d8Smrg			     SRC0_ELEM(ELEM_X),
2820921a55d8Smrg			     SRC0_NEG(0),
282143df4709Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
2822921a55d8Smrg			     SRC1_REL(ABSOLUTE),
2823921a55d8Smrg			     SRC1_ELEM(ELEM_X),
2824921a55d8Smrg			     SRC1_NEG(0),
2825921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
2826921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2827921a55d8Smrg			     LAST(1));
2828921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2829921a55d8Smrg				 SRC1_ABS(0),
2830921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
2831921a55d8Smrg				 UPDATE_PRED(0),
2832921a55d8Smrg				 WRITE_MASK(0),
2833921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
2834921a55d8Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2835921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2836921a55d8Smrg				 DST_GPR(0),
2837921a55d8Smrg				 DST_REL(ABSOLUTE),
2838921a55d8Smrg				 DST_ELEM(ELEM_W),
2839921a55d8Smrg				 CLAMP(0));
2840921a55d8Smrg
284143df4709Smrg    /* 20 - alu 0 */
284243df4709Smrg    /* MUL gpr[2].x gpr[0].x gpr[1].x */
284343df4709Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2844921a55d8Smrg			     SRC0_REL(ABSOLUTE),
2845921a55d8Smrg			     SRC0_ELEM(ELEM_X),
2846921a55d8Smrg			     SRC0_NEG(0),
284743df4709Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
2848921a55d8Smrg			     SRC1_REL(ABSOLUTE),
2849921a55d8Smrg			     SRC1_ELEM(ELEM_X),
2850921a55d8Smrg			     SRC1_NEG(0),
285143df4709Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
2852921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2853921a55d8Smrg			     LAST(0));
2854921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2855921a55d8Smrg				 SRC1_ABS(0),
2856921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
2857921a55d8Smrg				 UPDATE_PRED(0),
2858921a55d8Smrg				 WRITE_MASK(1),
2859921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
286043df4709Smrg				 ALU_INST(SQ_OP2_INST_MUL),
2861921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
286243df4709Smrg				 DST_GPR(2),
2863921a55d8Smrg				 DST_REL(ABSOLUTE),
2864921a55d8Smrg				 DST_ELEM(ELEM_X),
2865921a55d8Smrg				 CLAMP(1));
286643df4709Smrg    /* 21 - alu 1 */
286743df4709Smrg    /* MUL gpr[2].y gpr[0].y gpr[1].y */
286843df4709Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2869921a55d8Smrg			     SRC0_REL(ABSOLUTE),
2870921a55d8Smrg			     SRC0_ELEM(ELEM_Y),
2871921a55d8Smrg			     SRC0_NEG(0),
287243df4709Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
2873921a55d8Smrg			     SRC1_REL(ABSOLUTE),
287443df4709Smrg			     SRC1_ELEM(ELEM_Y),
2875921a55d8Smrg			     SRC1_NEG(0),
287643df4709Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
2877921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2878921a55d8Smrg			     LAST(0));
2879921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2880921a55d8Smrg				 SRC1_ABS(0),
2881921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
2882921a55d8Smrg				 UPDATE_PRED(0),
2883921a55d8Smrg				 WRITE_MASK(1),
2884921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
288543df4709Smrg				 ALU_INST(SQ_OP2_INST_MUL),
2886921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
288743df4709Smrg				 DST_GPR(2),
2888921a55d8Smrg				 DST_REL(ABSOLUTE),
2889921a55d8Smrg				 DST_ELEM(ELEM_Y),
2890921a55d8Smrg				 CLAMP(1));
289143df4709Smrg    /* 22 - alu 2 */
289243df4709Smrg    /* MUL gpr[2].z gpr[0].z gpr[1].z */
289343df4709Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2894921a55d8Smrg			     SRC0_REL(ABSOLUTE),
2895921a55d8Smrg			     SRC0_ELEM(ELEM_Z),
2896921a55d8Smrg			     SRC0_NEG(0),
289743df4709Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
2898921a55d8Smrg			     SRC1_REL(ABSOLUTE),
289943df4709Smrg			     SRC1_ELEM(ELEM_Z),
2900921a55d8Smrg			     SRC1_NEG(0),
290143df4709Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
2902921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2903921a55d8Smrg			     LAST(0));
2904921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2905921a55d8Smrg				 SRC1_ABS(0),
2906921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
2907921a55d8Smrg				 UPDATE_PRED(0),
2908921a55d8Smrg				 WRITE_MASK(1),
2909921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
291043df4709Smrg				 ALU_INST(SQ_OP2_INST_MUL),
2911921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
291243df4709Smrg				 DST_GPR(2),
2913921a55d8Smrg				 DST_REL(ABSOLUTE),
2914921a55d8Smrg				 DST_ELEM(ELEM_Z),
2915921a55d8Smrg				 CLAMP(1));
291643df4709Smrg    /* 23 - alu 3 */
291743df4709Smrg    /* MUL gpr[2].w gpr[0].w gpr[1].w */
291843df4709Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2919921a55d8Smrg			     SRC0_REL(ABSOLUTE),
2920921a55d8Smrg			     SRC0_ELEM(ELEM_W),
2921921a55d8Smrg			     SRC0_NEG(0),
292243df4709Smrg			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
2923921a55d8Smrg			     SRC1_REL(ABSOLUTE),
292443df4709Smrg			     SRC1_ELEM(ELEM_W),
2925921a55d8Smrg			     SRC1_NEG(0),
292643df4709Smrg			     INDEX_MODE(SQ_INDEX_LOOP),
2927921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2928921a55d8Smrg			     LAST(1));
2929921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2930921a55d8Smrg				 SRC1_ABS(0),
2931921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
2932921a55d8Smrg				 UPDATE_PRED(0),
2933921a55d8Smrg				 WRITE_MASK(1),
2934921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
293543df4709Smrg				 ALU_INST(SQ_OP2_INST_MUL),
2936921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_012),
293743df4709Smrg				 DST_GPR(2),
2938921a55d8Smrg				 DST_REL(ABSOLUTE),
2939921a55d8Smrg				 DST_ELEM(ELEM_W),
2940921a55d8Smrg				 CLAMP(1));
2941921a55d8Smrg
294243df4709Smrg    /* 24 - interpolate tex coords - non-mask */
2943921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2944921a55d8Smrg			     SRC0_REL(ABSOLUTE),
2945921a55d8Smrg			     SRC0_ELEM(ELEM_Y),
2946921a55d8Smrg			     SRC0_NEG(0),
294743df4709Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2948921a55d8Smrg			     SRC1_REL(ABSOLUTE),
2949921a55d8Smrg			     SRC1_ELEM(ELEM_X),
2950921a55d8Smrg			     SRC1_NEG(0),
2951921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
2952921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2953921a55d8Smrg			     LAST(0));
2954921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2955921a55d8Smrg				 SRC1_ABS(0),
2956921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
2957921a55d8Smrg				 UPDATE_PRED(0),
2958921a55d8Smrg				 WRITE_MASK(1),
2959921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
2960921a55d8Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2961921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
296243df4709Smrg				 DST_GPR(0),
2963921a55d8Smrg				 DST_REL(ABSOLUTE),
2964921a55d8Smrg				 DST_ELEM(ELEM_X),
2965921a55d8Smrg				 CLAMP(0));
296643df4709Smrg    /* 25 */
2967921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2968921a55d8Smrg			     SRC0_REL(ABSOLUTE),
2969921a55d8Smrg			     SRC0_ELEM(ELEM_X),
2970921a55d8Smrg			     SRC0_NEG(0),
297143df4709Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2972921a55d8Smrg			     SRC1_REL(ABSOLUTE),
2973921a55d8Smrg			     SRC1_ELEM(ELEM_X),
2974921a55d8Smrg			     SRC1_NEG(0),
2975921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
2976921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
2977921a55d8Smrg			     LAST(0));
2978921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2979921a55d8Smrg				 SRC1_ABS(0),
2980921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
2981921a55d8Smrg				 UPDATE_PRED(0),
2982921a55d8Smrg				 WRITE_MASK(1),
2983921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
2984921a55d8Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2985921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
298643df4709Smrg				 DST_GPR(0),
2987921a55d8Smrg				 DST_REL(ABSOLUTE),
2988921a55d8Smrg				 DST_ELEM(ELEM_Y),
2989921a55d8Smrg				 CLAMP(0));
299043df4709Smrg    /* 26 */
2991921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2992921a55d8Smrg			     SRC0_REL(ABSOLUTE),
2993921a55d8Smrg			     SRC0_ELEM(ELEM_Y),
2994921a55d8Smrg			     SRC0_NEG(0),
299543df4709Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2996921a55d8Smrg			     SRC1_REL(ABSOLUTE),
2997921a55d8Smrg			     SRC1_ELEM(ELEM_X),
2998921a55d8Smrg			     SRC1_NEG(0),
2999921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
3000921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
3001921a55d8Smrg			     LAST(0));
3002921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3003921a55d8Smrg				 SRC1_ABS(0),
3004921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
3005921a55d8Smrg				 UPDATE_PRED(0),
3006921a55d8Smrg				 WRITE_MASK(0),
3007921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
3008921a55d8Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
3009921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
301043df4709Smrg				 DST_GPR(0),
3011921a55d8Smrg				 DST_REL(ABSOLUTE),
3012921a55d8Smrg				 DST_ELEM(ELEM_Z),
3013921a55d8Smrg				 CLAMP(0));
301443df4709Smrg    /* 27 */
3015921a55d8Smrg    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
3016921a55d8Smrg			     SRC0_REL(ABSOLUTE),
3017921a55d8Smrg			     SRC0_ELEM(ELEM_X),
3018921a55d8Smrg			     SRC0_NEG(0),
301943df4709Smrg			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
3020921a55d8Smrg			     SRC1_REL(ABSOLUTE),
3021921a55d8Smrg			     SRC1_ELEM(ELEM_X),
3022921a55d8Smrg			     SRC1_NEG(0),
3023921a55d8Smrg			     INDEX_MODE(SQ_INDEX_AR_X),
3024921a55d8Smrg			     PRED_SEL(SQ_PRED_SEL_OFF),
3025921a55d8Smrg			     LAST(1));
3026921a55d8Smrg    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3027921a55d8Smrg				 SRC1_ABS(0),
3028921a55d8Smrg				 UPDATE_EXECUTE_MASK(0),
3029921a55d8Smrg				 UPDATE_PRED(0),
3030921a55d8Smrg				 WRITE_MASK(0),
3031921a55d8Smrg				 OMOD(SQ_ALU_OMOD_OFF),
3032921a55d8Smrg				 ALU_INST(SQ_OP2_INST_INTERP_XY),
3033921a55d8Smrg				 BANK_SWIZZLE(SQ_ALU_VEC_210),
303443df4709Smrg				 DST_GPR(0),
3035921a55d8Smrg				 DST_REL(ABSOLUTE),
3036921a55d8Smrg				 DST_ELEM(ELEM_W),
3037921a55d8Smrg				 CLAMP(0));
3038921a55d8Smrg
303943df4709Smrg    /* 28/29 - src - mask */
3040921a55d8Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
3041921a55d8Smrg			     INST_MOD(0),
3042921a55d8Smrg			     FETCH_WHOLE_QUAD(0),
304343df4709Smrg			     RESOURCE_ID(0),
3044921a55d8Smrg			     SRC_GPR(1),
3045921a55d8Smrg			     SRC_REL(ABSOLUTE),
3046921a55d8Smrg			     ALT_CONST(0),
3047921a55d8Smrg			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
3048921a55d8Smrg			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
3049921a55d8Smrg    shader[i++] = TEX_DWORD1(DST_GPR(1),
3050921a55d8Smrg			     DST_REL(ABSOLUTE),
3051921a55d8Smrg			     DST_SEL_X(SQ_SEL_X),
3052921a55d8Smrg			     DST_SEL_Y(SQ_SEL_Y),
3053921a55d8Smrg			     DST_SEL_Z(SQ_SEL_Z),
3054921a55d8Smrg			     DST_SEL_W(SQ_SEL_W),
3055921a55d8Smrg			     LOD_BIAS(0),
3056921a55d8Smrg			     COORD_TYPE_X(TEX_NORMALIZED),
3057921a55d8Smrg			     COORD_TYPE_Y(TEX_NORMALIZED),
3058921a55d8Smrg			     COORD_TYPE_Z(TEX_NORMALIZED),
3059921a55d8Smrg			     COORD_TYPE_W(TEX_NORMALIZED));
306043df4709Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
306143df4709Smrg			     OFFSET_Y(0),
306243df4709Smrg			     OFFSET_Z(0),
306343df4709Smrg			     SAMPLER_ID(0),
306443df4709Smrg			     SRC_SEL_X(SQ_SEL_X),
306543df4709Smrg			     SRC_SEL_Y(SQ_SEL_Y),
306643df4709Smrg			     SRC_SEL_Z(SQ_SEL_0),
306743df4709Smrg			     SRC_SEL_W(SQ_SEL_1));
306843df4709Smrg    shader[i++] = TEX_DWORD_PAD;
306943df4709Smrg    /* 30/31 - mask */
307043df4709Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
307143df4709Smrg			     INST_MOD(0),
307243df4709Smrg			     FETCH_WHOLE_QUAD(0),
307343df4709Smrg			     RESOURCE_ID(1),
307443df4709Smrg			     SRC_GPR(0),
307543df4709Smrg			     SRC_REL(ABSOLUTE),
307643df4709Smrg                             ALT_CONST(0),
307743df4709Smrg                             RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
307843df4709Smrg                             SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
307943df4709Smrg    shader[i++] = TEX_DWORD1(DST_GPR(0),
308043df4709Smrg			     DST_REL(ABSOLUTE),
308143df4709Smrg			     DST_SEL_X(SQ_SEL_X),
308243df4709Smrg			     DST_SEL_Y(SQ_SEL_Y),
308343df4709Smrg			     DST_SEL_Z(SQ_SEL_Z),
308443df4709Smrg			     DST_SEL_W(SQ_SEL_W),
308543df4709Smrg			     LOD_BIAS(0),
308643df4709Smrg			     COORD_TYPE_X(TEX_NORMALIZED),
308743df4709Smrg			     COORD_TYPE_Y(TEX_NORMALIZED),
308843df4709Smrg			     COORD_TYPE_Z(TEX_NORMALIZED),
308943df4709Smrg			     COORD_TYPE_W(TEX_NORMALIZED));
3090921a55d8Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
3091921a55d8Smrg			     OFFSET_Y(0),
3092921a55d8Smrg			     OFFSET_Z(0),
3093921a55d8Smrg			     SAMPLER_ID(1),
3094921a55d8Smrg			     SRC_SEL_X(SQ_SEL_X),
3095921a55d8Smrg			     SRC_SEL_Y(SQ_SEL_Y),
3096921a55d8Smrg			     SRC_SEL_Z(SQ_SEL_0),
3097921a55d8Smrg			     SRC_SEL_W(SQ_SEL_1));
3098921a55d8Smrg    shader[i++] = TEX_DWORD_PAD;
3099921a55d8Smrg
310043df4709Smrg    /* 32/33 - src - non-mask */
310143df4709Smrg    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
310243df4709Smrg			     INST_MOD(0),
310343df4709Smrg			     FETCH_WHOLE_QUAD(0),
310443df4709Smrg			     RESOURCE_ID(0),
310543df4709Smrg			     SRC_GPR(0),
310643df4709Smrg			     SRC_REL(ABSOLUTE),
310743df4709Smrg			     ALT_CONST(0),
310843df4709Smrg			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
310943df4709Smrg			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
311043df4709Smrg    shader[i++] = TEX_DWORD1(DST_GPR(0),
311143df4709Smrg			     DST_REL(ABSOLUTE),
311243df4709Smrg			     DST_SEL_X(SQ_SEL_X),
311343df4709Smrg			     DST_SEL_Y(SQ_SEL_Y),
311443df4709Smrg			     DST_SEL_Z(SQ_SEL_Z),
311543df4709Smrg			     DST_SEL_W(SQ_SEL_W),
311643df4709Smrg			     LOD_BIAS(0),
311743df4709Smrg			     COORD_TYPE_X(TEX_NORMALIZED),
311843df4709Smrg			     COORD_TYPE_Y(TEX_NORMALIZED),
311943df4709Smrg			     COORD_TYPE_Z(TEX_NORMALIZED),
312043df4709Smrg			     COORD_TYPE_W(TEX_NORMALIZED));
312143df4709Smrg    shader[i++] = TEX_DWORD2(OFFSET_X(0),
312243df4709Smrg			     OFFSET_Y(0),
312343df4709Smrg			     OFFSET_Z(0),
312443df4709Smrg			     SAMPLER_ID(0),
312543df4709Smrg			     SRC_SEL_X(SQ_SEL_X),
312643df4709Smrg			     SRC_SEL_Y(SQ_SEL_Y),
312743df4709Smrg			     SRC_SEL_Z(SQ_SEL_0),
312843df4709Smrg			     SRC_SEL_W(SQ_SEL_1));
312943df4709Smrg    shader[i++] = TEX_DWORD_PAD;
3130921a55d8Smrg
3131921a55d8Smrg    return i;
3132921a55d8Smrg}
313343df4709Smrg
313443df4709Smrg#endif
3135