1/*
2 * Copyright 2011 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Author: Alex Deucher <alexander.deucher@amd.com>
24 *
25 */
26
27#ifdef HAVE_CONFIG_H
28#include "config.h"
29#endif
30
31#include "xf86.h"
32
33#include "cayman_shader.h"
34#include "cayman_reg.h"
35
36/* solid vs --------------------------------------- */
37int cayman_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader)
38{
39    int i = 0;
40
41    /* 0 */
42    shader[i++] = CF_DWORD0(ADDR(4),
43			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
44    shader[i++] = CF_DWORD1(POP_COUNT(0),
45			    CF_CONST(0),
46			    COND(SQ_CF_COND_ACTIVE),
47			    I_COUNT(1),
48			    VALID_PIXEL_MODE(0),
49			    CF_INST(SQ_CF_INST_TC),
50			    BARRIER(1));
51    /* 1 */
52    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
53					  TYPE(SQ_EXPORT_POS),
54					  RW_GPR(1),
55					  RW_REL(ABSOLUTE),
56					  INDEX_GPR(0),
57					  ELEM_SIZE(0));
58    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
59					       SRC_SEL_Y(SQ_SEL_Y),
60					       SRC_SEL_Z(SQ_SEL_Z),
61					       SRC_SEL_W(SQ_SEL_W),
62					       BURST_COUNT(1),
63					       VALID_PIXEL_MODE(0),
64					       CF_INST(SQ_CF_INST_EXPORT_DONE),
65					       MARK(0),
66					       BARRIER(1));
67    /* 2 - always export a param whether it's used or not */
68    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
69					  TYPE(SQ_EXPORT_PARAM),
70					  RW_GPR(0),
71					  RW_REL(ABSOLUTE),
72					  INDEX_GPR(0),
73					  ELEM_SIZE(0));
74    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
75					       SRC_SEL_Y(SQ_SEL_Y),
76					       SRC_SEL_Z(SQ_SEL_Z),
77					       SRC_SEL_W(SQ_SEL_W),
78					       BURST_COUNT(0),
79					       VALID_PIXEL_MODE(0),
80					       CF_INST(SQ_CF_INST_EXPORT_DONE),
81					       MARK(0),
82					       BARRIER(0));
83    /* 3 - end */
84    shader[i++] = CF_DWORD0(ADDR(0),
85			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
86    shader[i++] = CF_DWORD1(POP_COUNT(0),
87			    CF_CONST(0),
88			    COND(SQ_CF_COND_ACTIVE),
89			    I_COUNT(0),
90			    VALID_PIXEL_MODE(0),
91			    CF_INST(SQ_CF_INST_END),
92			    BARRIER(1));
93    /* 4/5 */
94    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
95			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
96			     FETCH_WHOLE_QUAD(0),
97			     BUFFER_ID(0),
98			     SRC_GPR(0),
99			     SRC_REL(ABSOLUTE),
100			     SRC_SEL_X(SQ_SEL_X),
101			     SRC_SEL_Y(SQ_SEL_Y),
102			     STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
103			     LDS_REQ(0),
104			     COALESCED_READ(0));
105    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
106				 DST_REL(0),
107				 DST_SEL_X(SQ_SEL_X),
108				 DST_SEL_Y(SQ_SEL_Y),
109				 DST_SEL_Z(SQ_SEL_0),
110				 DST_SEL_W(SQ_SEL_1),
111				 USE_CONST_FIELDS(0),
112				 DATA_FORMAT(FMT_32_32_FLOAT),
113				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
114				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
115				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
116    shader[i++] = VTX_DWORD2(OFFSET(0),
117#if X_BYTE_ORDER == X_BIG_ENDIAN
118			     ENDIAN_SWAP(SQ_ENDIAN_8IN32),
119#else
120			     ENDIAN_SWAP(ENDIAN_NONE),
121#endif
122			     CONST_BUF_NO_STRIDE(0),
123			     ALT_CONST(0),
124			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
125    shader[i++] = VTX_DWORD_PAD;
126
127    return i;
128}
129
130/* solid ps --------------------------------------- */
131int cayman_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader)
132{
133    int i = 0;
134
135    /* 0 */
136    shader[i++] = CF_ALU_DWORD0(ADDR(3),
137				KCACHE_BANK0(0),
138				KCACHE_BANK1(0),
139				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
140    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
141				KCACHE_ADDR0(0),
142				KCACHE_ADDR1(0),
143				I_COUNT(4),
144				ALT_CONST(0),
145				CF_INST(SQ_CF_INST_ALU),
146				WHOLE_QUAD_MODE(0),
147				BARRIER(1));
148    /* 1 */
149    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
150					  TYPE(SQ_EXPORT_PIXEL),
151					  RW_GPR(0),
152					  RW_REL(ABSOLUTE),
153					  INDEX_GPR(0),
154					  ELEM_SIZE(1));
155    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
156					       SRC_SEL_Y(SQ_SEL_Y),
157					       SRC_SEL_Z(SQ_SEL_Z),
158					       SRC_SEL_W(SQ_SEL_W),
159					       BURST_COUNT(1),
160					       VALID_PIXEL_MODE(0),
161					       CF_INST(SQ_CF_INST_EXPORT_DONE),
162					       MARK(0),
163					       BARRIER(1));
164
165    /* 2 - end */
166    shader[i++] = CF_DWORD0(ADDR(0),
167			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
168    shader[i++] = CF_DWORD1(POP_COUNT(0),
169			    CF_CONST(0),
170			    COND(SQ_CF_COND_ACTIVE),
171			    I_COUNT(0),
172			    VALID_PIXEL_MODE(0),
173			    CF_INST(SQ_CF_INST_END),
174			    BARRIER(1));
175    /* 3 */
176    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
177			     SRC0_REL(ABSOLUTE),
178			     SRC0_ELEM(ELEM_X),
179			     SRC0_NEG(0),
180			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
181			     SRC1_REL(ABSOLUTE),
182			     SRC1_ELEM(ELEM_X),
183			     SRC1_NEG(0),
184			     INDEX_MODE(SQ_INDEX_AR_X),
185			     PRED_SEL(SQ_PRED_SEL_OFF),
186			     LAST(0));
187    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
188				 SRC1_ABS(0),
189				 UPDATE_EXECUTE_MASK(0),
190				 UPDATE_PRED(0),
191				 WRITE_MASK(1),
192				 OMOD(SQ_ALU_OMOD_OFF),
193				 ALU_INST(SQ_OP2_INST_MOV),
194				 BANK_SWIZZLE(SQ_ALU_VEC_012),
195				 DST_GPR(0),
196				 DST_REL(ABSOLUTE),
197				 DST_ELEM(ELEM_X),
198				 CLAMP(1));
199    /* 4 */
200    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
201			     SRC0_REL(ABSOLUTE),
202			     SRC0_ELEM(ELEM_Y),
203			     SRC0_NEG(0),
204			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
205			     SRC1_REL(ABSOLUTE),
206			     SRC1_ELEM(ELEM_Y),
207			     SRC1_NEG(0),
208			     INDEX_MODE(SQ_INDEX_AR_X),
209			     PRED_SEL(SQ_PRED_SEL_OFF),
210			     LAST(0));
211    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
212				 SRC1_ABS(0),
213				 UPDATE_EXECUTE_MASK(0),
214				 UPDATE_PRED(0),
215				 WRITE_MASK(1),
216				 OMOD(SQ_ALU_OMOD_OFF),
217				 ALU_INST(SQ_OP2_INST_MOV),
218				 BANK_SWIZZLE(SQ_ALU_VEC_012),
219				 DST_GPR(0),
220				 DST_REL(ABSOLUTE),
221				 DST_ELEM(ELEM_Y),
222				 CLAMP(1));
223    /* 5 */
224    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
225			     SRC0_REL(ABSOLUTE),
226			     SRC0_ELEM(ELEM_Z),
227			     SRC0_NEG(0),
228			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
229			     SRC1_REL(ABSOLUTE),
230			     SRC1_ELEM(ELEM_Z),
231			     SRC1_NEG(0),
232			     INDEX_MODE(SQ_INDEX_AR_X),
233			     PRED_SEL(SQ_PRED_SEL_OFF),
234			     LAST(0));
235    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
236				 SRC1_ABS(0),
237				 UPDATE_EXECUTE_MASK(0),
238				 UPDATE_PRED(0),
239				 WRITE_MASK(1),
240				 OMOD(SQ_ALU_OMOD_OFF),
241				 ALU_INST(SQ_OP2_INST_MOV),
242				 BANK_SWIZZLE(SQ_ALU_VEC_012),
243				 DST_GPR(0),
244				 DST_REL(ABSOLUTE),
245				 DST_ELEM(ELEM_Z),
246				 CLAMP(1));
247    /* 6 */
248    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
249			     SRC0_REL(ABSOLUTE),
250			     SRC0_ELEM(ELEM_W),
251			     SRC0_NEG(0),
252			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
253			     SRC1_REL(ABSOLUTE),
254			     SRC1_ELEM(ELEM_W),
255			     SRC1_NEG(0),
256			     INDEX_MODE(SQ_INDEX_AR_X),
257			     PRED_SEL(SQ_PRED_SEL_OFF),
258			     LAST(1));
259    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
260				 SRC1_ABS(0),
261				 UPDATE_EXECUTE_MASK(0),
262				 UPDATE_PRED(0),
263				 WRITE_MASK(1),
264				 OMOD(SQ_ALU_OMOD_OFF),
265				 ALU_INST(SQ_OP2_INST_MOV),
266				 BANK_SWIZZLE(SQ_ALU_VEC_012),
267				 DST_GPR(0),
268				 DST_REL(ABSOLUTE),
269				 DST_ELEM(ELEM_W),
270				 CLAMP(1));
271
272    return i;
273}
274
275/* copy vs --------------------------------------- */
276int cayman_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader)
277{
278    int i = 0;
279
280    /* 0 */
281    shader[i++] = CF_DWORD0(ADDR(4),
282			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
283    shader[i++] = CF_DWORD1(POP_COUNT(0),
284			    CF_CONST(0),
285			    COND(SQ_CF_COND_ACTIVE),
286			    I_COUNT(2),
287			    VALID_PIXEL_MODE(0),
288			    CF_INST(SQ_CF_INST_TC),
289			    BARRIER(1));
290    /* 1 */
291    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
292					  TYPE(SQ_EXPORT_POS),
293					  RW_GPR(1),
294					  RW_REL(ABSOLUTE),
295					  INDEX_GPR(0),
296					  ELEM_SIZE(0));
297    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
298					       SRC_SEL_Y(SQ_SEL_Y),
299					       SRC_SEL_Z(SQ_SEL_Z),
300					       SRC_SEL_W(SQ_SEL_W),
301					       BURST_COUNT(0),
302					       VALID_PIXEL_MODE(0),
303					       CF_INST(SQ_CF_INST_EXPORT_DONE),
304					       MARK(0),
305					       BARRIER(1));
306    /* 2 */
307    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
308					  TYPE(SQ_EXPORT_PARAM),
309					  RW_GPR(0),
310					  RW_REL(ABSOLUTE),
311					  INDEX_GPR(0),
312					  ELEM_SIZE(0));
313    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
314					       SRC_SEL_Y(SQ_SEL_Y),
315					       SRC_SEL_Z(SQ_SEL_Z),
316					       SRC_SEL_W(SQ_SEL_W),
317					       BURST_COUNT(0),
318					       VALID_PIXEL_MODE(0),
319					       CF_INST(SQ_CF_INST_EXPORT_DONE),
320					       MARK(0),
321					       BARRIER(0));
322    /* 3 - end */
323    shader[i++] = CF_DWORD0(ADDR(0),
324			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
325    shader[i++] = CF_DWORD1(POP_COUNT(0),
326			    CF_CONST(0),
327			    COND(SQ_CF_COND_ACTIVE),
328			    I_COUNT(0),
329			    VALID_PIXEL_MODE(0),
330			    CF_INST(SQ_CF_INST_END),
331			    BARRIER(1));
332    /* 4/5 */
333    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
334			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
335			     FETCH_WHOLE_QUAD(0),
336			     BUFFER_ID(0),
337			     SRC_GPR(0),
338			     SRC_REL(ABSOLUTE),
339			     SRC_SEL_X(SQ_SEL_X),
340			     SRC_SEL_Y(SQ_SEL_Y),
341			     STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
342			     LDS_REQ(0),
343			     COALESCED_READ(0));
344    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
345				 DST_REL(0),
346				 DST_SEL_X(SQ_SEL_X),
347				 DST_SEL_Y(SQ_SEL_Y),
348				 DST_SEL_Z(SQ_SEL_0),
349				 DST_SEL_W(SQ_SEL_1),
350				 USE_CONST_FIELDS(0),
351				 DATA_FORMAT(FMT_32_32_FLOAT),
352				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
353				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
354				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
355    shader[i++] = VTX_DWORD2(OFFSET(0),
356#if X_BYTE_ORDER == X_BIG_ENDIAN
357                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
358#else
359			     ENDIAN_SWAP(ENDIAN_NONE),
360#endif
361			     CONST_BUF_NO_STRIDE(0),
362			     ALT_CONST(0),
363			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
364    shader[i++] = VTX_DWORD_PAD;
365    /* 6/7 */
366    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
367			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
368			     FETCH_WHOLE_QUAD(0),
369			     BUFFER_ID(0),
370			     SRC_GPR(0),
371			     SRC_REL(ABSOLUTE),
372			     SRC_SEL_X(SQ_SEL_X),
373			     SRC_SEL_Y(SQ_SEL_Y),
374			     STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
375			     LDS_REQ(0),
376			     COALESCED_READ(0));
377    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
378				 DST_REL(0),
379				 DST_SEL_X(SQ_SEL_X),
380				 DST_SEL_Y(SQ_SEL_Y),
381				 DST_SEL_Z(SQ_SEL_0),
382				 DST_SEL_W(SQ_SEL_1),
383				 USE_CONST_FIELDS(0),
384				 DATA_FORMAT(FMT_32_32_FLOAT),
385				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
386				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
387				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
388    shader[i++] = VTX_DWORD2(OFFSET(8),
389#if X_BYTE_ORDER == X_BIG_ENDIAN
390                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
391#else
392			     ENDIAN_SWAP(ENDIAN_NONE),
393#endif
394			     CONST_BUF_NO_STRIDE(0),
395			     ALT_CONST(0),
396			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
397    shader[i++] = VTX_DWORD_PAD;
398
399    return i;
400}
401
402/* copy ps --------------------------------------- */
403int cayman_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader)
404{
405    int i = 0;
406
407    /* CF INST 0 */
408    shader[i++] = CF_ALU_DWORD0(ADDR(4),
409				KCACHE_BANK0(0),
410				KCACHE_BANK1(0),
411				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
412    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
413				KCACHE_ADDR0(0),
414				KCACHE_ADDR1(0),
415				I_COUNT(4),
416				ALT_CONST(0),
417				CF_INST(SQ_CF_INST_ALU),
418				WHOLE_QUAD_MODE(0),
419				BARRIER(1));
420    /* CF INST 1 */
421    shader[i++] = CF_DWORD0(ADDR(8),
422			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
423    shader[i++] = CF_DWORD1(POP_COUNT(0),
424			    CF_CONST(0),
425			    COND(SQ_CF_COND_ACTIVE),
426			    I_COUNT(1),
427			    VALID_PIXEL_MODE(0),
428			    CF_INST(SQ_CF_INST_TC),
429			    BARRIER(1));
430    /* CF INST 2 */
431    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
432					  TYPE(SQ_EXPORT_PIXEL),
433					  RW_GPR(0),
434					  RW_REL(ABSOLUTE),
435					  INDEX_GPR(0),
436					  ELEM_SIZE(1));
437    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
438					       SRC_SEL_Y(SQ_SEL_Y),
439					       SRC_SEL_Z(SQ_SEL_Z),
440					       SRC_SEL_W(SQ_SEL_W),
441					       BURST_COUNT(1),
442					       VALID_PIXEL_MODE(0),
443					       CF_INST(SQ_CF_INST_EXPORT_DONE),
444					       MARK(0),
445					       BARRIER(1));
446    /* CF INST 3 - end */
447    shader[i++] = CF_DWORD0(ADDR(0),
448			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
449    shader[i++] = CF_DWORD1(POP_COUNT(0),
450			    CF_CONST(0),
451			    COND(SQ_CF_COND_ACTIVE),
452			    I_COUNT(0),
453			    VALID_PIXEL_MODE(0),
454			    CF_INST(SQ_CF_INST_END),
455			    BARRIER(1));
456    /* 4 interpolate tex coords */
457    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
458			     SRC0_REL(ABSOLUTE),
459			     SRC0_ELEM(ELEM_Y),
460			     SRC0_NEG(0),
461			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
462			     SRC1_REL(ABSOLUTE),
463			     SRC1_ELEM(ELEM_X),
464			     SRC1_NEG(0),
465			     INDEX_MODE(SQ_INDEX_AR_X),
466			     PRED_SEL(SQ_PRED_SEL_OFF),
467			     LAST(0));
468    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
469				 SRC1_ABS(0),
470				 UPDATE_EXECUTE_MASK(0),
471				 UPDATE_PRED(0),
472				 WRITE_MASK(1),
473				 OMOD(SQ_ALU_OMOD_OFF),
474				 ALU_INST(SQ_OP2_INST_INTERP_XY),
475				 BANK_SWIZZLE(SQ_ALU_VEC_210),
476				 DST_GPR(0),
477				 DST_REL(ABSOLUTE),
478				 DST_ELEM(ELEM_X),
479				 CLAMP(0));
480    /* 5 */
481    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
482			     SRC0_REL(ABSOLUTE),
483			     SRC0_ELEM(ELEM_X),
484			     SRC0_NEG(0),
485			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
486			     SRC1_REL(ABSOLUTE),
487			     SRC1_ELEM(ELEM_X),
488			     SRC1_NEG(0),
489			     INDEX_MODE(SQ_INDEX_AR_X),
490			     PRED_SEL(SQ_PRED_SEL_OFF),
491			     LAST(0));
492    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
493				 SRC1_ABS(0),
494				 UPDATE_EXECUTE_MASK(0),
495				 UPDATE_PRED(0),
496				 WRITE_MASK(1),
497				 OMOD(SQ_ALU_OMOD_OFF),
498				 ALU_INST(SQ_OP2_INST_INTERP_XY),
499				 BANK_SWIZZLE(SQ_ALU_VEC_210),
500				 DST_GPR(0),
501				 DST_REL(ABSOLUTE),
502				 DST_ELEM(ELEM_Y),
503				 CLAMP(0));
504    /* 6 */
505    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
506			     SRC0_REL(ABSOLUTE),
507			     SRC0_ELEM(ELEM_Y),
508			     SRC0_NEG(0),
509			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
510			     SRC1_REL(ABSOLUTE),
511			     SRC1_ELEM(ELEM_X),
512			     SRC1_NEG(0),
513			     INDEX_MODE(SQ_INDEX_AR_X),
514			     PRED_SEL(SQ_PRED_SEL_OFF),
515			     LAST(0));
516    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
517				 SRC1_ABS(0),
518				 UPDATE_EXECUTE_MASK(0),
519				 UPDATE_PRED(0),
520				 WRITE_MASK(0),
521				 OMOD(SQ_ALU_OMOD_OFF),
522				 ALU_INST(SQ_OP2_INST_INTERP_XY),
523				 BANK_SWIZZLE(SQ_ALU_VEC_210),
524				 DST_GPR(0),
525				 DST_REL(ABSOLUTE),
526				 DST_ELEM(ELEM_Z),
527				 CLAMP(0));
528    /* 7 */
529    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
530			     SRC0_REL(ABSOLUTE),
531			     SRC0_ELEM(ELEM_X),
532			     SRC0_NEG(0),
533			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
534			     SRC1_REL(ABSOLUTE),
535			     SRC1_ELEM(ELEM_X),
536			     SRC1_NEG(0),
537			     INDEX_MODE(SQ_INDEX_AR_X),
538			     PRED_SEL(SQ_PRED_SEL_OFF),
539			     LAST(1));
540    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
541				 SRC1_ABS(0),
542				 UPDATE_EXECUTE_MASK(0),
543				 UPDATE_PRED(0),
544				 WRITE_MASK(0),
545				 OMOD(SQ_ALU_OMOD_OFF),
546				 ALU_INST(SQ_OP2_INST_INTERP_XY),
547				 BANK_SWIZZLE(SQ_ALU_VEC_210),
548				 DST_GPR(0),
549				 DST_REL(ABSOLUTE),
550				 DST_ELEM(ELEM_W),
551				 CLAMP(0));
552
553    /* 8/9 TEX INST 0 */
554    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
555			     INST_MOD(0),
556			     FETCH_WHOLE_QUAD(0),
557			     RESOURCE_ID(0),
558			     SRC_GPR(0),
559			     SRC_REL(ABSOLUTE),
560			     ALT_CONST(0),
561			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
562			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
563    shader[i++] = TEX_DWORD1(DST_GPR(0),
564			     DST_REL(ABSOLUTE),
565			     DST_SEL_X(SQ_SEL_X), /* R */
566			     DST_SEL_Y(SQ_SEL_Y), /* G */
567			     DST_SEL_Z(SQ_SEL_Z), /* B */
568			     DST_SEL_W(SQ_SEL_W), /* A */
569			     LOD_BIAS(0),
570			     COORD_TYPE_X(TEX_UNNORMALIZED),
571			     COORD_TYPE_Y(TEX_UNNORMALIZED),
572			     COORD_TYPE_Z(TEX_UNNORMALIZED),
573			     COORD_TYPE_W(TEX_UNNORMALIZED));
574    shader[i++] = TEX_DWORD2(OFFSET_X(0),
575			     OFFSET_Y(0),
576			     OFFSET_Z(0),
577			     SAMPLER_ID(0),
578			     SRC_SEL_X(SQ_SEL_X),
579			     SRC_SEL_Y(SQ_SEL_Y),
580			     SRC_SEL_Z(SQ_SEL_0),
581			     SRC_SEL_W(SQ_SEL_1));
582    shader[i++] = TEX_DWORD_PAD;
583
584    return i;
585}
586
587int cayman_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
588{
589    int i = 0;
590
591    /* 0 */
592    shader[i++] = CF_DWORD0(ADDR(8),
593			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
594    shader[i++] = CF_DWORD1(POP_COUNT(0),
595                            CF_CONST(0),
596                            COND(SQ_CF_COND_ACTIVE),
597                            I_COUNT(2),
598                            VALID_PIXEL_MODE(0),
599                            CF_INST(SQ_CF_INST_TC),
600                            BARRIER(1));
601
602    /* 1 - ALU */
603    shader[i++] = CF_ALU_DWORD0(ADDR(5),
604				KCACHE_BANK0(0),
605				KCACHE_BANK1(0),
606				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
607    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
608				KCACHE_ADDR0(0),
609				KCACHE_ADDR1(0),
610				I_COUNT(2),
611				ALT_CONST(0),
612				CF_INST(SQ_CF_INST_ALU),
613				WHOLE_QUAD_MODE(0),
614				BARRIER(1));
615
616    /* 2 */
617    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
618                                          TYPE(SQ_EXPORT_POS),
619                                          RW_GPR(1),
620                                          RW_REL(ABSOLUTE),
621                                          INDEX_GPR(0),
622                                          ELEM_SIZE(3));
623    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
624                                               SRC_SEL_Y(SQ_SEL_Y),
625                                               SRC_SEL_Z(SQ_SEL_Z),
626                                               SRC_SEL_W(SQ_SEL_W),
627                                               BURST_COUNT(1),
628                                               VALID_PIXEL_MODE(0),
629                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
630                                               MARK(0),
631                                               BARRIER(1));
632    /* 3 */
633    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
634                                          TYPE(SQ_EXPORT_PARAM),
635                                          RW_GPR(0),
636                                          RW_REL(ABSOLUTE),
637                                          INDEX_GPR(0),
638                                          ELEM_SIZE(3));
639    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
640                                               SRC_SEL_Y(SQ_SEL_Y),
641                                               SRC_SEL_Z(SQ_SEL_Z),
642                                               SRC_SEL_W(SQ_SEL_W),
643                                               BURST_COUNT(1),
644                                               VALID_PIXEL_MODE(0),
645                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
646                                               MARK(0),
647                                               BARRIER(0));
648    /* 4 - end */
649    shader[i++] = CF_DWORD0(ADDR(0),
650			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
651    shader[i++] = CF_DWORD1(POP_COUNT(0),
652			    CF_CONST(0),
653			    COND(SQ_CF_COND_ACTIVE),
654			    I_COUNT(0),
655			    VALID_PIXEL_MODE(0),
656			    CF_INST(SQ_CF_INST_END),
657			    BARRIER(1));
658    /* 5 texX / w */
659    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
660                             SRC0_REL(ABSOLUTE),
661                             SRC0_ELEM(ELEM_X),
662                             SRC0_NEG(0),
663                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
664                             SRC1_REL(ABSOLUTE),
665                             SRC1_ELEM(ELEM_X),
666                             SRC1_NEG(0),
667                             INDEX_MODE(SQ_INDEX_AR_X),
668                             PRED_SEL(SQ_PRED_SEL_OFF),
669                             LAST(0));
670    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
671                                 SRC1_ABS(0),
672                                 UPDATE_EXECUTE_MASK(0),
673                                 UPDATE_PRED(0),
674                                 WRITE_MASK(1),
675                                 OMOD(SQ_ALU_OMOD_OFF),
676                                 ALU_INST(SQ_OP2_INST_MUL),
677                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
678                                 DST_GPR(0),
679                                 DST_REL(ABSOLUTE),
680                                 DST_ELEM(ELEM_X),
681                                 CLAMP(0));
682
683    /* 6 texY / h */
684    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
685                             SRC0_REL(ABSOLUTE),
686                             SRC0_ELEM(ELEM_Y),
687                             SRC0_NEG(0),
688                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
689                             SRC1_REL(ABSOLUTE),
690                             SRC1_ELEM(ELEM_Y),
691                             SRC1_NEG(0),
692                             INDEX_MODE(SQ_INDEX_AR_X),
693                             PRED_SEL(SQ_PRED_SEL_OFF),
694                             LAST(1));
695    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
696                                 SRC1_ABS(0),
697                                 UPDATE_EXECUTE_MASK(0),
698                                 UPDATE_PRED(0),
699                                 WRITE_MASK(1),
700                                 OMOD(SQ_ALU_OMOD_OFF),
701                                 ALU_INST(SQ_OP2_INST_MUL),
702                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
703                                 DST_GPR(0),
704                                 DST_REL(ABSOLUTE),
705                                 DST_ELEM(ELEM_Y),
706                                 CLAMP(0));
707
708    /* 7 - padding */
709    shader[i++] = 0x00000000;
710    shader[i++] = 0x00000000;
711    /* 8/9 */
712    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
713                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
714                             FETCH_WHOLE_QUAD(0),
715                             BUFFER_ID(0),
716                             SRC_GPR(0),
717                             SRC_REL(ABSOLUTE),
718                             SRC_SEL_X(SQ_SEL_X),
719			     SRC_SEL_Y(SQ_SEL_Y),
720			     STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
721			     LDS_REQ(0),
722			     COALESCED_READ(0));
723    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
724                                 DST_REL(ABSOLUTE),
725                                 DST_SEL_X(SQ_SEL_X),
726                                 DST_SEL_Y(SQ_SEL_Y),
727                                 DST_SEL_Z(SQ_SEL_0),
728                                 DST_SEL_W(SQ_SEL_1),
729                                 USE_CONST_FIELDS(0),
730                                 DATA_FORMAT(FMT_32_32_FLOAT),
731                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
732                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
733                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
734    shader[i++] = VTX_DWORD2(OFFSET(0),
735#if X_BYTE_ORDER == X_BIG_ENDIAN
736                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
737#else
738                             ENDIAN_SWAP(ENDIAN_NONE),
739#endif
740                             CONST_BUF_NO_STRIDE(0),
741			     ALT_CONST(0),
742			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
743    shader[i++] = VTX_DWORD_PAD;
744    /* 10/11 */
745    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
746                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
747                             FETCH_WHOLE_QUAD(0),
748                             BUFFER_ID(0),
749                             SRC_GPR(0),
750                             SRC_REL(ABSOLUTE),
751                             SRC_SEL_X(SQ_SEL_X),
752			     SRC_SEL_Y(SQ_SEL_Y),
753			     STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
754			     LDS_REQ(0),
755			     COALESCED_READ(0));
756    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
757                                 DST_REL(ABSOLUTE),
758                                 DST_SEL_X(SQ_SEL_X),
759                                 DST_SEL_Y(SQ_SEL_Y),
760                                 DST_SEL_Z(SQ_SEL_0),
761                                 DST_SEL_W(SQ_SEL_1),
762                                 USE_CONST_FIELDS(0),
763                                 DATA_FORMAT(FMT_32_32_FLOAT),
764                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
765                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
766                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
767    shader[i++] = VTX_DWORD2(OFFSET(8),
768#if X_BYTE_ORDER == X_BIG_ENDIAN
769                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
770#else
771                             ENDIAN_SWAP(ENDIAN_NONE),
772#endif
773                             CONST_BUF_NO_STRIDE(0),
774			     ALT_CONST(0),
775			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
776    shader[i++] = VTX_DWORD_PAD;
777
778    return i;
779}
780
781int cayman_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
782{
783    int i = 0;
784
785    /* 0 */
786    shader[i++] = CF_ALU_DWORD0(ADDR(6),
787				KCACHE_BANK0(0),
788				KCACHE_BANK1(0),
789				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
790    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
791				KCACHE_ADDR0(0),
792				KCACHE_ADDR1(0),
793				I_COUNT(4),
794				ALT_CONST(0),
795				CF_INST(SQ_CF_INST_ALU),
796				WHOLE_QUAD_MODE(0),
797				BARRIER(1));
798    /* 1 */
799    shader[i++] = CF_DWORD0(ADDR(22),
800			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
801    shader[i++] = CF_DWORD1(POP_COUNT(0),
802                            CF_CONST(0),
803                            COND(SQ_CF_COND_BOOL),
804                            I_COUNT(0),
805                            VALID_PIXEL_MODE(0),
806                            CF_INST(SQ_CF_INST_CALL),
807                            BARRIER(0));
808    /* 2 */
809    shader[i++] = CF_DWORD0(ADDR(30),
810			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
811    shader[i++] = CF_DWORD1(POP_COUNT(0),
812                            CF_CONST(0),
813                            COND(SQ_CF_COND_NOT_BOOL),
814                            I_COUNT(0),
815                            VALID_PIXEL_MODE(0),
816                            CF_INST(SQ_CF_INST_CALL),
817                            BARRIER(0));
818    /* 3 */
819    shader[i++] = CF_ALU_DWORD0(ADDR(10),
820                                KCACHE_BANK0(0),
821                                KCACHE_BANK1(0),
822                                KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
823    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
824                                KCACHE_ADDR0(0),
825                                KCACHE_ADDR1(0),
826                                I_COUNT(12),
827                                ALT_CONST(0),
828                                CF_INST(SQ_CF_INST_ALU),
829				WHOLE_QUAD_MODE(0),
830                                BARRIER(1));
831    /* 4 */
832    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
833                                          TYPE(SQ_EXPORT_PIXEL),
834                                          RW_GPR(2),
835                                          RW_REL(ABSOLUTE),
836                                          INDEX_GPR(0),
837                                          ELEM_SIZE(3));
838    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
839                                               SRC_SEL_Y(SQ_SEL_Y),
840                                               SRC_SEL_Z(SQ_SEL_Z),
841                                               SRC_SEL_W(SQ_SEL_W),
842                                               BURST_COUNT(1),
843                                               VALID_PIXEL_MODE(0),
844                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
845                                               MARK(0),
846                                               BARRIER(1));
847    /* 5 - end */
848    shader[i++] = CF_DWORD0(ADDR(0),
849			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
850    shader[i++] = CF_DWORD1(POP_COUNT(0),
851			    CF_CONST(0),
852			    COND(SQ_CF_COND_ACTIVE),
853			    I_COUNT(0),
854			    VALID_PIXEL_MODE(0),
855			    CF_INST(SQ_CF_INST_END),
856			    BARRIER(1));
857    /* 6 interpolate tex coords */
858    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
859			     SRC0_REL(ABSOLUTE),
860			     SRC0_ELEM(ELEM_Y),
861			     SRC0_NEG(0),
862			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
863			     SRC1_REL(ABSOLUTE),
864			     SRC1_ELEM(ELEM_X),
865			     SRC1_NEG(0),
866			     INDEX_MODE(SQ_INDEX_AR_X),
867			     PRED_SEL(SQ_PRED_SEL_OFF),
868			     LAST(0));
869    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
870				 SRC1_ABS(0),
871				 UPDATE_EXECUTE_MASK(0),
872				 UPDATE_PRED(0),
873				 WRITE_MASK(1),
874				 OMOD(SQ_ALU_OMOD_OFF),
875				 ALU_INST(SQ_OP2_INST_INTERP_XY),
876				 BANK_SWIZZLE(SQ_ALU_VEC_210),
877				 DST_GPR(0),
878				 DST_REL(ABSOLUTE),
879				 DST_ELEM(ELEM_X),
880				 CLAMP(0));
881    /* 7 */
882    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
883			     SRC0_REL(ABSOLUTE),
884			     SRC0_ELEM(ELEM_X),
885			     SRC0_NEG(0),
886			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
887			     SRC1_REL(ABSOLUTE),
888			     SRC1_ELEM(ELEM_X),
889			     SRC1_NEG(0),
890			     INDEX_MODE(SQ_INDEX_AR_X),
891			     PRED_SEL(SQ_PRED_SEL_OFF),
892			     LAST(0));
893    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
894				 SRC1_ABS(0),
895				 UPDATE_EXECUTE_MASK(0),
896				 UPDATE_PRED(0),
897				 WRITE_MASK(1),
898				 OMOD(SQ_ALU_OMOD_OFF),
899				 ALU_INST(SQ_OP2_INST_INTERP_XY),
900				 BANK_SWIZZLE(SQ_ALU_VEC_210),
901				 DST_GPR(0),
902				 DST_REL(ABSOLUTE),
903				 DST_ELEM(ELEM_Y),
904				 CLAMP(0));
905    /* 8 */
906    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
907			     SRC0_REL(ABSOLUTE),
908			     SRC0_ELEM(ELEM_Y),
909			     SRC0_NEG(0),
910			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
911			     SRC1_REL(ABSOLUTE),
912			     SRC1_ELEM(ELEM_X),
913			     SRC1_NEG(0),
914			     INDEX_MODE(SQ_INDEX_AR_X),
915			     PRED_SEL(SQ_PRED_SEL_OFF),
916			     LAST(0));
917    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
918				 SRC1_ABS(0),
919				 UPDATE_EXECUTE_MASK(0),
920				 UPDATE_PRED(0),
921				 WRITE_MASK(0),
922				 OMOD(SQ_ALU_OMOD_OFF),
923				 ALU_INST(SQ_OP2_INST_INTERP_XY),
924				 BANK_SWIZZLE(SQ_ALU_VEC_210),
925				 DST_GPR(0),
926				 DST_REL(ABSOLUTE),
927				 DST_ELEM(ELEM_Z),
928				 CLAMP(0));
929    /* 9 */
930    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
931			     SRC0_REL(ABSOLUTE),
932			     SRC0_ELEM(ELEM_X),
933			     SRC0_NEG(0),
934			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
935			     SRC1_REL(ABSOLUTE),
936			     SRC1_ELEM(ELEM_X),
937			     SRC1_NEG(0),
938			     INDEX_MODE(SQ_INDEX_AR_X),
939			     PRED_SEL(SQ_PRED_SEL_OFF),
940			     LAST(1));
941    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
942				 SRC1_ABS(0),
943				 UPDATE_EXECUTE_MASK(0),
944				 UPDATE_PRED(0),
945				 WRITE_MASK(0),
946				 OMOD(SQ_ALU_OMOD_OFF),
947				 ALU_INST(SQ_OP2_INST_INTERP_XY),
948				 BANK_SWIZZLE(SQ_ALU_VEC_210),
949				 DST_GPR(0),
950				 DST_REL(ABSOLUTE),
951				 DST_ELEM(ELEM_W),
952				 CLAMP(0));
953
954    /* 10,11,12,13 */
955    /* r2.x = MAD(c0.w, r1.x, c0.x) */
956    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
957                             SRC0_REL(ABSOLUTE),
958                             SRC0_ELEM(ELEM_W),
959                             SRC0_NEG(0),
960                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
961                             SRC1_REL(ABSOLUTE),
962                             SRC1_ELEM(ELEM_X),
963                             SRC1_NEG(0),
964                             INDEX_MODE(SQ_INDEX_LOOP),
965                             PRED_SEL(SQ_PRED_SEL_OFF),
966                             LAST(0));
967    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
968                                 SRC2_REL(ABSOLUTE),
969                                 SRC2_ELEM(ELEM_X),
970                                 SRC2_NEG(0),
971                                 ALU_INST(SQ_OP3_INST_MULADD),
972                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
973                                 DST_GPR(2),
974                                 DST_REL(ABSOLUTE),
975                                 DST_ELEM(ELEM_X),
976                                 CLAMP(0));
977    /* r2.y = MAD(c0.w, r1.x, c0.y) */
978    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
979                             SRC0_REL(ABSOLUTE),
980                             SRC0_ELEM(ELEM_W),
981                             SRC0_NEG(0),
982                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
983                             SRC1_REL(ABSOLUTE),
984                             SRC1_ELEM(ELEM_X),
985                             SRC1_NEG(0),
986                             INDEX_MODE(SQ_INDEX_LOOP),
987                             PRED_SEL(SQ_PRED_SEL_OFF),
988                             LAST(0));
989    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
990                                 SRC2_REL(ABSOLUTE),
991                                 SRC2_ELEM(ELEM_Y),
992                                 SRC2_NEG(0),
993                                 ALU_INST(SQ_OP3_INST_MULADD),
994                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
995                                 DST_GPR(2),
996                                 DST_REL(ABSOLUTE),
997                                 DST_ELEM(ELEM_Y),
998                                 CLAMP(0));
999    /* r2.z = MAD(c0.w, r1.x, c0.z) */
1000    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
1001                             SRC0_REL(ABSOLUTE),
1002                             SRC0_ELEM(ELEM_W),
1003                             SRC0_NEG(0),
1004                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1005                             SRC1_REL(ABSOLUTE),
1006                             SRC1_ELEM(ELEM_X),
1007                             SRC1_NEG(0),
1008                             INDEX_MODE(SQ_INDEX_LOOP),
1009                             PRED_SEL(SQ_PRED_SEL_OFF),
1010                             LAST(0));
1011    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
1012                                 SRC2_REL(ABSOLUTE),
1013                                 SRC2_ELEM(ELEM_Z),
1014                                 SRC2_NEG(0),
1015                                 ALU_INST(SQ_OP3_INST_MULADD),
1016                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1017                                 DST_GPR(2),
1018                                 DST_REL(ABSOLUTE),
1019                                 DST_ELEM(ELEM_Z),
1020                                 CLAMP(0));
1021    /* r2.w = MAD(0, 0, 1) */
1022    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
1023                             SRC0_REL(ABSOLUTE),
1024                             SRC0_ELEM(ELEM_X),
1025                             SRC0_NEG(0),
1026                             SRC1_SEL(SQ_ALU_SRC_0),
1027                             SRC1_REL(ABSOLUTE),
1028                             SRC1_ELEM(ELEM_X),
1029                             SRC1_NEG(0),
1030                             INDEX_MODE(SQ_INDEX_LOOP),
1031                             PRED_SEL(SQ_PRED_SEL_OFF),
1032                             LAST(1));
1033    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
1034                                 SRC2_REL(ABSOLUTE),
1035                                 SRC2_ELEM(ELEM_X),
1036                                 SRC2_NEG(0),
1037                                 ALU_INST(SQ_OP3_INST_MULADD),
1038                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1039                                 DST_GPR(2),
1040                                 DST_REL(ABSOLUTE),
1041                                 DST_ELEM(ELEM_W),
1042                                 CLAMP(0));
1043
1044    /* 14,15,16,17 */
1045    /* r2.x = MAD(c1.x, r1.y, pv.x) */
1046    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
1047                             SRC0_REL(ABSOLUTE),
1048                             SRC0_ELEM(ELEM_X),
1049                             SRC0_NEG(0),
1050                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1051                             SRC1_REL(ABSOLUTE),
1052                             SRC1_ELEM(ELEM_Y),
1053                             SRC1_NEG(0),
1054                             INDEX_MODE(SQ_INDEX_LOOP),
1055                             PRED_SEL(SQ_PRED_SEL_OFF),
1056                             LAST(0));
1057    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1058                                 SRC2_REL(ABSOLUTE),
1059                                 SRC2_ELEM(ELEM_X),
1060                                 SRC2_NEG(0),
1061                                 ALU_INST(SQ_OP3_INST_MULADD),
1062                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1063                                 DST_GPR(2),
1064                                 DST_REL(ABSOLUTE),
1065                                 DST_ELEM(ELEM_X),
1066                                 CLAMP(0));
1067    /* r2.y = MAD(c1.y, r1.y, pv.y) */
1068    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
1069                             SRC0_REL(ABSOLUTE),
1070                             SRC0_ELEM(ELEM_Y),
1071                             SRC0_NEG(0),
1072                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1073                             SRC1_REL(ABSOLUTE),
1074                             SRC1_ELEM(ELEM_Y),
1075                             SRC1_NEG(0),
1076                             INDEX_MODE(SQ_INDEX_LOOP),
1077                             PRED_SEL(SQ_PRED_SEL_OFF),
1078                             LAST(0));
1079    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1080                                 SRC2_REL(ABSOLUTE),
1081                                 SRC2_ELEM(ELEM_Y),
1082                                 SRC2_NEG(0),
1083                                 ALU_INST(SQ_OP3_INST_MULADD),
1084                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1085                                 DST_GPR(2),
1086                                 DST_REL(ABSOLUTE),
1087                                 DST_ELEM(ELEM_Y),
1088                                 CLAMP(0));
1089    /* r2.z = MAD(c1.z, r1.y, pv.z) */
1090    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
1091                             SRC0_REL(ABSOLUTE),
1092                             SRC0_ELEM(ELEM_Z),
1093                             SRC0_NEG(0),
1094                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1095                             SRC1_REL(ABSOLUTE),
1096                             SRC1_ELEM(ELEM_Y),
1097                             SRC1_NEG(0),
1098                             INDEX_MODE(SQ_INDEX_LOOP),
1099                             PRED_SEL(SQ_PRED_SEL_OFF),
1100                             LAST(0));
1101    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1102                                 SRC2_REL(ABSOLUTE),
1103                                 SRC2_ELEM(ELEM_Z),
1104                                 SRC2_NEG(0),
1105                                 ALU_INST(SQ_OP3_INST_MULADD),
1106                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1107                                 DST_GPR(2),
1108                                 DST_REL(ABSOLUTE),
1109                                 DST_ELEM(ELEM_Z),
1110                                 CLAMP(0));
1111    /* r2.w = MAD(0, 0, 1) */
1112    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
1113                             SRC0_REL(ABSOLUTE),
1114                             SRC0_ELEM(ELEM_X),
1115                             SRC0_NEG(0),
1116                             SRC1_SEL(SQ_ALU_SRC_0),
1117                             SRC1_REL(ABSOLUTE),
1118                             SRC1_ELEM(ELEM_X),
1119                             SRC1_NEG(0),
1120                             INDEX_MODE(SQ_INDEX_LOOP),
1121                             PRED_SEL(SQ_PRED_SEL_OFF),
1122                             LAST(1));
1123    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
1124                                 SRC2_REL(ABSOLUTE),
1125                                 SRC2_ELEM(ELEM_W),
1126                                 SRC2_NEG(0),
1127                                 ALU_INST(SQ_OP3_INST_MULADD),
1128                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1129                                 DST_GPR(2),
1130                                 DST_REL(ABSOLUTE),
1131                                 DST_ELEM(ELEM_W),
1132                                 CLAMP(0));
1133    /* 18,19,20,21 */
1134    /* r2.x = MAD(c2.x, r1.z, pv.x) */
1135    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
1136                             SRC0_REL(ABSOLUTE),
1137                             SRC0_ELEM(ELEM_X),
1138                             SRC0_NEG(0),
1139                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1140                             SRC1_REL(ABSOLUTE),
1141                             SRC1_ELEM(ELEM_Z),
1142                             SRC1_NEG(0),
1143                             INDEX_MODE(SQ_INDEX_LOOP),
1144                             PRED_SEL(SQ_PRED_SEL_OFF),
1145                             LAST(0));
1146    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1147                                 SRC2_REL(ABSOLUTE),
1148                                 SRC2_ELEM(ELEM_X),
1149                                 SRC2_NEG(0),
1150                                 ALU_INST(SQ_OP3_INST_MULADD),
1151                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1152                                 DST_GPR(2),
1153                                 DST_REL(ABSOLUTE),
1154                                 DST_ELEM(ELEM_X),
1155                                 CLAMP(1));
1156    /* r2.y = MAD(c2.y, r1.z, pv.y) */
1157    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
1158                             SRC0_REL(ABSOLUTE),
1159                             SRC0_ELEM(ELEM_Y),
1160                             SRC0_NEG(0),
1161                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1162                             SRC1_REL(ABSOLUTE),
1163                             SRC1_ELEM(ELEM_Z),
1164                             SRC1_NEG(0),
1165                             INDEX_MODE(SQ_INDEX_LOOP),
1166                             PRED_SEL(SQ_PRED_SEL_OFF),
1167                             LAST(0));
1168    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1169                                 SRC2_REL(ABSOLUTE),
1170                                 SRC2_ELEM(ELEM_Y),
1171                                 SRC2_NEG(0),
1172                                 ALU_INST(SQ_OP3_INST_MULADD),
1173                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1174                                 DST_GPR(2),
1175                                 DST_REL(ABSOLUTE),
1176                                 DST_ELEM(ELEM_Y),
1177                                 CLAMP(1));
1178    /* r2.z = MAD(c2.z, r1.z, pv.z) */
1179    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
1180                             SRC0_REL(ABSOLUTE),
1181                             SRC0_ELEM(ELEM_Z),
1182                             SRC0_NEG(0),
1183                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1184                             SRC1_REL(ABSOLUTE),
1185                             SRC1_ELEM(ELEM_Z),
1186                             SRC1_NEG(0),
1187                             INDEX_MODE(SQ_INDEX_LOOP),
1188                             PRED_SEL(SQ_PRED_SEL_OFF),
1189                             LAST(0));
1190    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1191                                 SRC2_REL(ABSOLUTE),
1192                                 SRC2_ELEM(ELEM_Z),
1193                                 SRC2_NEG(0),
1194                                 ALU_INST(SQ_OP3_INST_MULADD),
1195                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1196                                 DST_GPR(2),
1197                                 DST_REL(ABSOLUTE),
1198                                 DST_ELEM(ELEM_Z),
1199                                 CLAMP(1));
1200    /* r2.w = MAD(0, 0, 1) */
1201    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
1202                             SRC0_REL(ABSOLUTE),
1203                             SRC0_ELEM(ELEM_X),
1204                             SRC0_NEG(0),
1205                             SRC1_SEL(SQ_ALU_SRC_0),
1206                             SRC1_REL(ABSOLUTE),
1207                             SRC1_ELEM(ELEM_X),
1208                             SRC1_NEG(0),
1209                             INDEX_MODE(SQ_INDEX_LOOP),
1210                             PRED_SEL(SQ_PRED_SEL_OFF),
1211                             LAST(1));
1212    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
1213                                 SRC2_REL(ABSOLUTE),
1214                                 SRC2_ELEM(ELEM_X),
1215                                 SRC2_NEG(0),
1216                                 ALU_INST(SQ_OP3_INST_MULADD),
1217                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1218                                 DST_GPR(2),
1219                                 DST_REL(ABSOLUTE),
1220                                 DST_ELEM(ELEM_W),
1221                                 CLAMP(1));
1222
1223    /* 22 */
1224    shader[i++] = CF_DWORD0(ADDR(24),
1225			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1226    shader[i++] = CF_DWORD1(POP_COUNT(0),
1227                            CF_CONST(0),
1228                            COND(SQ_CF_COND_ACTIVE),
1229                            I_COUNT(3),
1230                            VALID_PIXEL_MODE(0),
1231                            CF_INST(SQ_CF_INST_TC),
1232                            BARRIER(1));
1233    /* 23 */
1234    shader[i++] = CF_DWORD0(ADDR(0),
1235			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1236    shader[i++] = CF_DWORD1(POP_COUNT(0),
1237			    CF_CONST(0),
1238			    COND(SQ_CF_COND_ACTIVE),
1239			    I_COUNT(0),
1240			    VALID_PIXEL_MODE(0),
1241			    CF_INST(SQ_CF_INST_RETURN),
1242			    BARRIER(1));
1243    /* 24/25 */
1244    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1245                             INST_MOD(0),
1246                             FETCH_WHOLE_QUAD(0),
1247                             RESOURCE_ID(0),
1248                             SRC_GPR(0),
1249                             SRC_REL(ABSOLUTE),
1250                             ALT_CONST(0),
1251			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1252			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1253    shader[i++] = TEX_DWORD1(DST_GPR(1),
1254                             DST_REL(ABSOLUTE),
1255                             DST_SEL_X(SQ_SEL_X),
1256                             DST_SEL_Y(SQ_SEL_MASK),
1257                             DST_SEL_Z(SQ_SEL_MASK),
1258                             DST_SEL_W(SQ_SEL_1),
1259                             LOD_BIAS(0),
1260                             COORD_TYPE_X(TEX_NORMALIZED),
1261                             COORD_TYPE_Y(TEX_NORMALIZED),
1262                             COORD_TYPE_Z(TEX_NORMALIZED),
1263                             COORD_TYPE_W(TEX_NORMALIZED));
1264    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1265                             OFFSET_Y(0),
1266                             OFFSET_Z(0),
1267                             SAMPLER_ID(0),
1268                             SRC_SEL_X(SQ_SEL_X),
1269                             SRC_SEL_Y(SQ_SEL_Y),
1270                             SRC_SEL_Z(SQ_SEL_0),
1271                             SRC_SEL_W(SQ_SEL_1));
1272    shader[i++] = TEX_DWORD_PAD;
1273    /* 26/27 */
1274    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1275                             INST_MOD(0),
1276                             FETCH_WHOLE_QUAD(0),
1277                             RESOURCE_ID(1),
1278                             SRC_GPR(0),
1279                             SRC_REL(ABSOLUTE),
1280                             ALT_CONST(0),
1281			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1282			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1283    shader[i++] = TEX_DWORD1(DST_GPR(1),
1284                             DST_REL(ABSOLUTE),
1285                             DST_SEL_X(SQ_SEL_MASK),
1286                             DST_SEL_Y(SQ_SEL_MASK),
1287                             DST_SEL_Z(SQ_SEL_X),
1288                             DST_SEL_W(SQ_SEL_MASK),
1289                             LOD_BIAS(0),
1290                             COORD_TYPE_X(TEX_NORMALIZED),
1291                             COORD_TYPE_Y(TEX_NORMALIZED),
1292                             COORD_TYPE_Z(TEX_NORMALIZED),
1293                             COORD_TYPE_W(TEX_NORMALIZED));
1294    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1295                             OFFSET_Y(0),
1296                             OFFSET_Z(0),
1297                             SAMPLER_ID(1),
1298                             SRC_SEL_X(SQ_SEL_X),
1299                             SRC_SEL_Y(SQ_SEL_Y),
1300                             SRC_SEL_Z(SQ_SEL_0),
1301                             SRC_SEL_W(SQ_SEL_1));
1302    shader[i++] = TEX_DWORD_PAD;
1303    /* 28/29 */
1304    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1305                             INST_MOD(0),
1306                             FETCH_WHOLE_QUAD(0),
1307                             RESOURCE_ID(2),
1308                             SRC_GPR(0),
1309                             SRC_REL(ABSOLUTE),
1310                             ALT_CONST(0),
1311			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1312			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1313    shader[i++] = TEX_DWORD1(DST_GPR(1),
1314                             DST_REL(ABSOLUTE),
1315                             DST_SEL_X(SQ_SEL_MASK),
1316                             DST_SEL_Y(SQ_SEL_X),
1317                             DST_SEL_Z(SQ_SEL_MASK),
1318                             DST_SEL_W(SQ_SEL_MASK),
1319                             LOD_BIAS(0),
1320                             COORD_TYPE_X(TEX_NORMALIZED),
1321                             COORD_TYPE_Y(TEX_NORMALIZED),
1322                             COORD_TYPE_Z(TEX_NORMALIZED),
1323                             COORD_TYPE_W(TEX_NORMALIZED));
1324    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1325                             OFFSET_Y(0),
1326                             OFFSET_Z(0),
1327                             SAMPLER_ID(2),
1328                             SRC_SEL_X(SQ_SEL_X),
1329                             SRC_SEL_Y(SQ_SEL_Y),
1330                             SRC_SEL_Z(SQ_SEL_0),
1331                             SRC_SEL_W(SQ_SEL_1));
1332    shader[i++] = TEX_DWORD_PAD;
1333    /* 30 */
1334    shader[i++] = CF_DWORD0(ADDR(32),
1335			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1336    shader[i++] = CF_DWORD1(POP_COUNT(0),
1337                            CF_CONST(0),
1338                            COND(SQ_CF_COND_ACTIVE),
1339                            I_COUNT(1),
1340                            VALID_PIXEL_MODE(0),
1341                            CF_INST(SQ_CF_INST_TC),
1342                            BARRIER(1));
1343    /* 31 */
1344    shader[i++] = CF_DWORD0(ADDR(0),
1345			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1346    shader[i++] = CF_DWORD1(POP_COUNT(0),
1347			    CF_CONST(0),
1348			    COND(SQ_CF_COND_ACTIVE),
1349			    I_COUNT(0),
1350			    VALID_PIXEL_MODE(0),
1351			    CF_INST(SQ_CF_INST_RETURN),
1352			    BARRIER(1));
1353    /* 32/33 */
1354    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1355                             INST_MOD(0),
1356                             FETCH_WHOLE_QUAD(0),
1357                             RESOURCE_ID(0),
1358                             SRC_GPR(0),
1359                             SRC_REL(ABSOLUTE),
1360                             ALT_CONST(0),
1361                             RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1362                             SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1363    shader[i++] = TEX_DWORD1(DST_GPR(1),
1364                             DST_REL(ABSOLUTE),
1365                             DST_SEL_X(SQ_SEL_X),
1366                             DST_SEL_Y(SQ_SEL_Y),
1367                             DST_SEL_Z(SQ_SEL_Z),
1368                             DST_SEL_W(SQ_SEL_1),
1369                             LOD_BIAS(0),
1370                             COORD_TYPE_X(TEX_NORMALIZED),
1371                             COORD_TYPE_Y(TEX_NORMALIZED),
1372                             COORD_TYPE_Z(TEX_NORMALIZED),
1373                             COORD_TYPE_W(TEX_NORMALIZED));
1374    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1375                             OFFSET_Y(0),
1376                             OFFSET_Z(0),
1377                             SAMPLER_ID(0),
1378                             SRC_SEL_X(SQ_SEL_X),
1379                             SRC_SEL_Y(SQ_SEL_Y),
1380                             SRC_SEL_Z(SQ_SEL_0),
1381                             SRC_SEL_W(SQ_SEL_1));
1382    shader[i++] = TEX_DWORD_PAD;
1383
1384    return i;
1385}
1386
1387/* comp vs --------------------------------------- */
1388int cayman_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
1389{
1390    int i = 0;
1391
1392    /* 0 */
1393    shader[i++] = CF_DWORD0(ADDR(3),
1394			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1395    shader[i++] = CF_DWORD1(POP_COUNT(0),
1396                            CF_CONST(0),
1397                            COND(SQ_CF_COND_BOOL),
1398                            I_COUNT(0),
1399                            VALID_PIXEL_MODE(0),
1400                            CF_INST(SQ_CF_INST_CALL),
1401                            BARRIER(0));
1402    /* 1 */
1403    shader[i++] = CF_DWORD0(ADDR(9),
1404			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1405    shader[i++] = CF_DWORD1(POP_COUNT(0),
1406                            CF_CONST(0),
1407                            COND(SQ_CF_COND_NOT_BOOL),
1408                            I_COUNT(0),
1409                            VALID_PIXEL_MODE(0),
1410                            CF_INST(SQ_CF_INST_CALL),
1411                            BARRIER(0));
1412    /* 2 - end */
1413    shader[i++] = CF_DWORD0(ADDR(0),
1414			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1415    shader[i++] = CF_DWORD1(POP_COUNT(0),
1416			    CF_CONST(0),
1417			    COND(SQ_CF_COND_ACTIVE),
1418			    I_COUNT(0),
1419			    VALID_PIXEL_MODE(0),
1420			    CF_INST(SQ_CF_INST_END),
1421			    BARRIER(1));
1422    /* 3 - mask sub */
1423    shader[i++] = CF_DWORD0(ADDR(44),
1424			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1425    shader[i++] = CF_DWORD1(POP_COUNT(0),
1426			    CF_CONST(0),
1427			    COND(SQ_CF_COND_ACTIVE),
1428			    I_COUNT(3),
1429			    VALID_PIXEL_MODE(0),
1430			    CF_INST(SQ_CF_INST_TC),
1431			    BARRIER(1));
1432
1433    /* 4 - ALU */
1434    shader[i++] = CF_ALU_DWORD0(ADDR(14),
1435				KCACHE_BANK0(0),
1436				KCACHE_BANK1(0),
1437				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
1438    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1439				KCACHE_ADDR0(0),
1440				KCACHE_ADDR1(0),
1441				I_COUNT(20),
1442				ALT_CONST(0),
1443				CF_INST(SQ_CF_INST_ALU),
1444				WHOLE_QUAD_MODE(0),
1445				BARRIER(1));
1446
1447    /* 5 - dst */
1448    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1449					  TYPE(SQ_EXPORT_POS),
1450					  RW_GPR(2),
1451					  RW_REL(ABSOLUTE),
1452					  INDEX_GPR(0),
1453					  ELEM_SIZE(0));
1454    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1455					       SRC_SEL_Y(SQ_SEL_Y),
1456					       SRC_SEL_Z(SQ_SEL_0),
1457					       SRC_SEL_W(SQ_SEL_1),
1458					       BURST_COUNT(1),
1459					       VALID_PIXEL_MODE(0),
1460					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1461					       MARK(0),
1462					       BARRIER(1));
1463    /* 6 - src */
1464    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1465					  TYPE(SQ_EXPORT_PARAM),
1466					  RW_GPR(1),
1467					  RW_REL(ABSOLUTE),
1468					  INDEX_GPR(0),
1469					  ELEM_SIZE(0));
1470    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1471					       SRC_SEL_Y(SQ_SEL_Y),
1472					       SRC_SEL_Z(SQ_SEL_0),
1473					       SRC_SEL_W(SQ_SEL_1),
1474					       BURST_COUNT(1),
1475					       VALID_PIXEL_MODE(0),
1476					       CF_INST(SQ_CF_INST_EXPORT),
1477					       MARK(0),
1478					       BARRIER(0));
1479    /* 7 - mask */
1480    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
1481					  TYPE(SQ_EXPORT_PARAM),
1482					  RW_GPR(0),
1483					  RW_REL(ABSOLUTE),
1484					  INDEX_GPR(0),
1485					  ELEM_SIZE(0));
1486    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1487					       SRC_SEL_Y(SQ_SEL_Y),
1488					       SRC_SEL_Z(SQ_SEL_0),
1489					       SRC_SEL_W(SQ_SEL_1),
1490					       BURST_COUNT(1),
1491					       VALID_PIXEL_MODE(0),
1492					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1493					       MARK(0),
1494					       BARRIER(0));
1495    /* 8 */
1496    shader[i++] = CF_DWORD0(ADDR(0),
1497			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1498    shader[i++] = CF_DWORD1(POP_COUNT(0),
1499			    CF_CONST(0),
1500			    COND(SQ_CF_COND_ACTIVE),
1501			    I_COUNT(0),
1502			    VALID_PIXEL_MODE(0),
1503			    CF_INST(SQ_CF_INST_RETURN),
1504			    BARRIER(1));
1505    /* 9 - non-mask sub */
1506    shader[i++] = CF_DWORD0(ADDR(50),
1507			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1508    shader[i++] = CF_DWORD1(POP_COUNT(0),
1509			    CF_CONST(0),
1510			    COND(SQ_CF_COND_ACTIVE),
1511			    I_COUNT(2),
1512			    VALID_PIXEL_MODE(0),
1513			    CF_INST(SQ_CF_INST_TC),
1514			    BARRIER(1));
1515
1516    /* 10 - ALU */
1517    shader[i++] = CF_ALU_DWORD0(ADDR(34),
1518				KCACHE_BANK0(0),
1519				KCACHE_BANK1(0),
1520				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
1521    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1522				KCACHE_ADDR0(0),
1523				KCACHE_ADDR1(0),
1524				I_COUNT(10),
1525				ALT_CONST(0),
1526				CF_INST(SQ_CF_INST_ALU),
1527				WHOLE_QUAD_MODE(0),
1528				BARRIER(1));
1529
1530    /* 11 - dst */
1531    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1532					  TYPE(SQ_EXPORT_POS),
1533					  RW_GPR(1),
1534					  RW_REL(ABSOLUTE),
1535					  INDEX_GPR(0),
1536					  ELEM_SIZE(0));
1537    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1538					       SRC_SEL_Y(SQ_SEL_Y),
1539					       SRC_SEL_Z(SQ_SEL_0),
1540					       SRC_SEL_W(SQ_SEL_1),
1541					       BURST_COUNT(0),
1542					       VALID_PIXEL_MODE(0),
1543					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1544					       MARK(0),
1545					       BARRIER(1));
1546    /* 12 - src */
1547    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1548					  TYPE(SQ_EXPORT_PARAM),
1549					  RW_GPR(0),
1550					  RW_REL(ABSOLUTE),
1551					  INDEX_GPR(0),
1552					  ELEM_SIZE(0));
1553    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1554					       SRC_SEL_Y(SQ_SEL_Y),
1555					       SRC_SEL_Z(SQ_SEL_0),
1556					       SRC_SEL_W(SQ_SEL_1),
1557					       BURST_COUNT(0),
1558					       VALID_PIXEL_MODE(0),
1559					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1560					       MARK(0),
1561					       BARRIER(0));
1562    /* 13 */
1563    shader[i++] = CF_DWORD0(ADDR(0),
1564			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1565    shader[i++] = CF_DWORD1(POP_COUNT(0),
1566			    CF_CONST(0),
1567			    COND(SQ_CF_COND_ACTIVE),
1568			    I_COUNT(0),
1569			    VALID_PIXEL_MODE(0),
1570			    CF_INST(SQ_CF_INST_RETURN),
1571			    BARRIER(1));
1572
1573    /* 14 srcX.x DOT4 - mask */
1574    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1575                             SRC0_REL(ABSOLUTE),
1576                             SRC0_ELEM(ELEM_X),
1577                             SRC0_NEG(0),
1578                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1579                             SRC1_REL(ABSOLUTE),
1580                             SRC1_ELEM(ELEM_X),
1581                             SRC1_NEG(0),
1582                             INDEX_MODE(SQ_INDEX_LOOP),
1583                             PRED_SEL(SQ_PRED_SEL_OFF),
1584                             LAST(0));
1585    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1586                                 SRC1_ABS(0),
1587                                 UPDATE_EXECUTE_MASK(0),
1588                                 UPDATE_PRED(0),
1589                                 WRITE_MASK(1),
1590                                 OMOD(SQ_ALU_OMOD_OFF),
1591                                 ALU_INST(SQ_OP2_INST_DOT4),
1592                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1593                                 DST_GPR(3),
1594                                 DST_REL(ABSOLUTE),
1595                                 DST_ELEM(ELEM_X),
1596                                 CLAMP(0));
1597
1598    /* 15 srcX.y DOT4 - mask */
1599    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1600                             SRC0_REL(ABSOLUTE),
1601                             SRC0_ELEM(ELEM_Y),
1602                             SRC0_NEG(0),
1603                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1604                             SRC1_REL(ABSOLUTE),
1605                             SRC1_ELEM(ELEM_Y),
1606                             SRC1_NEG(0),
1607                             INDEX_MODE(SQ_INDEX_LOOP),
1608                             PRED_SEL(SQ_PRED_SEL_OFF),
1609                             LAST(0));
1610    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1611                                 SRC1_ABS(0),
1612                                 UPDATE_EXECUTE_MASK(0),
1613                                 UPDATE_PRED(0),
1614                                 WRITE_MASK(0),
1615                                 OMOD(SQ_ALU_OMOD_OFF),
1616                                 ALU_INST(SQ_OP2_INST_DOT4),
1617                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1618                                 DST_GPR(3),
1619                                 DST_REL(ABSOLUTE),
1620                                 DST_ELEM(ELEM_Y),
1621                                 CLAMP(0));
1622
1623    /* 16 srcX.z DOT4 - mask */
1624    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1625                             SRC0_REL(ABSOLUTE),
1626                             SRC0_ELEM(ELEM_Z),
1627                             SRC0_NEG(0),
1628                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1629                             SRC1_REL(ABSOLUTE),
1630                             SRC1_ELEM(ELEM_Z),
1631                             SRC1_NEG(0),
1632                             INDEX_MODE(SQ_INDEX_LOOP),
1633                             PRED_SEL(SQ_PRED_SEL_OFF),
1634                             LAST(0));
1635    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1636                                 SRC1_ABS(0),
1637                                 UPDATE_EXECUTE_MASK(0),
1638                                 UPDATE_PRED(0),
1639                                 WRITE_MASK(0),
1640                                 OMOD(SQ_ALU_OMOD_OFF),
1641                                 ALU_INST(SQ_OP2_INST_DOT4),
1642                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1643                                 DST_GPR(3),
1644                                 DST_REL(ABSOLUTE),
1645                                 DST_ELEM(ELEM_Z),
1646                                 CLAMP(0));
1647
1648    /* 17 srcX.w DOT4 - mask */
1649    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1650                             SRC0_REL(ABSOLUTE),
1651                             SRC0_ELEM(ELEM_W),
1652                             SRC0_NEG(0),
1653                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1654                             SRC1_REL(ABSOLUTE),
1655                             SRC1_ELEM(ELEM_W),
1656                             SRC1_NEG(0),
1657                             INDEX_MODE(SQ_INDEX_LOOP),
1658                             PRED_SEL(SQ_PRED_SEL_OFF),
1659                             LAST(1));
1660    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1661                                 SRC1_ABS(0),
1662                                 UPDATE_EXECUTE_MASK(0),
1663                                 UPDATE_PRED(0),
1664                                 WRITE_MASK(0),
1665                                 OMOD(SQ_ALU_OMOD_OFF),
1666                                 ALU_INST(SQ_OP2_INST_DOT4),
1667                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1668                                 DST_GPR(3),
1669                                 DST_REL(ABSOLUTE),
1670                                 DST_ELEM(ELEM_W),
1671                                 CLAMP(0));
1672
1673    /* 18 srcY.x DOT4 - mask */
1674    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1675                             SRC0_REL(ABSOLUTE),
1676                             SRC0_ELEM(ELEM_X),
1677                             SRC0_NEG(0),
1678                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1679                             SRC1_REL(ABSOLUTE),
1680                             SRC1_ELEM(ELEM_X),
1681                             SRC1_NEG(0),
1682                             INDEX_MODE(SQ_INDEX_LOOP),
1683                             PRED_SEL(SQ_PRED_SEL_OFF),
1684                             LAST(0));
1685    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1686                                 SRC1_ABS(0),
1687                                 UPDATE_EXECUTE_MASK(0),
1688                                 UPDATE_PRED(0),
1689                                 WRITE_MASK(0),
1690                                 OMOD(SQ_ALU_OMOD_OFF),
1691                                 ALU_INST(SQ_OP2_INST_DOT4),
1692                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1693                                 DST_GPR(3),
1694                                 DST_REL(ABSOLUTE),
1695                                 DST_ELEM(ELEM_X),
1696                                 CLAMP(0));
1697
1698    /* 19 srcY.y DOT4 - mask */
1699    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1700                             SRC0_REL(ABSOLUTE),
1701                             SRC0_ELEM(ELEM_Y),
1702                             SRC0_NEG(0),
1703                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1704                             SRC1_REL(ABSOLUTE),
1705                             SRC1_ELEM(ELEM_Y),
1706                             SRC1_NEG(0),
1707                             INDEX_MODE(SQ_INDEX_LOOP),
1708                             PRED_SEL(SQ_PRED_SEL_OFF),
1709                             LAST(0));
1710    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1711                                 SRC1_ABS(0),
1712                                 UPDATE_EXECUTE_MASK(0),
1713                                 UPDATE_PRED(0),
1714                                 WRITE_MASK(1),
1715                                 OMOD(SQ_ALU_OMOD_OFF),
1716                                 ALU_INST(SQ_OP2_INST_DOT4),
1717                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1718                                 DST_GPR(3),
1719                                 DST_REL(ABSOLUTE),
1720                                 DST_ELEM(ELEM_Y),
1721                                 CLAMP(0));
1722
1723    /* 20 srcY.z DOT4 - mask */
1724    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1725                             SRC0_REL(ABSOLUTE),
1726                             SRC0_ELEM(ELEM_Z),
1727                             SRC0_NEG(0),
1728                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1729                             SRC1_REL(ABSOLUTE),
1730                             SRC1_ELEM(ELEM_Z),
1731                             SRC1_NEG(0),
1732                             INDEX_MODE(SQ_INDEX_LOOP),
1733                             PRED_SEL(SQ_PRED_SEL_OFF),
1734                             LAST(0));
1735    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1736                                 SRC1_ABS(0),
1737                                 UPDATE_EXECUTE_MASK(0),
1738                                 UPDATE_PRED(0),
1739                                 WRITE_MASK(0),
1740                                 OMOD(SQ_ALU_OMOD_OFF),
1741                                 ALU_INST(SQ_OP2_INST_DOT4),
1742                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1743                                 DST_GPR(3),
1744                                 DST_REL(ABSOLUTE),
1745                                 DST_ELEM(ELEM_Z),
1746                                 CLAMP(0));
1747
1748    /* 21 srcY.w DOT4 - mask */
1749    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1750                             SRC0_REL(ABSOLUTE),
1751                             SRC0_ELEM(ELEM_W),
1752                             SRC0_NEG(0),
1753                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1754                             SRC1_REL(ABSOLUTE),
1755                             SRC1_ELEM(ELEM_W),
1756                             SRC1_NEG(0),
1757                             INDEX_MODE(SQ_INDEX_LOOP),
1758                             PRED_SEL(SQ_PRED_SEL_OFF),
1759                             LAST(1));
1760    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1761                                 SRC1_ABS(0),
1762                                 UPDATE_EXECUTE_MASK(0),
1763                                 UPDATE_PRED(0),
1764                                 WRITE_MASK(0),
1765                                 OMOD(SQ_ALU_OMOD_OFF),
1766                                 ALU_INST(SQ_OP2_INST_DOT4),
1767                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1768                                 DST_GPR(3),
1769                                 DST_REL(ABSOLUTE),
1770                                 DST_ELEM(ELEM_W),
1771                                 CLAMP(0));
1772
1773    /* 22 maskX.x DOT4 - mask */
1774    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1775                             SRC0_REL(ABSOLUTE),
1776                             SRC0_ELEM(ELEM_X),
1777                             SRC0_NEG(0),
1778                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1779                             SRC1_REL(ABSOLUTE),
1780                             SRC1_ELEM(ELEM_X),
1781                             SRC1_NEG(0),
1782                             INDEX_MODE(SQ_INDEX_LOOP),
1783                             PRED_SEL(SQ_PRED_SEL_OFF),
1784                             LAST(0));
1785    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1786                                 SRC1_ABS(0),
1787                                 UPDATE_EXECUTE_MASK(0),
1788                                 UPDATE_PRED(0),
1789                                 WRITE_MASK(1),
1790                                 OMOD(SQ_ALU_OMOD_OFF),
1791                                 ALU_INST(SQ_OP2_INST_DOT4),
1792                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1793                                 DST_GPR(4),
1794                                 DST_REL(ABSOLUTE),
1795                                 DST_ELEM(ELEM_X),
1796                                 CLAMP(0));
1797
1798    /* 23 maskX.y DOT4 - mask */
1799    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1800                             SRC0_REL(ABSOLUTE),
1801                             SRC0_ELEM(ELEM_Y),
1802                             SRC0_NEG(0),
1803                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1804                             SRC1_REL(ABSOLUTE),
1805                             SRC1_ELEM(ELEM_Y),
1806                             SRC1_NEG(0),
1807                             INDEX_MODE(SQ_INDEX_LOOP),
1808                             PRED_SEL(SQ_PRED_SEL_OFF),
1809                             LAST(0));
1810    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1811                                 SRC1_ABS(0),
1812                                 UPDATE_EXECUTE_MASK(0),
1813                                 UPDATE_PRED(0),
1814                                 WRITE_MASK(0),
1815                                 OMOD(SQ_ALU_OMOD_OFF),
1816                                 ALU_INST(SQ_OP2_INST_DOT4),
1817                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1818                                 DST_GPR(4),
1819                                 DST_REL(ABSOLUTE),
1820                                 DST_ELEM(ELEM_Y),
1821                                 CLAMP(0));
1822
1823    /* 24 maskX.z DOT4 - mask */
1824    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1825                             SRC0_REL(ABSOLUTE),
1826                             SRC0_ELEM(ELEM_Z),
1827                             SRC0_NEG(0),
1828                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1829                             SRC1_REL(ABSOLUTE),
1830                             SRC1_ELEM(ELEM_Z),
1831                             SRC1_NEG(0),
1832                             INDEX_MODE(SQ_INDEX_LOOP),
1833                             PRED_SEL(SQ_PRED_SEL_OFF),
1834                             LAST(0));
1835    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1836                                 SRC1_ABS(0),
1837                                 UPDATE_EXECUTE_MASK(0),
1838                                 UPDATE_PRED(0),
1839                                 WRITE_MASK(0),
1840                                 OMOD(SQ_ALU_OMOD_OFF),
1841                                 ALU_INST(SQ_OP2_INST_DOT4),
1842                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1843                                 DST_GPR(4),
1844                                 DST_REL(ABSOLUTE),
1845                                 DST_ELEM(ELEM_Z),
1846                                 CLAMP(0));
1847
1848    /* 25 maskX.w DOT4 - mask */
1849    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1850                             SRC0_REL(ABSOLUTE),
1851                             SRC0_ELEM(ELEM_W),
1852                             SRC0_NEG(0),
1853                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1854                             SRC1_REL(ABSOLUTE),
1855                             SRC1_ELEM(ELEM_W),
1856                             SRC1_NEG(0),
1857                             INDEX_MODE(SQ_INDEX_LOOP),
1858                             PRED_SEL(SQ_PRED_SEL_OFF),
1859                             LAST(1));
1860    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1861                                 SRC1_ABS(0),
1862                                 UPDATE_EXECUTE_MASK(0),
1863                                 UPDATE_PRED(0),
1864                                 WRITE_MASK(0),
1865                                 OMOD(SQ_ALU_OMOD_OFF),
1866                                 ALU_INST(SQ_OP2_INST_DOT4),
1867                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1868                                 DST_GPR(4),
1869                                 DST_REL(ABSOLUTE),
1870                                 DST_ELEM(ELEM_W),
1871                                 CLAMP(0));
1872
1873    /* 26 maskY.x DOT4 - mask */
1874    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1875                             SRC0_REL(ABSOLUTE),
1876                             SRC0_ELEM(ELEM_X),
1877                             SRC0_NEG(0),
1878                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1879                             SRC1_REL(ABSOLUTE),
1880                             SRC1_ELEM(ELEM_X),
1881                             SRC1_NEG(0),
1882                             INDEX_MODE(SQ_INDEX_LOOP),
1883                             PRED_SEL(SQ_PRED_SEL_OFF),
1884                             LAST(0));
1885    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1886                                 SRC1_ABS(0),
1887                                 UPDATE_EXECUTE_MASK(0),
1888                                 UPDATE_PRED(0),
1889                                 WRITE_MASK(0),
1890                                 OMOD(SQ_ALU_OMOD_OFF),
1891                                 ALU_INST(SQ_OP2_INST_DOT4),
1892                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1893                                 DST_GPR(4),
1894                                 DST_REL(ABSOLUTE),
1895                                 DST_ELEM(ELEM_X),
1896                                 CLAMP(0));
1897
1898    /* 27 maskY.y DOT4 - mask */
1899    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1900                             SRC0_REL(ABSOLUTE),
1901                             SRC0_ELEM(ELEM_Y),
1902                             SRC0_NEG(0),
1903                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1904                             SRC1_REL(ABSOLUTE),
1905                             SRC1_ELEM(ELEM_Y),
1906                             SRC1_NEG(0),
1907                             INDEX_MODE(SQ_INDEX_LOOP),
1908                             PRED_SEL(SQ_PRED_SEL_OFF),
1909                             LAST(0));
1910    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1911                                 SRC1_ABS(0),
1912                                 UPDATE_EXECUTE_MASK(0),
1913                                 UPDATE_PRED(0),
1914                                 WRITE_MASK(1),
1915                                 OMOD(SQ_ALU_OMOD_OFF),
1916                                 ALU_INST(SQ_OP2_INST_DOT4),
1917                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1918                                 DST_GPR(4),
1919                                 DST_REL(ABSOLUTE),
1920                                 DST_ELEM(ELEM_Y),
1921                                 CLAMP(0));
1922
1923    /* 28 maskY.z DOT4 - mask */
1924    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1925                             SRC0_REL(ABSOLUTE),
1926                             SRC0_ELEM(ELEM_Z),
1927                             SRC0_NEG(0),
1928                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1929                             SRC1_REL(ABSOLUTE),
1930                             SRC1_ELEM(ELEM_Z),
1931                             SRC1_NEG(0),
1932                             INDEX_MODE(SQ_INDEX_LOOP),
1933                             PRED_SEL(SQ_PRED_SEL_OFF),
1934                             LAST(0));
1935    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1936                                 SRC1_ABS(0),
1937                                 UPDATE_EXECUTE_MASK(0),
1938                                 UPDATE_PRED(0),
1939                                 WRITE_MASK(0),
1940                                 OMOD(SQ_ALU_OMOD_OFF),
1941                                 ALU_INST(SQ_OP2_INST_DOT4),
1942                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1943                                 DST_GPR(4),
1944                                 DST_REL(ABSOLUTE),
1945                                 DST_ELEM(ELEM_Z),
1946                                 CLAMP(0));
1947
1948    /* 29 maskY.w DOT4 - mask */
1949    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1950                             SRC0_REL(ABSOLUTE),
1951                             SRC0_ELEM(ELEM_W),
1952                             SRC0_NEG(0),
1953                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1954                             SRC1_REL(ABSOLUTE),
1955                             SRC1_ELEM(ELEM_W),
1956                             SRC1_NEG(0),
1957                             INDEX_MODE(SQ_INDEX_LOOP),
1958                             PRED_SEL(SQ_PRED_SEL_OFF),
1959                             LAST(1));
1960    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1961                                 SRC1_ABS(0),
1962                                 UPDATE_EXECUTE_MASK(0),
1963                                 UPDATE_PRED(0),
1964                                 WRITE_MASK(0),
1965                                 OMOD(SQ_ALU_OMOD_OFF),
1966                                 ALU_INST(SQ_OP2_INST_DOT4),
1967                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1968                                 DST_GPR(4),
1969                                 DST_REL(ABSOLUTE),
1970                                 DST_ELEM(ELEM_W),
1971                                 CLAMP(0));
1972
1973    /* 30 srcX / w */
1974    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
1975                             SRC0_REL(ABSOLUTE),
1976                             SRC0_ELEM(ELEM_X),
1977                             SRC0_NEG(0),
1978                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1979                             SRC1_REL(ABSOLUTE),
1980                             SRC1_ELEM(ELEM_W),
1981                             SRC1_NEG(0),
1982                             INDEX_MODE(SQ_INDEX_AR_X),
1983                             PRED_SEL(SQ_PRED_SEL_OFF),
1984                             LAST(1));
1985    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1986                                 SRC1_ABS(0),
1987                                 UPDATE_EXECUTE_MASK(0),
1988                                 UPDATE_PRED(0),
1989                                 WRITE_MASK(1),
1990                                 OMOD(SQ_ALU_OMOD_OFF),
1991                                 ALU_INST(SQ_OP2_INST_MUL),
1992                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1993                                 DST_GPR(1),
1994                                 DST_REL(ABSOLUTE),
1995                                 DST_ELEM(ELEM_X),
1996                                 CLAMP(0));
1997
1998    /* 31 srcY / h */
1999    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
2000                             SRC0_REL(ABSOLUTE),
2001                             SRC0_ELEM(ELEM_Y),
2002                             SRC0_NEG(0),
2003                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2004                             SRC1_REL(ABSOLUTE),
2005                             SRC1_ELEM(ELEM_W),
2006                             SRC1_NEG(0),
2007                             INDEX_MODE(SQ_INDEX_AR_X),
2008                             PRED_SEL(SQ_PRED_SEL_OFF),
2009                             LAST(1));
2010    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2011                                 SRC1_ABS(0),
2012                                 UPDATE_EXECUTE_MASK(0),
2013                                 UPDATE_PRED(0),
2014                                 WRITE_MASK(1),
2015                                 OMOD(SQ_ALU_OMOD_OFF),
2016                                 ALU_INST(SQ_OP2_INST_MUL),
2017                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2018                                 DST_GPR(1),
2019                                 DST_REL(ABSOLUTE),
2020                                 DST_ELEM(ELEM_Y),
2021                                 CLAMP(0));
2022
2023    /* 32 maskX / w */
2024    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
2025                             SRC0_REL(ABSOLUTE),
2026                             SRC0_ELEM(ELEM_X),
2027                             SRC0_NEG(0),
2028                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
2029                             SRC1_REL(ABSOLUTE),
2030                             SRC1_ELEM(ELEM_W),
2031                             SRC1_NEG(0),
2032                             INDEX_MODE(SQ_INDEX_AR_X),
2033                             PRED_SEL(SQ_PRED_SEL_OFF),
2034                             LAST(1));
2035    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2036                                 SRC1_ABS(0),
2037                                 UPDATE_EXECUTE_MASK(0),
2038                                 UPDATE_PRED(0),
2039                                 WRITE_MASK(1),
2040                                 OMOD(SQ_ALU_OMOD_OFF),
2041                                 ALU_INST(SQ_OP2_INST_MUL),
2042                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2043                                 DST_GPR(0),
2044                                 DST_REL(ABSOLUTE),
2045                                 DST_ELEM(ELEM_X),
2046                                 CLAMP(0));
2047
2048    /* 33 maskY / h */
2049    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
2050                             SRC0_REL(ABSOLUTE),
2051                             SRC0_ELEM(ELEM_Y),
2052                             SRC0_NEG(0),
2053                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
2054                             SRC1_REL(ABSOLUTE),
2055                             SRC1_ELEM(ELEM_W),
2056                             SRC1_NEG(0),
2057                             INDEX_MODE(SQ_INDEX_AR_X),
2058                             PRED_SEL(SQ_PRED_SEL_OFF),
2059                             LAST(1));
2060    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2061                                 SRC1_ABS(0),
2062                                 UPDATE_EXECUTE_MASK(0),
2063                                 UPDATE_PRED(0),
2064                                 WRITE_MASK(1),
2065                                 OMOD(SQ_ALU_OMOD_OFF),
2066                                 ALU_INST(SQ_OP2_INST_MUL),
2067                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2068                                 DST_GPR(0),
2069                                 DST_REL(ABSOLUTE),
2070                                 DST_ELEM(ELEM_Y),
2071                                 CLAMP(0));
2072
2073    /* 34 srcX.x DOT4 - non-mask */
2074    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2075                             SRC0_REL(ABSOLUTE),
2076                             SRC0_ELEM(ELEM_X),
2077                             SRC0_NEG(0),
2078                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2079                             SRC1_REL(ABSOLUTE),
2080                             SRC1_ELEM(ELEM_X),
2081                             SRC1_NEG(0),
2082                             INDEX_MODE(SQ_INDEX_LOOP),
2083                             PRED_SEL(SQ_PRED_SEL_OFF),
2084                             LAST(0));
2085    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2086                                 SRC1_ABS(0),
2087                                 UPDATE_EXECUTE_MASK(0),
2088                                 UPDATE_PRED(0),
2089                                 WRITE_MASK(1),
2090                                 OMOD(SQ_ALU_OMOD_OFF),
2091                                 ALU_INST(SQ_OP2_INST_DOT4),
2092                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2093                                 DST_GPR(2),
2094                                 DST_REL(ABSOLUTE),
2095                                 DST_ELEM(ELEM_X),
2096                                 CLAMP(0));
2097
2098    /* 35 srcX.y DOT4 - non-mask */
2099    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2100                             SRC0_REL(ABSOLUTE),
2101                             SRC0_ELEM(ELEM_Y),
2102                             SRC0_NEG(0),
2103                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2104                             SRC1_REL(ABSOLUTE),
2105                             SRC1_ELEM(ELEM_Y),
2106                             SRC1_NEG(0),
2107                             INDEX_MODE(SQ_INDEX_LOOP),
2108                             PRED_SEL(SQ_PRED_SEL_OFF),
2109                             LAST(0));
2110    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2111                                 SRC1_ABS(0),
2112                                 UPDATE_EXECUTE_MASK(0),
2113                                 UPDATE_PRED(0),
2114                                 WRITE_MASK(0),
2115                                 OMOD(SQ_ALU_OMOD_OFF),
2116                                 ALU_INST(SQ_OP2_INST_DOT4),
2117                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2118                                 DST_GPR(2),
2119                                 DST_REL(ABSOLUTE),
2120                                 DST_ELEM(ELEM_Y),
2121                                 CLAMP(0));
2122
2123    /* 36 srcX.z DOT4 - non-mask */
2124    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2125                             SRC0_REL(ABSOLUTE),
2126                             SRC0_ELEM(ELEM_Z),
2127                             SRC0_NEG(0),
2128                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2129                             SRC1_REL(ABSOLUTE),
2130                             SRC1_ELEM(ELEM_Z),
2131                             SRC1_NEG(0),
2132                             INDEX_MODE(SQ_INDEX_LOOP),
2133                             PRED_SEL(SQ_PRED_SEL_OFF),
2134                             LAST(0));
2135    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2136                                 SRC1_ABS(0),
2137                                 UPDATE_EXECUTE_MASK(0),
2138                                 UPDATE_PRED(0),
2139                                 WRITE_MASK(0),
2140                                 OMOD(SQ_ALU_OMOD_OFF),
2141                                 ALU_INST(SQ_OP2_INST_DOT4),
2142                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2143                                 DST_GPR(2),
2144                                 DST_REL(ABSOLUTE),
2145                                 DST_ELEM(ELEM_Z),
2146                                 CLAMP(0));
2147
2148    /* 37 srcX.w DOT4 - non-mask */
2149    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2150                             SRC0_REL(ABSOLUTE),
2151                             SRC0_ELEM(ELEM_W),
2152                             SRC0_NEG(0),
2153                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2154                             SRC1_REL(ABSOLUTE),
2155                             SRC1_ELEM(ELEM_W),
2156                             SRC1_NEG(0),
2157                             INDEX_MODE(SQ_INDEX_LOOP),
2158                             PRED_SEL(SQ_PRED_SEL_OFF),
2159                             LAST(1));
2160    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2161                                 SRC1_ABS(0),
2162                                 UPDATE_EXECUTE_MASK(0),
2163                                 UPDATE_PRED(0),
2164                                 WRITE_MASK(0),
2165                                 OMOD(SQ_ALU_OMOD_OFF),
2166                                 ALU_INST(SQ_OP2_INST_DOT4),
2167                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2168                                 DST_GPR(2),
2169                                 DST_REL(ABSOLUTE),
2170                                 DST_ELEM(ELEM_W),
2171                                 CLAMP(0));
2172
2173    /* 38 srcY.x DOT4 - non-mask */
2174    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2175                             SRC0_REL(ABSOLUTE),
2176                             SRC0_ELEM(ELEM_X),
2177                             SRC0_NEG(0),
2178                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2179                             SRC1_REL(ABSOLUTE),
2180                             SRC1_ELEM(ELEM_X),
2181                             SRC1_NEG(0),
2182                             INDEX_MODE(SQ_INDEX_LOOP),
2183                             PRED_SEL(SQ_PRED_SEL_OFF),
2184                             LAST(0));
2185    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2186                                 SRC1_ABS(0),
2187                                 UPDATE_EXECUTE_MASK(0),
2188                                 UPDATE_PRED(0),
2189                                 WRITE_MASK(0),
2190                                 OMOD(SQ_ALU_OMOD_OFF),
2191                                 ALU_INST(SQ_OP2_INST_DOT4),
2192                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2193                                 DST_GPR(2),
2194                                 DST_REL(ABSOLUTE),
2195                                 DST_ELEM(ELEM_X),
2196                                 CLAMP(0));
2197
2198    /* 39 srcY.y DOT4 - non-mask */
2199    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2200                             SRC0_REL(ABSOLUTE),
2201                             SRC0_ELEM(ELEM_Y),
2202                             SRC0_NEG(0),
2203                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2204                             SRC1_REL(ABSOLUTE),
2205                             SRC1_ELEM(ELEM_Y),
2206                             SRC1_NEG(0),
2207                             INDEX_MODE(SQ_INDEX_LOOP),
2208                             PRED_SEL(SQ_PRED_SEL_OFF),
2209                             LAST(0));
2210    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2211                                 SRC1_ABS(0),
2212                                 UPDATE_EXECUTE_MASK(0),
2213                                 UPDATE_PRED(0),
2214                                 WRITE_MASK(1),
2215                                 OMOD(SQ_ALU_OMOD_OFF),
2216                                 ALU_INST(SQ_OP2_INST_DOT4),
2217                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2218                                 DST_GPR(2),
2219                                 DST_REL(ABSOLUTE),
2220                                 DST_ELEM(ELEM_Y),
2221                                 CLAMP(0));
2222
2223    /* 40 srcY.z DOT4 - non-mask */
2224    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2225                             SRC0_REL(ABSOLUTE),
2226                             SRC0_ELEM(ELEM_Z),
2227                             SRC0_NEG(0),
2228                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2229                             SRC1_REL(ABSOLUTE),
2230                             SRC1_ELEM(ELEM_Z),
2231                             SRC1_NEG(0),
2232                             INDEX_MODE(SQ_INDEX_LOOP),
2233                             PRED_SEL(SQ_PRED_SEL_OFF),
2234                             LAST(0));
2235    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2236                                 SRC1_ABS(0),
2237                                 UPDATE_EXECUTE_MASK(0),
2238                                 UPDATE_PRED(0),
2239                                 WRITE_MASK(0),
2240                                 OMOD(SQ_ALU_OMOD_OFF),
2241                                 ALU_INST(SQ_OP2_INST_DOT4),
2242                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2243                                 DST_GPR(2),
2244                                 DST_REL(ABSOLUTE),
2245                                 DST_ELEM(ELEM_Z),
2246                                 CLAMP(0));
2247
2248    /* 41 srcY.w DOT4 - non-mask */
2249    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2250                             SRC0_REL(ABSOLUTE),
2251                             SRC0_ELEM(ELEM_W),
2252                             SRC0_NEG(0),
2253                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2254                             SRC1_REL(ABSOLUTE),
2255                             SRC1_ELEM(ELEM_W),
2256                             SRC1_NEG(0),
2257                             INDEX_MODE(SQ_INDEX_LOOP),
2258                             PRED_SEL(SQ_PRED_SEL_OFF),
2259                             LAST(1));
2260    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2261                                 SRC1_ABS(0),
2262                                 UPDATE_EXECUTE_MASK(0),
2263                                 UPDATE_PRED(0),
2264                                 WRITE_MASK(0),
2265                                 OMOD(SQ_ALU_OMOD_OFF),
2266                                 ALU_INST(SQ_OP2_INST_DOT4),
2267                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2268                                 DST_GPR(2),
2269                                 DST_REL(ABSOLUTE),
2270                                 DST_ELEM(ELEM_W),
2271                                 CLAMP(0));
2272
2273    /* 42 srcX / w */
2274    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
2275                             SRC0_REL(ABSOLUTE),
2276                             SRC0_ELEM(ELEM_X),
2277                             SRC0_NEG(0),
2278                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2279                             SRC1_REL(ABSOLUTE),
2280                             SRC1_ELEM(ELEM_W),
2281                             SRC1_NEG(0),
2282                             INDEX_MODE(SQ_INDEX_AR_X),
2283                             PRED_SEL(SQ_PRED_SEL_OFF),
2284                             LAST(1));
2285    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2286                                 SRC1_ABS(0),
2287                                 UPDATE_EXECUTE_MASK(0),
2288                                 UPDATE_PRED(0),
2289                                 WRITE_MASK(1),
2290                                 OMOD(SQ_ALU_OMOD_OFF),
2291                                 ALU_INST(SQ_OP2_INST_MUL),
2292                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2293                                 DST_GPR(0),
2294                                 DST_REL(ABSOLUTE),
2295                                 DST_ELEM(ELEM_X),
2296                                 CLAMP(0));
2297
2298    /* 43 srcY / h */
2299    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
2300                             SRC0_REL(ABSOLUTE),
2301                             SRC0_ELEM(ELEM_Y),
2302                             SRC0_NEG(0),
2303                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2304                             SRC1_REL(ABSOLUTE),
2305                             SRC1_ELEM(ELEM_W),
2306                             SRC1_NEG(0),
2307                             INDEX_MODE(SQ_INDEX_AR_X),
2308                             PRED_SEL(SQ_PRED_SEL_OFF),
2309                             LAST(1));
2310    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2311                                 SRC1_ABS(0),
2312                                 UPDATE_EXECUTE_MASK(0),
2313                                 UPDATE_PRED(0),
2314                                 WRITE_MASK(1),
2315                                 OMOD(SQ_ALU_OMOD_OFF),
2316                                 ALU_INST(SQ_OP2_INST_MUL),
2317                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2318                                 DST_GPR(0),
2319                                 DST_REL(ABSOLUTE),
2320                                 DST_ELEM(ELEM_Y),
2321                                 CLAMP(0));
2322    /* mask vfetch - 44/45 - dst */
2323    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2324			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2325			     FETCH_WHOLE_QUAD(0),
2326			     BUFFER_ID(0),
2327			     SRC_GPR(0),
2328			     SRC_REL(ABSOLUTE),
2329			     SRC_SEL_X(SQ_SEL_X),
2330			     SRC_SEL_Y(SQ_SEL_Y),
2331			     STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
2332			     LDS_REQ(0),
2333			     COALESCED_READ(0));
2334    shader[i++] = VTX_DWORD1_GPR(DST_GPR(2),
2335				 DST_REL(0),
2336				 DST_SEL_X(SQ_SEL_X),
2337				 DST_SEL_Y(SQ_SEL_Y),
2338				 DST_SEL_Z(SQ_SEL_0),
2339				 DST_SEL_W(SQ_SEL_1),
2340				 USE_CONST_FIELDS(0),
2341				 DATA_FORMAT(FMT_32_32_FLOAT),
2342				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2343				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2344				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2345    shader[i++] = VTX_DWORD2(OFFSET(0),
2346#if X_BYTE_ORDER == X_BIG_ENDIAN
2347                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2348#else
2349			     ENDIAN_SWAP(ENDIAN_NONE),
2350#endif
2351			     CONST_BUF_NO_STRIDE(0),
2352			     ALT_CONST(0),
2353			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2354    shader[i++] = VTX_DWORD_PAD;
2355    /* 46/47 - src */
2356    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2357			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2358			     FETCH_WHOLE_QUAD(0),
2359			     BUFFER_ID(0),
2360			     SRC_GPR(0),
2361			     SRC_REL(ABSOLUTE),
2362			     SRC_SEL_X(SQ_SEL_X),
2363			     SRC_SEL_Y(SQ_SEL_Y),
2364			     STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
2365			     LDS_REQ(0),
2366			     COALESCED_READ(0));
2367    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
2368				 DST_REL(0),
2369				 DST_SEL_X(SQ_SEL_X),
2370				 DST_SEL_Y(SQ_SEL_Y),
2371				 DST_SEL_Z(SQ_SEL_1),
2372				 DST_SEL_W(SQ_SEL_0),
2373				 USE_CONST_FIELDS(0),
2374				 DATA_FORMAT(FMT_32_32_FLOAT),
2375				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2376				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2377				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2378    shader[i++] = VTX_DWORD2(OFFSET(8),
2379#if X_BYTE_ORDER == X_BIG_ENDIAN
2380                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2381#else
2382			     ENDIAN_SWAP(ENDIAN_NONE),
2383#endif
2384			     CONST_BUF_NO_STRIDE(0),
2385			     ALT_CONST(0),
2386			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2387    shader[i++] = VTX_DWORD_PAD;
2388    /* 48/49 - mask */
2389    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2390			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2391			     FETCH_WHOLE_QUAD(0),
2392			     BUFFER_ID(0),
2393			     SRC_GPR(0),
2394			     SRC_REL(ABSOLUTE),
2395			     SRC_SEL_X(SQ_SEL_X),
2396			     SRC_SEL_Y(SQ_SEL_Y),
2397			     STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
2398			     LDS_REQ(0),
2399			     COALESCED_READ(0));
2400    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
2401				 DST_REL(0),
2402				 DST_SEL_X(SQ_SEL_X),
2403				 DST_SEL_Y(SQ_SEL_Y),
2404				 DST_SEL_Z(SQ_SEL_1),
2405				 DST_SEL_W(SQ_SEL_0),
2406				 USE_CONST_FIELDS(0),
2407				 DATA_FORMAT(FMT_32_32_FLOAT),
2408				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2409				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2410				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2411    shader[i++] = VTX_DWORD2(OFFSET(16),
2412#if X_BYTE_ORDER == X_BIG_ENDIAN
2413                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2414#else
2415			     ENDIAN_SWAP(ENDIAN_NONE),
2416#endif
2417			     CONST_BUF_NO_STRIDE(0),
2418			     ALT_CONST(0),
2419			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2420    shader[i++] = VTX_DWORD_PAD;
2421
2422    /* no mask vfetch - 50/51 - dst */
2423    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2424			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2425			     FETCH_WHOLE_QUAD(0),
2426			     BUFFER_ID(0),
2427			     SRC_GPR(0),
2428			     SRC_REL(ABSOLUTE),
2429			     SRC_SEL_X(SQ_SEL_X),
2430			     SRC_SEL_Y(SQ_SEL_Y),
2431			     STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
2432			     LDS_REQ(0),
2433			     COALESCED_READ(0));
2434    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
2435				 DST_REL(0),
2436				 DST_SEL_X(SQ_SEL_X),
2437				 DST_SEL_Y(SQ_SEL_Y),
2438				 DST_SEL_Z(SQ_SEL_0),
2439				 DST_SEL_W(SQ_SEL_1),
2440				 USE_CONST_FIELDS(0),
2441				 DATA_FORMAT(FMT_32_32_FLOAT),
2442				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2443				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2444				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2445    shader[i++] = VTX_DWORD2(OFFSET(0),
2446#if X_BYTE_ORDER == X_BIG_ENDIAN
2447                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2448#else
2449			     ENDIAN_SWAP(ENDIAN_NONE),
2450#endif
2451			     CONST_BUF_NO_STRIDE(0),
2452			     ALT_CONST(0),
2453			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2454    shader[i++] = VTX_DWORD_PAD;
2455    /* 52/53 - src */
2456    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2457			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2458			     FETCH_WHOLE_QUAD(0),
2459			     BUFFER_ID(0),
2460			     SRC_GPR(0),
2461			     SRC_REL(ABSOLUTE),
2462			     SRC_SEL_X(SQ_SEL_X),
2463			     SRC_SEL_Y(SQ_SEL_Y),
2464			     STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
2465			     LDS_REQ(0),
2466			     COALESCED_READ(0));
2467    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
2468				 DST_REL(0),
2469				 DST_SEL_X(SQ_SEL_X),
2470				 DST_SEL_Y(SQ_SEL_Y),
2471				 DST_SEL_Z(SQ_SEL_1),
2472				 DST_SEL_W(SQ_SEL_0),
2473				 USE_CONST_FIELDS(0),
2474				 DATA_FORMAT(FMT_32_32_FLOAT),
2475				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2476				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2477				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2478    shader[i++] = VTX_DWORD2(OFFSET(8),
2479#if X_BYTE_ORDER == X_BIG_ENDIAN
2480                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2481#else
2482			     ENDIAN_SWAP(ENDIAN_NONE),
2483#endif
2484			     CONST_BUF_NO_STRIDE(0),
2485                             ALT_CONST(0),
2486                             BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2487    shader[i++] = VTX_DWORD_PAD;
2488
2489    return i;
2490}
2491
2492/* comp ps --------------------------------------- */
2493int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
2494{
2495    int i = 0;
2496
2497    /* 0 */
2498    /* call interp-fetch-mask if boolean1 == true */
2499    shader[i++] = CF_DWORD0(ADDR(12),
2500			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2501    shader[i++] = CF_DWORD1(POP_COUNT(0),
2502                            CF_CONST(1),
2503                            COND(SQ_CF_COND_BOOL),
2504                            I_COUNT(0),
2505                            VALID_PIXEL_MODE(0),
2506                            CF_INST(SQ_CF_INST_CALL),
2507                            BARRIER(0));
2508
2509    /* 1 */
2510    /* call read-constant-mask if boolean1 == false */
2511    shader[i++] = CF_DWORD0(ADDR(15),
2512			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2513    shader[i++] = CF_DWORD1(POP_COUNT(0),
2514                            CF_CONST(1),
2515                            COND(SQ_CF_COND_NOT_BOOL),
2516                            I_COUNT(0),
2517                            VALID_PIXEL_MODE(0),
2518                            CF_INST(SQ_CF_INST_CALL),
2519                            BARRIER(0));
2520
2521    /* 2 */
2522    /* call interp-fetch-src if boolean0 == true */
2523    shader[i++] = CF_DWORD0(ADDR(7),
2524			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2525    shader[i++] = CF_DWORD1(POP_COUNT(0),
2526                            CF_CONST(0),
2527                            COND(SQ_CF_COND_BOOL),
2528                            I_COUNT(0),
2529                            VALID_PIXEL_MODE(0),
2530                            CF_INST(SQ_CF_INST_CALL),
2531                            BARRIER(0));
2532
2533    /* 3 */
2534    /* call read-constant-src if boolean0 == false */
2535    shader[i++] = CF_DWORD0(ADDR(10),
2536			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2537    shader[i++] = CF_DWORD1(POP_COUNT(0),
2538                            CF_CONST(0),
2539                            COND(SQ_CF_COND_NOT_BOOL),
2540                            I_COUNT(0),
2541                            VALID_PIXEL_MODE(0),
2542                            CF_INST(SQ_CF_INST_CALL),
2543                            BARRIER(0));
2544    /* 4 */
    /* src IN mask (GPR0 := GPR0 .* GPR1) */
2546    shader[i++] = CF_ALU_DWORD0(ADDR(17),
2547				KCACHE_BANK0(0),
2548				KCACHE_BANK1(0),
2549				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
2550    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2551				KCACHE_ADDR0(0),
2552				KCACHE_ADDR1(0),
2553				I_COUNT(4),
2554				ALT_CONST(0),
2555				CF_INST(SQ_CF_INST_ALU),
2556				WHOLE_QUAD_MODE(0),
2557				BARRIER(1));
2558
2559    /* 5 */
2560    /* export pixel data */
2561    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
2562					  TYPE(SQ_EXPORT_PIXEL),
2563					  RW_GPR(0),
2564					  RW_REL(ABSOLUTE),
2565					  INDEX_GPR(0),
2566					  ELEM_SIZE(1));
2567    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
2568					       SRC_SEL_Y(SQ_SEL_Y),
2569					       SRC_SEL_Z(SQ_SEL_Z),
2570					       SRC_SEL_W(SQ_SEL_W),
2571					       BURST_COUNT(1),
2572					       VALID_PIXEL_MODE(0),
2573					       CF_INST(SQ_CF_INST_EXPORT_DONE),
2574					       MARK(0),
2575					       BARRIER(1));
2576
2577    /* 6 */
2578    /* end of program */
2579    shader[i++] = CF_DWORD0(ADDR(0),
2580			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2581    shader[i++] = CF_DWORD1(POP_COUNT(0),
2582			    CF_CONST(0),
2583			    COND(SQ_CF_COND_ACTIVE),
2584			    I_COUNT(0),
2585			    VALID_PIXEL_MODE(0),
2586			    CF_INST(SQ_CF_INST_END),
2587			    BARRIER(1));
2588
2589    /* subroutine interp-fetch-src */
2590
2591    /* 7 */
2592    /* interpolate src */
2593    shader[i++] = CF_ALU_DWORD0(ADDR(21),
2594				KCACHE_BANK0(0),
2595				KCACHE_BANK1(0),
2596				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
2597    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2598				KCACHE_ADDR0(0),
2599				KCACHE_ADDR1(0),
2600				I_COUNT(4),
2601				ALT_CONST(0),
2602				CF_INST(SQ_CF_INST_ALU),
2603				WHOLE_QUAD_MODE(0),
2604				BARRIER(1));
2605
2606    /* 8 */
2607    /* texture fetch src into GPR0 */
2608    shader[i++] = CF_DWORD0(ADDR(26),
2609			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2610    shader[i++] = CF_DWORD1(POP_COUNT(0),
2611			    CF_CONST(0),
2612			    COND(SQ_CF_COND_ACTIVE),
2613			    I_COUNT(1),
2614			    VALID_PIXEL_MODE(0),
2615			    CF_INST(SQ_CF_INST_TC),
2616			    BARRIER(1));
2617
2618    /* 9 */
2619    /* return */
2620    shader[i++] = CF_DWORD0(ADDR(0),
2621			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2622    shader[i++] = CF_DWORD1(POP_COUNT(0),
2623			    CF_CONST(0),
2624			    COND(SQ_CF_COND_ACTIVE),
2625			    I_COUNT(0),
2626			    VALID_PIXEL_MODE(0),
2627			    CF_INST(SQ_CF_INST_RETURN),
2628			    BARRIER(0));
2629
2630    /* subroutine read-constant-src */
2631
2632    /* 10 */
2633    /* read constants into GPR0 */
2634    shader[i++] = CF_ALU_DWORD0(ADDR(28),
2635				KCACHE_BANK0(0),
2636				KCACHE_BANK1(0),
2637				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
2638    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2639				KCACHE_ADDR0(0),
2640				KCACHE_ADDR1(0),
2641				I_COUNT(4),
2642				ALT_CONST(1),
2643				CF_INST(SQ_CF_INST_ALU),
2644				WHOLE_QUAD_MODE(0),
2645				BARRIER(1));
2646
2647    /* 11 */
2648    /* return */
2649    shader[i++] = CF_DWORD0(ADDR(0),
2650			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2651    shader[i++] = CF_DWORD1(POP_COUNT(0),
2652			    CF_CONST(0),
2653			    COND(SQ_CF_COND_ACTIVE),
2654			    I_COUNT(0),
2655			    VALID_PIXEL_MODE(0),
2656			    CF_INST(SQ_CF_INST_RETURN),
2657			    BARRIER(0));
2658
2659    /* subroutine interp-fetch-mask */
2660
2661    /* 12 */
2662    /* interpolate mask */
2663    shader[i++] = CF_ALU_DWORD0(ADDR(32),
2664				KCACHE_BANK0(0),
2665				KCACHE_BANK1(0),
2666				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
2667    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2668				KCACHE_ADDR0(0),
2669				KCACHE_ADDR1(0),
2670				I_COUNT(4),
2671				ALT_CONST(0),
2672				CF_INST(SQ_CF_INST_ALU),
2673				WHOLE_QUAD_MODE(0),
2674				BARRIER(1));
2675
2676    /* 13 */
2677    /* texture fetch mask into GPR1 */
2678    shader[i++] = CF_DWORD0(ADDR(36),
2679			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2680    shader[i++] = CF_DWORD1(POP_COUNT(0),
2681			    CF_CONST(0),
2682			    COND(SQ_CF_COND_ACTIVE),
2683			    I_COUNT(1),
2684			    VALID_PIXEL_MODE(0),
2685			    CF_INST(SQ_CF_INST_TC),
2686			    BARRIER(1));
2687
2688    /* 14 */
2689    /* return */
2690    shader[i++] = CF_DWORD0(ADDR(0),
2691			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2692    shader[i++] = CF_DWORD1(POP_COUNT(0),
2693			    CF_CONST(0),
2694			    COND(SQ_CF_COND_ACTIVE),
2695			    I_COUNT(0),
2696			    VALID_PIXEL_MODE(0),
2697			    CF_INST(SQ_CF_INST_RETURN),
2698			    BARRIER(0));
2699
    /* subroutine read-constant-mask */
2701
2702    /* 15 */
2703    /* read constants into GPR1 */
2704    shader[i++] = CF_ALU_DWORD0(ADDR(38),
2705				KCACHE_BANK0(0),
2706				KCACHE_BANK1(0),
2707				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
2708    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2709				KCACHE_ADDR0(0),
2710				KCACHE_ADDR1(0),
2711				I_COUNT(4),
2712				ALT_CONST(1),
2713				CF_INST(SQ_CF_INST_ALU),
2714				WHOLE_QUAD_MODE(0),
2715				BARRIER(1));
2716
2717    /* 16 */
2718    /* return */
2719    shader[i++] = CF_DWORD0(ADDR(0),
2720			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2721    shader[i++] = CF_DWORD1(POP_COUNT(0),
2722			    CF_CONST(0),
2723			    COND(SQ_CF_COND_ACTIVE),
2724			    I_COUNT(0),
2725			    VALID_PIXEL_MODE(0),
2726			    CF_INST(SQ_CF_INST_RETURN),
2727			    BARRIER(0));
2728
2729    /* ALU clauses */
2730
2731    /* 17 */
2732    /* MUL gpr[0].x gpr[0].x gpr[1].x */
2733    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2734			     SRC0_REL(ABSOLUTE),
2735			     SRC0_ELEM(ELEM_X),
2736			     SRC0_NEG(0),
2737			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
2738			     SRC1_REL(ABSOLUTE),
2739			     SRC1_ELEM(ELEM_X),
2740			     SRC1_NEG(0),
2741			     INDEX_MODE(SQ_INDEX_LOOP),
2742			     PRED_SEL(SQ_PRED_SEL_OFF),
2743			     LAST(0));
2744    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2745				 SRC1_ABS(0),
2746				 UPDATE_EXECUTE_MASK(0),
2747				 UPDATE_PRED(0),
2748				 WRITE_MASK(1),
2749				 OMOD(SQ_ALU_OMOD_OFF),
2750				 ALU_INST(SQ_OP2_INST_MUL),
2751				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2752				 DST_GPR(0),
2753				 DST_REL(ABSOLUTE),
2754				 DST_ELEM(ELEM_X),
2755				 CLAMP(1));
2756
2757    /* 18 */
2758    /* MUL gpr[0].y gpr[0].y gpr[1].y */
2759    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2760			     SRC0_REL(ABSOLUTE),
2761			     SRC0_ELEM(ELEM_Y),
2762			     SRC0_NEG(0),
2763			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
2764			     SRC1_REL(ABSOLUTE),
2765			     SRC1_ELEM(ELEM_Y),
2766			     SRC1_NEG(0),
2767			     INDEX_MODE(SQ_INDEX_LOOP),
2768			     PRED_SEL(SQ_PRED_SEL_OFF),
2769			     LAST(0));
2770    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2771				 SRC1_ABS(0),
2772				 UPDATE_EXECUTE_MASK(0),
2773				 UPDATE_PRED(0),
2774				 WRITE_MASK(1),
2775				 OMOD(SQ_ALU_OMOD_OFF),
2776				 ALU_INST(SQ_OP2_INST_MUL),
2777				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2778				 DST_GPR(0),
2779				 DST_REL(ABSOLUTE),
2780				 DST_ELEM(ELEM_Y),
2781				 CLAMP(1));
2782    /* 19 */
2783    /* MUL gpr[0].z gpr[0].z gpr[1].z */
2784    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2785			     SRC0_REL(ABSOLUTE),
2786			     SRC0_ELEM(ELEM_Z),
2787			     SRC0_NEG(0),
2788			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
2789			     SRC1_REL(ABSOLUTE),
2790			     SRC1_ELEM(ELEM_Z),
2791			     SRC1_NEG(0),
2792			     INDEX_MODE(SQ_INDEX_LOOP),
2793			     PRED_SEL(SQ_PRED_SEL_OFF),
2794			     LAST(0));
2795    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2796				 SRC1_ABS(0),
2797				 UPDATE_EXECUTE_MASK(0),
2798				 UPDATE_PRED(0),
2799				 WRITE_MASK(1),
2800				 OMOD(SQ_ALU_OMOD_OFF),
2801				 ALU_INST(SQ_OP2_INST_MUL),
2802				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2803				 DST_GPR(0),
2804				 DST_REL(ABSOLUTE),
2805				 DST_ELEM(ELEM_Z),
2806				 CLAMP(1));
2807    /* 20 */
2808    /* MUL gpr[0].w gpr[0].w gpr[1].w */
2809    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2810			     SRC0_REL(ABSOLUTE),
2811			     SRC0_ELEM(ELEM_W),
2812			     SRC0_NEG(0),
2813			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
2814			     SRC1_REL(ABSOLUTE),
2815			     SRC1_ELEM(ELEM_W),
2816			     SRC1_NEG(0),
2817			     INDEX_MODE(SQ_INDEX_LOOP),
2818			     PRED_SEL(SQ_PRED_SEL_OFF),
2819			     LAST(1));
2820    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2821				 SRC1_ABS(0),
2822				 UPDATE_EXECUTE_MASK(0),
2823				 UPDATE_PRED(0),
2824				 WRITE_MASK(1),
2825				 OMOD(SQ_ALU_OMOD_OFF),
2826				 ALU_INST(SQ_OP2_INST_MUL),
2827				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2828				 DST_GPR(0),
2829				 DST_REL(ABSOLUTE),
2830				 DST_ELEM(ELEM_W),
2831				 CLAMP(1));
2832
2833    /* 21 */
2834    /* INTERP_XY GPR0.x, GPR0.y PARAM0.x */
2835    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2836			     SRC0_REL(ABSOLUTE),
2837			     SRC0_ELEM(ELEM_Y),
2838			     SRC0_NEG(0),
2839			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2840			     SRC1_REL(ABSOLUTE),
2841			     SRC1_ELEM(ELEM_X),
2842			     SRC1_NEG(0),
2843			     INDEX_MODE(SQ_INDEX_AR_X),
2844			     PRED_SEL(SQ_PRED_SEL_OFF),
2845			     LAST(0));
2846    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2847				 SRC1_ABS(0),
2848				 UPDATE_EXECUTE_MASK(0),
2849				 UPDATE_PRED(0),
2850				 WRITE_MASK(1),
2851				 OMOD(SQ_ALU_OMOD_OFF),
2852				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2853				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2854				 DST_GPR(0),
2855				 DST_REL(ABSOLUTE),
2856				 DST_ELEM(ELEM_X),
2857				 CLAMP(0));
2858    /* 22 */
2859    /* INTERP_XY GPR0.y, GPR0.x PARAM0.x */
2860    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2861			     SRC0_REL(ABSOLUTE),
2862			     SRC0_ELEM(ELEM_X),
2863			     SRC0_NEG(0),
2864			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2865			     SRC1_REL(ABSOLUTE),
2866			     SRC1_ELEM(ELEM_X),
2867			     SRC1_NEG(0),
2868			     INDEX_MODE(SQ_INDEX_AR_X),
2869			     PRED_SEL(SQ_PRED_SEL_OFF),
2870			     LAST(0));
2871    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2872				 SRC1_ABS(0),
2873				 UPDATE_EXECUTE_MASK(0),
2874				 UPDATE_PRED(0),
2875				 WRITE_MASK(1),
2876				 OMOD(SQ_ALU_OMOD_OFF),
2877				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2878				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2879				 DST_GPR(0),
2880				 DST_REL(ABSOLUTE),
2881				 DST_ELEM(ELEM_Y),
2882				 CLAMP(0));
2883    /* 23 */
2884    /* INTERP_XY GPR0.z, GPR0.y PARAM0.x */
2885    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2886			     SRC0_REL(ABSOLUTE),
2887			     SRC0_ELEM(ELEM_Y),
2888			     SRC0_NEG(0),
2889			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2890			     SRC1_REL(ABSOLUTE),
2891			     SRC1_ELEM(ELEM_X),
2892			     SRC1_NEG(0),
2893			     INDEX_MODE(SQ_INDEX_AR_X),
2894			     PRED_SEL(SQ_PRED_SEL_OFF),
2895			     LAST(0));
2896    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2897				 SRC1_ABS(0),
2898				 UPDATE_EXECUTE_MASK(0),
2899				 UPDATE_PRED(0),
2900				 WRITE_MASK(0),
2901				 OMOD(SQ_ALU_OMOD_OFF),
2902				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2903				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2904				 DST_GPR(0),
2905				 DST_REL(ABSOLUTE),
2906				 DST_ELEM(ELEM_Z),
2907				 CLAMP(0));
2908
2909    /* 24 */
2910    /* INTERP_XY GPR0.w, GPR0.x PARAM0.x */
2911    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2912			     SRC0_REL(ABSOLUTE),
2913			     SRC0_ELEM(ELEM_X),
2914			     SRC0_NEG(0),
2915			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2916			     SRC1_REL(ABSOLUTE),
2917			     SRC1_ELEM(ELEM_X),
2918			     SRC1_NEG(0),
2919			     INDEX_MODE(SQ_INDEX_AR_X),
2920			     PRED_SEL(SQ_PRED_SEL_OFF),
2921			     LAST(1));
2922    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2923				 SRC1_ABS(0),
2924				 UPDATE_EXECUTE_MASK(0),
2925				 UPDATE_PRED(0),
2926				 WRITE_MASK(0),
2927				 OMOD(SQ_ALU_OMOD_OFF),
2928				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2929				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2930				 DST_GPR(0),
2931				 DST_REL(ABSOLUTE),
2932				 DST_ELEM(ELEM_W),
2933				 CLAMP(0));
2934
2935    /* 25 */
2936    shader[i++] = 0;
2937    shader[i++] = 0;
2938
2939    /* 26/27 */
2940    /* SAMPLE RID=0 GPR0, GPR0 */
2941    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
2942			     INST_MOD(0),
2943			     FETCH_WHOLE_QUAD(0),
2944			     RESOURCE_ID(0),
2945			     SRC_GPR(0),
2946			     SRC_REL(ABSOLUTE),
2947			     ALT_CONST(0),
2948			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
2949			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
2950    shader[i++] = TEX_DWORD1(DST_GPR(0),
2951			     DST_REL(ABSOLUTE),
2952			     DST_SEL_X(SQ_SEL_X),
2953			     DST_SEL_Y(SQ_SEL_Y),
2954			     DST_SEL_Z(SQ_SEL_Z),
2955			     DST_SEL_W(SQ_SEL_W),
2956			     LOD_BIAS(0),
2957			     COORD_TYPE_X(TEX_NORMALIZED),
2958			     COORD_TYPE_Y(TEX_NORMALIZED),
2959			     COORD_TYPE_Z(TEX_NORMALIZED),
2960			     COORD_TYPE_W(TEX_NORMALIZED));
2961    shader[i++] = TEX_DWORD2(OFFSET_X(0),
2962			     OFFSET_Y(0),
2963			     OFFSET_Z(0),
2964			     SAMPLER_ID(0),
2965			     SRC_SEL_X(SQ_SEL_X),
2966			     SRC_SEL_Y(SQ_SEL_Y),
2967			     SRC_SEL_Z(SQ_SEL_0),
2968			     SRC_SEL_W(SQ_SEL_1));
2969    shader[i++] = TEX_DWORD_PAD;
2970
2971    /* 28 */
2972    /* MOV GPR0.x, KC4.x */
2973    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4),
2974			     SRC0_REL(ABSOLUTE),
2975			     SRC0_ELEM(ELEM_X),
2976			     SRC0_NEG(0),
2977			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
2978			     SRC1_REL(ABSOLUTE),
2979			     SRC1_ELEM(ELEM_X),
2980			     SRC1_NEG(0),
2981			     INDEX_MODE(SQ_INDEX_AR_X),
2982			     PRED_SEL(SQ_PRED_SEL_OFF),
2983			     LAST(0));
2984    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2985				 SRC1_ABS(0),
2986				 UPDATE_EXECUTE_MASK(0),
2987				 UPDATE_PRED(0),
2988				 WRITE_MASK(1),
2989				 OMOD(SQ_ALU_OMOD_OFF),
2990				 ALU_INST(SQ_OP2_INST_MOV),
2991				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2992				 DST_GPR(0),
2993				 DST_REL(ABSOLUTE),
2994				 DST_ELEM(ELEM_X),
2995				 CLAMP(1));
2996
2997    /* 29 */
2998    /* MOV GPR0.y, KC4.y */
2999    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4),
3000			     SRC0_REL(ABSOLUTE),
3001			     SRC0_ELEM(ELEM_Y),
3002			     SRC0_NEG(0),
3003			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
3004			     SRC1_REL(ABSOLUTE),
3005			     SRC1_ELEM(ELEM_X),
3006			     SRC1_NEG(0),
3007			     INDEX_MODE(SQ_INDEX_AR_X),
3008			     PRED_SEL(SQ_PRED_SEL_OFF),
3009			     LAST(0));
3010    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3011				 SRC1_ABS(0),
3012				 UPDATE_EXECUTE_MASK(0),
3013				 UPDATE_PRED(0),
3014				 WRITE_MASK(1),
3015				 OMOD(SQ_ALU_OMOD_OFF),
3016				 ALU_INST(SQ_OP2_INST_MOV),
3017				 BANK_SWIZZLE(SQ_ALU_VEC_012),
3018				 DST_GPR(0),
3019				 DST_REL(ABSOLUTE),
3020				 DST_ELEM(ELEM_Y),
3021				 CLAMP(1));
3022
3023    /* 30  */
3024    /* MOV GPR0.z, KC4.z */
3025    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4),
3026			     SRC0_REL(ABSOLUTE),
3027			     SRC0_ELEM(ELEM_Z),
3028			     SRC0_NEG(0),
3029			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
3030			     SRC1_REL(ABSOLUTE),
3031			     SRC1_ELEM(ELEM_X),
3032			     SRC1_NEG(0),
3033			     INDEX_MODE(SQ_INDEX_AR_X),
3034			     PRED_SEL(SQ_PRED_SEL_OFF),
3035			     LAST(0));
3036    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3037				 SRC1_ABS(0),
3038				 UPDATE_EXECUTE_MASK(0),
3039				 UPDATE_PRED(0),
3040				 WRITE_MASK(1),
3041				 OMOD(SQ_ALU_OMOD_OFF),
3042				 ALU_INST(SQ_OP2_INST_MOV),
3043				 BANK_SWIZZLE(SQ_ALU_VEC_012),
3044				 DST_GPR(0),
3045				 DST_REL(ABSOLUTE),
3046				 DST_ELEM(ELEM_Z),
3047				 CLAMP(1));
3048
3049    /* 31 */
3050    /* MOV GPR0.w, KC4.w */
3051    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4),
3052			     SRC0_REL(ABSOLUTE),
3053			     SRC0_ELEM(ELEM_W),
3054			     SRC0_NEG(0),
3055			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
3056			     SRC1_REL(ABSOLUTE),
3057			     SRC1_ELEM(ELEM_X),
3058			     SRC1_NEG(0),
3059			     INDEX_MODE(SQ_INDEX_AR_X),
3060			     PRED_SEL(SQ_PRED_SEL_OFF),
3061			     LAST(1));
3062    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3063				 SRC1_ABS(0),
3064				 UPDATE_EXECUTE_MASK(0),
3065				 UPDATE_PRED(0),
3066				 WRITE_MASK(1),
3067				 OMOD(SQ_ALU_OMOD_OFF),
3068				 ALU_INST(SQ_OP2_INST_MOV),
3069				 BANK_SWIZZLE(SQ_ALU_VEC_012),
3070				 DST_GPR(0),
3071				 DST_REL(ABSOLUTE),
3072				 DST_ELEM(ELEM_W),
3073				 CLAMP(1));
3074
3075    /* 32 */
3076    /* INTERP_XY GPR1.x, PARAM1 */
3077    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
3078			     SRC0_REL(ABSOLUTE),
3079			     SRC0_ELEM(ELEM_Y),
3080			     SRC0_NEG(0),
3081			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
3082			     SRC1_REL(ABSOLUTE),
3083			     SRC1_ELEM(ELEM_X),
3084			     SRC1_NEG(0),
3085			     INDEX_MODE(SQ_INDEX_AR_X),
3086			     PRED_SEL(SQ_PRED_SEL_OFF),
3087			     LAST(0));
3088    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3089				 SRC1_ABS(0),
3090				 UPDATE_EXECUTE_MASK(0),
3091				 UPDATE_PRED(0),
3092				 WRITE_MASK(1),
3093				 OMOD(SQ_ALU_OMOD_OFF),
3094				 ALU_INST(SQ_OP2_INST_INTERP_XY),
3095				 BANK_SWIZZLE(SQ_ALU_VEC_210),
3096				 DST_GPR(1),
3097				 DST_REL(ABSOLUTE),
3098				 DST_ELEM(ELEM_X),
3099				 CLAMP(0));
3100    /* 33 */
3101    /* INTERP_XY GPR1.y, PARAM1 */
3102    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
3103			     SRC0_REL(ABSOLUTE),
3104			     SRC0_ELEM(ELEM_X),
3105			     SRC0_NEG(0),
3106			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
3107			     SRC1_REL(ABSOLUTE),
3108			     SRC1_ELEM(ELEM_X),
3109			     SRC1_NEG(0),
3110			     INDEX_MODE(SQ_INDEX_AR_X),
3111			     PRED_SEL(SQ_PRED_SEL_OFF),
3112			     LAST(0));
3113    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3114				 SRC1_ABS(0),
3115				 UPDATE_EXECUTE_MASK(0),
3116				 UPDATE_PRED(0),
3117				 WRITE_MASK(1),
3118				 OMOD(SQ_ALU_OMOD_OFF),
3119				 ALU_INST(SQ_OP2_INST_INTERP_XY),
3120				 BANK_SWIZZLE(SQ_ALU_VEC_210),
3121				 DST_GPR(1),
3122				 DST_REL(ABSOLUTE),
3123				 DST_ELEM(ELEM_Y),
3124				 CLAMP(0));
3125    /* 34 */
3126    /* INTERP_XY GPR1.z, PARAM1 */
3127    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
3128			     SRC0_REL(ABSOLUTE),
3129			     SRC0_ELEM(ELEM_Y),
3130			     SRC0_NEG(0),
3131			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
3132			     SRC1_REL(ABSOLUTE),
3133			     SRC1_ELEM(ELEM_X),
3134			     SRC1_NEG(0),
3135			     INDEX_MODE(SQ_INDEX_AR_X),
3136			     PRED_SEL(SQ_PRED_SEL_OFF),
3137			     LAST(0));
3138    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3139				 SRC1_ABS(0),
3140				 UPDATE_EXECUTE_MASK(0),
3141				 UPDATE_PRED(0),
3142				 WRITE_MASK(0),
3143				 OMOD(SQ_ALU_OMOD_OFF),
3144				 ALU_INST(SQ_OP2_INST_INTERP_XY),
3145				 BANK_SWIZZLE(SQ_ALU_VEC_210),
3146				 DST_GPR(1),
3147				 DST_REL(ABSOLUTE),
3148				 DST_ELEM(ELEM_Z),
3149				 CLAMP(0));
3150    /* 35 */
3151    /* INTERP_XY GPR1.w, PARAM1 */
3152    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
3153			     SRC0_REL(ABSOLUTE),
3154			     SRC0_ELEM(ELEM_X),
3155			     SRC0_NEG(0),
3156			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
3157			     SRC1_REL(ABSOLUTE),
3158			     SRC1_ELEM(ELEM_X),
3159			     SRC1_NEG(0),
3160			     INDEX_MODE(SQ_INDEX_AR_X),
3161			     PRED_SEL(SQ_PRED_SEL_OFF),
3162			     LAST(1));
3163    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3164				 SRC1_ABS(0),
3165				 UPDATE_EXECUTE_MASK(0),
3166				 UPDATE_PRED(0),
3167				 WRITE_MASK(0),
3168				 OMOD(SQ_ALU_OMOD_OFF),
3169				 ALU_INST(SQ_OP2_INST_INTERP_XY),
3170				 BANK_SWIZZLE(SQ_ALU_VEC_210),
3171				 DST_GPR(1),
3172				 DST_REL(ABSOLUTE),
3173				 DST_ELEM(ELEM_W),
3174				 CLAMP(0));
3175
3176    /* 36/37 */
3177    /* SAMPLE RID=1 GPR1, GPR1 */
3178    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
3179			     INST_MOD(0),
3180			     FETCH_WHOLE_QUAD(0),
3181			     RESOURCE_ID(1),
3182			     SRC_GPR(1),
3183			     SRC_REL(ABSOLUTE),
3184			     ALT_CONST(0),
3185			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
3186			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
3187    shader[i++] = TEX_DWORD1(DST_GPR(1),
3188			     DST_REL(ABSOLUTE),
3189			     DST_SEL_X(SQ_SEL_X),
3190			     DST_SEL_Y(SQ_SEL_Y),
3191			     DST_SEL_Z(SQ_SEL_Z),
3192			     DST_SEL_W(SQ_SEL_W),
3193			     LOD_BIAS(0),
3194			     COORD_TYPE_X(TEX_NORMALIZED),
3195			     COORD_TYPE_Y(TEX_NORMALIZED),
3196			     COORD_TYPE_Z(TEX_NORMALIZED),
3197			     COORD_TYPE_W(TEX_NORMALIZED));
3198    shader[i++] = TEX_DWORD2(OFFSET_X(0),
3199			     OFFSET_Y(0),
3200			     OFFSET_Z(0),
3201			     SAMPLER_ID(1),
3202			     SRC_SEL_X(SQ_SEL_X),
3203			     SRC_SEL_Y(SQ_SEL_Y),
3204			     SRC_SEL_Z(SQ_SEL_0),
3205			     SRC_SEL_W(SQ_SEL_1));
3206    shader[i++] = TEX_DWORD_PAD;
3207
3208    /* 38 */
3209    /* MOV GPR1.x, KC5.x */
3210    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5),
3211			     SRC0_REL(ABSOLUTE),
3212			     SRC0_ELEM(ELEM_X),
3213			     SRC0_NEG(0),
3214			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
3215			     SRC1_REL(ABSOLUTE),
3216			     SRC1_ELEM(ELEM_X),
3217			     SRC1_NEG(0),
3218			     INDEX_MODE(SQ_INDEX_AR_X),
3219			     PRED_SEL(SQ_PRED_SEL_OFF),
3220			     LAST(0));
3221    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3222				 SRC1_ABS(0),
3223				 UPDATE_EXECUTE_MASK(0),
3224				 UPDATE_PRED(0),
3225				 WRITE_MASK(1),
3226				 OMOD(SQ_ALU_OMOD_OFF),
3227				 ALU_INST(SQ_OP2_INST_MOV),
3228				 BANK_SWIZZLE(SQ_ALU_VEC_012),
3229				 DST_GPR(1),
3230				 DST_REL(ABSOLUTE),
3231				 DST_ELEM(ELEM_X),
3232				 CLAMP(1));
3233
3234    /* 39 */
3235    /* MOV GPR1.y, KC5.y */
3236    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5),
3237			     SRC0_REL(ABSOLUTE),
3238			     SRC0_ELEM(ELEM_Y),
3239			     SRC0_NEG(0),
3240			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
3241			     SRC1_REL(ABSOLUTE),
3242			     SRC1_ELEM(ELEM_X),
3243			     SRC1_NEG(0),
3244			     INDEX_MODE(SQ_INDEX_AR_X),
3245			     PRED_SEL(SQ_PRED_SEL_OFF),
3246			     LAST(0));
3247    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3248				 SRC1_ABS(0),
3249				 UPDATE_EXECUTE_MASK(0),
3250				 UPDATE_PRED(0),
3251				 WRITE_MASK(1),
3252				 OMOD(SQ_ALU_OMOD_OFF),
3253				 ALU_INST(SQ_OP2_INST_MOV),
3254				 BANK_SWIZZLE(SQ_ALU_VEC_012),
3255				 DST_GPR(1),
3256				 DST_REL(ABSOLUTE),
3257				 DST_ELEM(ELEM_Y),
3258				 CLAMP(1));
3259
3260    /* 40 */
3261    /* MOV GPR1.z, KC5.z */
3262    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5),
3263			     SRC0_REL(ABSOLUTE),
3264			     SRC0_ELEM(ELEM_Z),
3265			     SRC0_NEG(0),
3266			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
3267			     SRC1_REL(ABSOLUTE),
3268			     SRC1_ELEM(ELEM_X),
3269			     SRC1_NEG(0),
3270			     INDEX_MODE(SQ_INDEX_AR_X),
3271			     PRED_SEL(SQ_PRED_SEL_OFF),
3272			     LAST(0));
3273    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3274				 SRC1_ABS(0),
3275				 UPDATE_EXECUTE_MASK(0),
3276				 UPDATE_PRED(0),
3277				 WRITE_MASK(1),
3278				 OMOD(SQ_ALU_OMOD_OFF),
3279				 ALU_INST(SQ_OP2_INST_MOV),
3280				 BANK_SWIZZLE(SQ_ALU_VEC_012),
3281				 DST_GPR(1),
3282				 DST_REL(ABSOLUTE),
3283				 DST_ELEM(ELEM_Z),
3284				 CLAMP(1));
3285
3286    /* 41 */
3287    /* MOV GPR1.w, KC5.w */
3288    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5),
3289			     SRC0_REL(ABSOLUTE),
3290			     SRC0_ELEM(ELEM_W),
3291			     SRC0_NEG(0),
3292			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
3293			     SRC1_REL(ABSOLUTE),
3294			     SRC1_ELEM(ELEM_X),
3295			     SRC1_NEG(0),
3296			     INDEX_MODE(SQ_INDEX_AR_X),
3297			     PRED_SEL(SQ_PRED_SEL_OFF),
3298			     LAST(1));
3299    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3300				 SRC1_ABS(0),
3301				 UPDATE_EXECUTE_MASK(0),
3302				 UPDATE_PRED(0),
3303				 WRITE_MASK(1),
3304				 OMOD(SQ_ALU_OMOD_OFF),
3305				 ALU_INST(SQ_OP2_INST_MOV),
3306				 BANK_SWIZZLE(SQ_ALU_VEC_012),
3307				 DST_GPR(1),
3308				 DST_REL(ABSOLUTE),
3309				 DST_ELEM(ELEM_W),
3310				 CLAMP(1));
3311
3312    return i;
3313}
3314