/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Author: Alex Deucher <alexander.deucher@amd.com>
 *
 */

27#ifdef HAVE_CONFIG_H
28#include "config.h"
29#endif
30
31#ifdef XF86DRM_MODE
32
33#include "xf86.h"
34
35#include "cayman_shader.h"
36#include "cayman_reg.h"
37
38/* solid vs --------------------------------------- */
39int cayman_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader)
40{
41    int i = 0;
42
43    /* 0 */
44    shader[i++] = CF_DWORD0(ADDR(4),
45			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
46    shader[i++] = CF_DWORD1(POP_COUNT(0),
47			    CF_CONST(0),
48			    COND(SQ_CF_COND_ACTIVE),
49			    I_COUNT(1),
50			    VALID_PIXEL_MODE(0),
51			    CF_INST(SQ_CF_INST_TC),
52			    BARRIER(1));
53    /* 1 */
54    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
55					  TYPE(SQ_EXPORT_POS),
56					  RW_GPR(1),
57					  RW_REL(ABSOLUTE),
58					  INDEX_GPR(0),
59					  ELEM_SIZE(0));
60    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
61					       SRC_SEL_Y(SQ_SEL_Y),
62					       SRC_SEL_Z(SQ_SEL_Z),
63					       SRC_SEL_W(SQ_SEL_W),
64					       BURST_COUNT(1),
65					       VALID_PIXEL_MODE(0),
66					       CF_INST(SQ_CF_INST_EXPORT_DONE),
67					       MARK(0),
68					       BARRIER(1));
69    /* 2 - always export a param whether it's used or not */
70    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
71					  TYPE(SQ_EXPORT_PARAM),
72					  RW_GPR(0),
73					  RW_REL(ABSOLUTE),
74					  INDEX_GPR(0),
75					  ELEM_SIZE(0));
76    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
77					       SRC_SEL_Y(SQ_SEL_Y),
78					       SRC_SEL_Z(SQ_SEL_Z),
79					       SRC_SEL_W(SQ_SEL_W),
80					       BURST_COUNT(0),
81					       VALID_PIXEL_MODE(0),
82					       CF_INST(SQ_CF_INST_EXPORT_DONE),
83					       MARK(0),
84					       BARRIER(0));
85    /* 3 - end */
86    shader[i++] = CF_DWORD0(ADDR(0),
87			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
88    shader[i++] = CF_DWORD1(POP_COUNT(0),
89			    CF_CONST(0),
90			    COND(SQ_CF_COND_ACTIVE),
91			    I_COUNT(0),
92			    VALID_PIXEL_MODE(0),
93			    CF_INST(SQ_CF_INST_END),
94			    BARRIER(1));
95    /* 4/5 */
96    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
97			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
98			     FETCH_WHOLE_QUAD(0),
99			     BUFFER_ID(0),
100			     SRC_GPR(0),
101			     SRC_REL(ABSOLUTE),
102			     SRC_SEL_X(SQ_SEL_X),
103			     SRC_SEL_Y(SQ_SEL_Y),
104			     STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
105			     LDS_REQ(0),
106			     COALESCED_READ(0));
107    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
108				 DST_REL(0),
109				 DST_SEL_X(SQ_SEL_X),
110				 DST_SEL_Y(SQ_SEL_Y),
111				 DST_SEL_Z(SQ_SEL_0),
112				 DST_SEL_W(SQ_SEL_1),
113				 USE_CONST_FIELDS(0),
114				 DATA_FORMAT(FMT_32_32_FLOAT),
115				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
116				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
117				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
118    shader[i++] = VTX_DWORD2(OFFSET(0),
119#if X_BYTE_ORDER == X_BIG_ENDIAN
120			     ENDIAN_SWAP(SQ_ENDIAN_8IN32),
121#else
122			     ENDIAN_SWAP(ENDIAN_NONE),
123#endif
124			     CONST_BUF_NO_STRIDE(0),
125			     ALT_CONST(0),
126			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
127    shader[i++] = VTX_DWORD_PAD;
128
129    return i;
130}
131
132/* solid ps --------------------------------------- */
133int cayman_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader)
134{
135    int i = 0;
136
137    /* 0 */
138    shader[i++] = CF_ALU_DWORD0(ADDR(3),
139				KCACHE_BANK0(0),
140				KCACHE_BANK1(0),
141				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
142    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
143				KCACHE_ADDR0(0),
144				KCACHE_ADDR1(0),
145				I_COUNT(4),
146				ALT_CONST(0),
147				CF_INST(SQ_CF_INST_ALU),
148				WHOLE_QUAD_MODE(0),
149				BARRIER(1));
150    /* 1 */
151    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
152					  TYPE(SQ_EXPORT_PIXEL),
153					  RW_GPR(0),
154					  RW_REL(ABSOLUTE),
155					  INDEX_GPR(0),
156					  ELEM_SIZE(1));
157    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
158					       SRC_SEL_Y(SQ_SEL_Y),
159					       SRC_SEL_Z(SQ_SEL_Z),
160					       SRC_SEL_W(SQ_SEL_W),
161					       BURST_COUNT(1),
162					       VALID_PIXEL_MODE(0),
163					       CF_INST(SQ_CF_INST_EXPORT_DONE),
164					       MARK(0),
165					       BARRIER(1));
166
167    /* 2 - end */
168    shader[i++] = CF_DWORD0(ADDR(0),
169			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
170    shader[i++] = CF_DWORD1(POP_COUNT(0),
171			    CF_CONST(0),
172			    COND(SQ_CF_COND_ACTIVE),
173			    I_COUNT(0),
174			    VALID_PIXEL_MODE(0),
175			    CF_INST(SQ_CF_INST_END),
176			    BARRIER(1));
177    /* 3 */
178    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
179			     SRC0_REL(ABSOLUTE),
180			     SRC0_ELEM(ELEM_X),
181			     SRC0_NEG(0),
182			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
183			     SRC1_REL(ABSOLUTE),
184			     SRC1_ELEM(ELEM_X),
185			     SRC1_NEG(0),
186			     INDEX_MODE(SQ_INDEX_AR_X),
187			     PRED_SEL(SQ_PRED_SEL_OFF),
188			     LAST(0));
189    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
190				 SRC1_ABS(0),
191				 UPDATE_EXECUTE_MASK(0),
192				 UPDATE_PRED(0),
193				 WRITE_MASK(1),
194				 OMOD(SQ_ALU_OMOD_OFF),
195				 ALU_INST(SQ_OP2_INST_MOV),
196				 BANK_SWIZZLE(SQ_ALU_VEC_012),
197				 DST_GPR(0),
198				 DST_REL(ABSOLUTE),
199				 DST_ELEM(ELEM_X),
200				 CLAMP(1));
201    /* 4 */
202    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
203			     SRC0_REL(ABSOLUTE),
204			     SRC0_ELEM(ELEM_Y),
205			     SRC0_NEG(0),
206			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
207			     SRC1_REL(ABSOLUTE),
208			     SRC1_ELEM(ELEM_Y),
209			     SRC1_NEG(0),
210			     INDEX_MODE(SQ_INDEX_AR_X),
211			     PRED_SEL(SQ_PRED_SEL_OFF),
212			     LAST(0));
213    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
214				 SRC1_ABS(0),
215				 UPDATE_EXECUTE_MASK(0),
216				 UPDATE_PRED(0),
217				 WRITE_MASK(1),
218				 OMOD(SQ_ALU_OMOD_OFF),
219				 ALU_INST(SQ_OP2_INST_MOV),
220				 BANK_SWIZZLE(SQ_ALU_VEC_012),
221				 DST_GPR(0),
222				 DST_REL(ABSOLUTE),
223				 DST_ELEM(ELEM_Y),
224				 CLAMP(1));
225    /* 5 */
226    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
227			     SRC0_REL(ABSOLUTE),
228			     SRC0_ELEM(ELEM_Z),
229			     SRC0_NEG(0),
230			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
231			     SRC1_REL(ABSOLUTE),
232			     SRC1_ELEM(ELEM_Z),
233			     SRC1_NEG(0),
234			     INDEX_MODE(SQ_INDEX_AR_X),
235			     PRED_SEL(SQ_PRED_SEL_OFF),
236			     LAST(0));
237    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
238				 SRC1_ABS(0),
239				 UPDATE_EXECUTE_MASK(0),
240				 UPDATE_PRED(0),
241				 WRITE_MASK(1),
242				 OMOD(SQ_ALU_OMOD_OFF),
243				 ALU_INST(SQ_OP2_INST_MOV),
244				 BANK_SWIZZLE(SQ_ALU_VEC_012),
245				 DST_GPR(0),
246				 DST_REL(ABSOLUTE),
247				 DST_ELEM(ELEM_Z),
248				 CLAMP(1));
249    /* 6 */
250    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
251			     SRC0_REL(ABSOLUTE),
252			     SRC0_ELEM(ELEM_W),
253			     SRC0_NEG(0),
254			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
255			     SRC1_REL(ABSOLUTE),
256			     SRC1_ELEM(ELEM_W),
257			     SRC1_NEG(0),
258			     INDEX_MODE(SQ_INDEX_AR_X),
259			     PRED_SEL(SQ_PRED_SEL_OFF),
260			     LAST(1));
261    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
262				 SRC1_ABS(0),
263				 UPDATE_EXECUTE_MASK(0),
264				 UPDATE_PRED(0),
265				 WRITE_MASK(1),
266				 OMOD(SQ_ALU_OMOD_OFF),
267				 ALU_INST(SQ_OP2_INST_MOV),
268				 BANK_SWIZZLE(SQ_ALU_VEC_012),
269				 DST_GPR(0),
270				 DST_REL(ABSOLUTE),
271				 DST_ELEM(ELEM_W),
272				 CLAMP(1));
273
274    return i;
275}
276
277/* copy vs --------------------------------------- */
278int cayman_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader)
279{
280    int i = 0;
281
282    /* 0 */
283    shader[i++] = CF_DWORD0(ADDR(4),
284			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
285    shader[i++] = CF_DWORD1(POP_COUNT(0),
286			    CF_CONST(0),
287			    COND(SQ_CF_COND_ACTIVE),
288			    I_COUNT(2),
289			    VALID_PIXEL_MODE(0),
290			    CF_INST(SQ_CF_INST_TC),
291			    BARRIER(1));
292    /* 1 */
293    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
294					  TYPE(SQ_EXPORT_POS),
295					  RW_GPR(1),
296					  RW_REL(ABSOLUTE),
297					  INDEX_GPR(0),
298					  ELEM_SIZE(0));
299    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
300					       SRC_SEL_Y(SQ_SEL_Y),
301					       SRC_SEL_Z(SQ_SEL_Z),
302					       SRC_SEL_W(SQ_SEL_W),
303					       BURST_COUNT(0),
304					       VALID_PIXEL_MODE(0),
305					       CF_INST(SQ_CF_INST_EXPORT_DONE),
306					       MARK(0),
307					       BARRIER(1));
308    /* 2 */
309    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
310					  TYPE(SQ_EXPORT_PARAM),
311					  RW_GPR(0),
312					  RW_REL(ABSOLUTE),
313					  INDEX_GPR(0),
314					  ELEM_SIZE(0));
315    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
316					       SRC_SEL_Y(SQ_SEL_Y),
317					       SRC_SEL_Z(SQ_SEL_Z),
318					       SRC_SEL_W(SQ_SEL_W),
319					       BURST_COUNT(0),
320					       VALID_PIXEL_MODE(0),
321					       CF_INST(SQ_CF_INST_EXPORT_DONE),
322					       MARK(0),
323					       BARRIER(0));
324    /* 3 - end */
325    shader[i++] = CF_DWORD0(ADDR(0),
326			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
327    shader[i++] = CF_DWORD1(POP_COUNT(0),
328			    CF_CONST(0),
329			    COND(SQ_CF_COND_ACTIVE),
330			    I_COUNT(0),
331			    VALID_PIXEL_MODE(0),
332			    CF_INST(SQ_CF_INST_END),
333			    BARRIER(1));
334    /* 4/5 */
335    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
336			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
337			     FETCH_WHOLE_QUAD(0),
338			     BUFFER_ID(0),
339			     SRC_GPR(0),
340			     SRC_REL(ABSOLUTE),
341			     SRC_SEL_X(SQ_SEL_X),
342			     SRC_SEL_Y(SQ_SEL_Y),
343			     STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
344			     LDS_REQ(0),
345			     COALESCED_READ(0));
346    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
347				 DST_REL(0),
348				 DST_SEL_X(SQ_SEL_X),
349				 DST_SEL_Y(SQ_SEL_Y),
350				 DST_SEL_Z(SQ_SEL_0),
351				 DST_SEL_W(SQ_SEL_1),
352				 USE_CONST_FIELDS(0),
353				 DATA_FORMAT(FMT_32_32_FLOAT),
354				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
355				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
356				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
357    shader[i++] = VTX_DWORD2(OFFSET(0),
358#if X_BYTE_ORDER == X_BIG_ENDIAN
359                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
360#else
361			     ENDIAN_SWAP(ENDIAN_NONE),
362#endif
363			     CONST_BUF_NO_STRIDE(0),
364			     ALT_CONST(0),
365			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
366    shader[i++] = VTX_DWORD_PAD;
367    /* 6/7 */
368    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
369			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
370			     FETCH_WHOLE_QUAD(0),
371			     BUFFER_ID(0),
372			     SRC_GPR(0),
373			     SRC_REL(ABSOLUTE),
374			     SRC_SEL_X(SQ_SEL_X),
375			     SRC_SEL_Y(SQ_SEL_Y),
376			     STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
377			     LDS_REQ(0),
378			     COALESCED_READ(0));
379    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
380				 DST_REL(0),
381				 DST_SEL_X(SQ_SEL_X),
382				 DST_SEL_Y(SQ_SEL_Y),
383				 DST_SEL_Z(SQ_SEL_0),
384				 DST_SEL_W(SQ_SEL_1),
385				 USE_CONST_FIELDS(0),
386				 DATA_FORMAT(FMT_32_32_FLOAT),
387				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
388				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
389				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
390    shader[i++] = VTX_DWORD2(OFFSET(8),
391#if X_BYTE_ORDER == X_BIG_ENDIAN
392                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
393#else
394			     ENDIAN_SWAP(ENDIAN_NONE),
395#endif
396			     CONST_BUF_NO_STRIDE(0),
397			     ALT_CONST(0),
398			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
399    shader[i++] = VTX_DWORD_PAD;
400
401    return i;
402}
403
404/* copy ps --------------------------------------- */
405int cayman_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader)
406{
407    int i = 0;
408
409    /* CF INST 0 */
410    shader[i++] = CF_ALU_DWORD0(ADDR(4),
411				KCACHE_BANK0(0),
412				KCACHE_BANK1(0),
413				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
414    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
415				KCACHE_ADDR0(0),
416				KCACHE_ADDR1(0),
417				I_COUNT(4),
418				ALT_CONST(0),
419				CF_INST(SQ_CF_INST_ALU),
420				WHOLE_QUAD_MODE(0),
421				BARRIER(1));
422    /* CF INST 1 */
423    shader[i++] = CF_DWORD0(ADDR(8),
424			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
425    shader[i++] = CF_DWORD1(POP_COUNT(0),
426			    CF_CONST(0),
427			    COND(SQ_CF_COND_ACTIVE),
428			    I_COUNT(1),
429			    VALID_PIXEL_MODE(0),
430			    CF_INST(SQ_CF_INST_TC),
431			    BARRIER(1));
432    /* CF INST 2 */
433    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
434					  TYPE(SQ_EXPORT_PIXEL),
435					  RW_GPR(0),
436					  RW_REL(ABSOLUTE),
437					  INDEX_GPR(0),
438					  ELEM_SIZE(1));
439    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
440					       SRC_SEL_Y(SQ_SEL_Y),
441					       SRC_SEL_Z(SQ_SEL_Z),
442					       SRC_SEL_W(SQ_SEL_W),
443					       BURST_COUNT(1),
444					       VALID_PIXEL_MODE(0),
445					       CF_INST(SQ_CF_INST_EXPORT_DONE),
446					       MARK(0),
447					       BARRIER(1));
448    /* CF INST 3 - end */
449    shader[i++] = CF_DWORD0(ADDR(0),
450			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
451    shader[i++] = CF_DWORD1(POP_COUNT(0),
452			    CF_CONST(0),
453			    COND(SQ_CF_COND_ACTIVE),
454			    I_COUNT(0),
455			    VALID_PIXEL_MODE(0),
456			    CF_INST(SQ_CF_INST_END),
457			    BARRIER(1));
458    /* 4 interpolate tex coords */
459    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
460			     SRC0_REL(ABSOLUTE),
461			     SRC0_ELEM(ELEM_Y),
462			     SRC0_NEG(0),
463			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
464			     SRC1_REL(ABSOLUTE),
465			     SRC1_ELEM(ELEM_X),
466			     SRC1_NEG(0),
467			     INDEX_MODE(SQ_INDEX_AR_X),
468			     PRED_SEL(SQ_PRED_SEL_OFF),
469			     LAST(0));
470    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
471				 SRC1_ABS(0),
472				 UPDATE_EXECUTE_MASK(0),
473				 UPDATE_PRED(0),
474				 WRITE_MASK(1),
475				 OMOD(SQ_ALU_OMOD_OFF),
476				 ALU_INST(SQ_OP2_INST_INTERP_XY),
477				 BANK_SWIZZLE(SQ_ALU_VEC_210),
478				 DST_GPR(0),
479				 DST_REL(ABSOLUTE),
480				 DST_ELEM(ELEM_X),
481				 CLAMP(0));
482    /* 5 */
483    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
484			     SRC0_REL(ABSOLUTE),
485			     SRC0_ELEM(ELEM_X),
486			     SRC0_NEG(0),
487			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
488			     SRC1_REL(ABSOLUTE),
489			     SRC1_ELEM(ELEM_X),
490			     SRC1_NEG(0),
491			     INDEX_MODE(SQ_INDEX_AR_X),
492			     PRED_SEL(SQ_PRED_SEL_OFF),
493			     LAST(0));
494    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
495				 SRC1_ABS(0),
496				 UPDATE_EXECUTE_MASK(0),
497				 UPDATE_PRED(0),
498				 WRITE_MASK(1),
499				 OMOD(SQ_ALU_OMOD_OFF),
500				 ALU_INST(SQ_OP2_INST_INTERP_XY),
501				 BANK_SWIZZLE(SQ_ALU_VEC_210),
502				 DST_GPR(0),
503				 DST_REL(ABSOLUTE),
504				 DST_ELEM(ELEM_Y),
505				 CLAMP(0));
506    /* 6 */
507    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
508			     SRC0_REL(ABSOLUTE),
509			     SRC0_ELEM(ELEM_Y),
510			     SRC0_NEG(0),
511			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
512			     SRC1_REL(ABSOLUTE),
513			     SRC1_ELEM(ELEM_X),
514			     SRC1_NEG(0),
515			     INDEX_MODE(SQ_INDEX_AR_X),
516			     PRED_SEL(SQ_PRED_SEL_OFF),
517			     LAST(0));
518    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
519				 SRC1_ABS(0),
520				 UPDATE_EXECUTE_MASK(0),
521				 UPDATE_PRED(0),
522				 WRITE_MASK(0),
523				 OMOD(SQ_ALU_OMOD_OFF),
524				 ALU_INST(SQ_OP2_INST_INTERP_XY),
525				 BANK_SWIZZLE(SQ_ALU_VEC_210),
526				 DST_GPR(0),
527				 DST_REL(ABSOLUTE),
528				 DST_ELEM(ELEM_Z),
529				 CLAMP(0));
530    /* 7 */
531    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
532			     SRC0_REL(ABSOLUTE),
533			     SRC0_ELEM(ELEM_X),
534			     SRC0_NEG(0),
535			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
536			     SRC1_REL(ABSOLUTE),
537			     SRC1_ELEM(ELEM_X),
538			     SRC1_NEG(0),
539			     INDEX_MODE(SQ_INDEX_AR_X),
540			     PRED_SEL(SQ_PRED_SEL_OFF),
541			     LAST(1));
542    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
543				 SRC1_ABS(0),
544				 UPDATE_EXECUTE_MASK(0),
545				 UPDATE_PRED(0),
546				 WRITE_MASK(0),
547				 OMOD(SQ_ALU_OMOD_OFF),
548				 ALU_INST(SQ_OP2_INST_INTERP_XY),
549				 BANK_SWIZZLE(SQ_ALU_VEC_210),
550				 DST_GPR(0),
551				 DST_REL(ABSOLUTE),
552				 DST_ELEM(ELEM_W),
553				 CLAMP(0));
554
555    /* 8/9 TEX INST 0 */
556    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
557			     INST_MOD(0),
558			     FETCH_WHOLE_QUAD(0),
559			     RESOURCE_ID(0),
560			     SRC_GPR(0),
561			     SRC_REL(ABSOLUTE),
562			     ALT_CONST(0),
563			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
564			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
565    shader[i++] = TEX_DWORD1(DST_GPR(0),
566			     DST_REL(ABSOLUTE),
567			     DST_SEL_X(SQ_SEL_X), /* R */
568			     DST_SEL_Y(SQ_SEL_Y), /* G */
569			     DST_SEL_Z(SQ_SEL_Z), /* B */
570			     DST_SEL_W(SQ_SEL_W), /* A */
571			     LOD_BIAS(0),
572			     COORD_TYPE_X(TEX_UNNORMALIZED),
573			     COORD_TYPE_Y(TEX_UNNORMALIZED),
574			     COORD_TYPE_Z(TEX_UNNORMALIZED),
575			     COORD_TYPE_W(TEX_UNNORMALIZED));
576    shader[i++] = TEX_DWORD2(OFFSET_X(0),
577			     OFFSET_Y(0),
578			     OFFSET_Z(0),
579			     SAMPLER_ID(0),
580			     SRC_SEL_X(SQ_SEL_X),
581			     SRC_SEL_Y(SQ_SEL_Y),
582			     SRC_SEL_Z(SQ_SEL_0),
583			     SRC_SEL_W(SQ_SEL_1));
584    shader[i++] = TEX_DWORD_PAD;
585
586    return i;
587}
588
589int cayman_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
590{
591    int i = 0;
592
593    /* 0 */
594    shader[i++] = CF_DWORD0(ADDR(8),
595			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
596    shader[i++] = CF_DWORD1(POP_COUNT(0),
597                            CF_CONST(0),
598                            COND(SQ_CF_COND_ACTIVE),
599                            I_COUNT(2),
600                            VALID_PIXEL_MODE(0),
601                            CF_INST(SQ_CF_INST_TC),
602                            BARRIER(1));
603
604    /* 1 - ALU */
605    shader[i++] = CF_ALU_DWORD0(ADDR(5),
606				KCACHE_BANK0(0),
607				KCACHE_BANK1(0),
608				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
609    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
610				KCACHE_ADDR0(0),
611				KCACHE_ADDR1(0),
612				I_COUNT(2),
613				ALT_CONST(0),
614				CF_INST(SQ_CF_INST_ALU),
615				WHOLE_QUAD_MODE(0),
616				BARRIER(1));
617
618    /* 2 */
619    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
620                                          TYPE(SQ_EXPORT_POS),
621                                          RW_GPR(1),
622                                          RW_REL(ABSOLUTE),
623                                          INDEX_GPR(0),
624                                          ELEM_SIZE(3));
625    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
626                                               SRC_SEL_Y(SQ_SEL_Y),
627                                               SRC_SEL_Z(SQ_SEL_Z),
628                                               SRC_SEL_W(SQ_SEL_W),
629                                               BURST_COUNT(1),
630                                               VALID_PIXEL_MODE(0),
631                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
632                                               MARK(0),
633                                               BARRIER(1));
634    /* 3 */
635    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
636                                          TYPE(SQ_EXPORT_PARAM),
637                                          RW_GPR(0),
638                                          RW_REL(ABSOLUTE),
639                                          INDEX_GPR(0),
640                                          ELEM_SIZE(3));
641    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
642                                               SRC_SEL_Y(SQ_SEL_Y),
643                                               SRC_SEL_Z(SQ_SEL_Z),
644                                               SRC_SEL_W(SQ_SEL_W),
645                                               BURST_COUNT(1),
646                                               VALID_PIXEL_MODE(0),
647                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
648                                               MARK(0),
649                                               BARRIER(0));
650    /* 4 - end */
651    shader[i++] = CF_DWORD0(ADDR(0),
652			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
653    shader[i++] = CF_DWORD1(POP_COUNT(0),
654			    CF_CONST(0),
655			    COND(SQ_CF_COND_ACTIVE),
656			    I_COUNT(0),
657			    VALID_PIXEL_MODE(0),
658			    CF_INST(SQ_CF_INST_END),
659			    BARRIER(1));
660    /* 5 texX / w */
661    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
662                             SRC0_REL(ABSOLUTE),
663                             SRC0_ELEM(ELEM_X),
664                             SRC0_NEG(0),
665                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
666                             SRC1_REL(ABSOLUTE),
667                             SRC1_ELEM(ELEM_X),
668                             SRC1_NEG(0),
669                             INDEX_MODE(SQ_INDEX_AR_X),
670                             PRED_SEL(SQ_PRED_SEL_OFF),
671                             LAST(0));
672    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
673                                 SRC1_ABS(0),
674                                 UPDATE_EXECUTE_MASK(0),
675                                 UPDATE_PRED(0),
676                                 WRITE_MASK(1),
677                                 OMOD(SQ_ALU_OMOD_OFF),
678                                 ALU_INST(SQ_OP2_INST_MUL),
679                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
680                                 DST_GPR(0),
681                                 DST_REL(ABSOLUTE),
682                                 DST_ELEM(ELEM_X),
683                                 CLAMP(0));
684
685    /* 6 texY / h */
686    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
687                             SRC0_REL(ABSOLUTE),
688                             SRC0_ELEM(ELEM_Y),
689                             SRC0_NEG(0),
690                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
691                             SRC1_REL(ABSOLUTE),
692                             SRC1_ELEM(ELEM_Y),
693                             SRC1_NEG(0),
694                             INDEX_MODE(SQ_INDEX_AR_X),
695                             PRED_SEL(SQ_PRED_SEL_OFF),
696                             LAST(1));
697    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
698                                 SRC1_ABS(0),
699                                 UPDATE_EXECUTE_MASK(0),
700                                 UPDATE_PRED(0),
701                                 WRITE_MASK(1),
702                                 OMOD(SQ_ALU_OMOD_OFF),
703                                 ALU_INST(SQ_OP2_INST_MUL),
704                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
705                                 DST_GPR(0),
706                                 DST_REL(ABSOLUTE),
707                                 DST_ELEM(ELEM_Y),
708                                 CLAMP(0));
709
710    /* 7 - padding */
711    shader[i++] = 0x00000000;
712    shader[i++] = 0x00000000;
713    /* 8/9 */
714    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
715                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
716                             FETCH_WHOLE_QUAD(0),
717                             BUFFER_ID(0),
718                             SRC_GPR(0),
719                             SRC_REL(ABSOLUTE),
720                             SRC_SEL_X(SQ_SEL_X),
721			     SRC_SEL_Y(SQ_SEL_Y),
722			     STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
723			     LDS_REQ(0),
724			     COALESCED_READ(0));
725    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
726                                 DST_REL(ABSOLUTE),
727                                 DST_SEL_X(SQ_SEL_X),
728                                 DST_SEL_Y(SQ_SEL_Y),
729                                 DST_SEL_Z(SQ_SEL_0),
730                                 DST_SEL_W(SQ_SEL_1),
731                                 USE_CONST_FIELDS(0),
732                                 DATA_FORMAT(FMT_32_32_FLOAT),
733                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
734                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
735                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
736    shader[i++] = VTX_DWORD2(OFFSET(0),
737#if X_BYTE_ORDER == X_BIG_ENDIAN
738                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
739#else
740                             ENDIAN_SWAP(ENDIAN_NONE),
741#endif
742                             CONST_BUF_NO_STRIDE(0),
743			     ALT_CONST(0),
744			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
745    shader[i++] = VTX_DWORD_PAD;
746    /* 10/11 */
747    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
748                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
749                             FETCH_WHOLE_QUAD(0),
750                             BUFFER_ID(0),
751                             SRC_GPR(0),
752                             SRC_REL(ABSOLUTE),
753                             SRC_SEL_X(SQ_SEL_X),
754			     SRC_SEL_Y(SQ_SEL_Y),
755			     STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
756			     LDS_REQ(0),
757			     COALESCED_READ(0));
758    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
759                                 DST_REL(ABSOLUTE),
760                                 DST_SEL_X(SQ_SEL_X),
761                                 DST_SEL_Y(SQ_SEL_Y),
762                                 DST_SEL_Z(SQ_SEL_0),
763                                 DST_SEL_W(SQ_SEL_1),
764                                 USE_CONST_FIELDS(0),
765                                 DATA_FORMAT(FMT_32_32_FLOAT),
766                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
767                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
768                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
769    shader[i++] = VTX_DWORD2(OFFSET(8),
770#if X_BYTE_ORDER == X_BIG_ENDIAN
771                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
772#else
773                             ENDIAN_SWAP(ENDIAN_NONE),
774#endif
775                             CONST_BUF_NO_STRIDE(0),
776			     ALT_CONST(0),
777			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
778    shader[i++] = VTX_DWORD_PAD;
779
780    return i;
781}
782
783int cayman_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
784{
785    int i = 0;
786
787    /* 0 */
788    shader[i++] = CF_ALU_DWORD0(ADDR(6),
789				KCACHE_BANK0(0),
790				KCACHE_BANK1(0),
791				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
792    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
793				KCACHE_ADDR0(0),
794				KCACHE_ADDR1(0),
795				I_COUNT(4),
796				ALT_CONST(0),
797				CF_INST(SQ_CF_INST_ALU),
798				WHOLE_QUAD_MODE(0),
799				BARRIER(1));
800    /* 1 */
801    shader[i++] = CF_DWORD0(ADDR(22),
802			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
803    shader[i++] = CF_DWORD1(POP_COUNT(0),
804                            CF_CONST(0),
805                            COND(SQ_CF_COND_BOOL),
806                            I_COUNT(0),
807                            VALID_PIXEL_MODE(0),
808                            CF_INST(SQ_CF_INST_CALL),
809                            BARRIER(0));
810    /* 2 */
811    shader[i++] = CF_DWORD0(ADDR(30),
812			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
813    shader[i++] = CF_DWORD1(POP_COUNT(0),
814                            CF_CONST(0),
815                            COND(SQ_CF_COND_NOT_BOOL),
816                            I_COUNT(0),
817                            VALID_PIXEL_MODE(0),
818                            CF_INST(SQ_CF_INST_CALL),
819                            BARRIER(0));
820    /* 3 */
821    shader[i++] = CF_ALU_DWORD0(ADDR(10),
822                                KCACHE_BANK0(0),
823                                KCACHE_BANK1(0),
824                                KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
825    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
826                                KCACHE_ADDR0(0),
827                                KCACHE_ADDR1(0),
828                                I_COUNT(12),
829                                ALT_CONST(0),
830                                CF_INST(SQ_CF_INST_ALU),
831				WHOLE_QUAD_MODE(0),
832                                BARRIER(1));
833    /* 4 */
834    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
835                                          TYPE(SQ_EXPORT_PIXEL),
836                                          RW_GPR(2),
837                                          RW_REL(ABSOLUTE),
838                                          INDEX_GPR(0),
839                                          ELEM_SIZE(3));
840    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
841                                               SRC_SEL_Y(SQ_SEL_Y),
842                                               SRC_SEL_Z(SQ_SEL_Z),
843                                               SRC_SEL_W(SQ_SEL_W),
844                                               BURST_COUNT(1),
845                                               VALID_PIXEL_MODE(0),
846                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
847                                               MARK(0),
848                                               BARRIER(1));
849    /* 5 - end */
850    shader[i++] = CF_DWORD0(ADDR(0),
851			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
852    shader[i++] = CF_DWORD1(POP_COUNT(0),
853			    CF_CONST(0),
854			    COND(SQ_CF_COND_ACTIVE),
855			    I_COUNT(0),
856			    VALID_PIXEL_MODE(0),
857			    CF_INST(SQ_CF_INST_END),
858			    BARRIER(1));
859    /* 6 interpolate tex coords */
860    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
861			     SRC0_REL(ABSOLUTE),
862			     SRC0_ELEM(ELEM_Y),
863			     SRC0_NEG(0),
864			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
865			     SRC1_REL(ABSOLUTE),
866			     SRC1_ELEM(ELEM_X),
867			     SRC1_NEG(0),
868			     INDEX_MODE(SQ_INDEX_AR_X),
869			     PRED_SEL(SQ_PRED_SEL_OFF),
870			     LAST(0));
871    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
872				 SRC1_ABS(0),
873				 UPDATE_EXECUTE_MASK(0),
874				 UPDATE_PRED(0),
875				 WRITE_MASK(1),
876				 OMOD(SQ_ALU_OMOD_OFF),
877				 ALU_INST(SQ_OP2_INST_INTERP_XY),
878				 BANK_SWIZZLE(SQ_ALU_VEC_210),
879				 DST_GPR(0),
880				 DST_REL(ABSOLUTE),
881				 DST_ELEM(ELEM_X),
882				 CLAMP(0));
883    /* 7 */
884    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
885			     SRC0_REL(ABSOLUTE),
886			     SRC0_ELEM(ELEM_X),
887			     SRC0_NEG(0),
888			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
889			     SRC1_REL(ABSOLUTE),
890			     SRC1_ELEM(ELEM_X),
891			     SRC1_NEG(0),
892			     INDEX_MODE(SQ_INDEX_AR_X),
893			     PRED_SEL(SQ_PRED_SEL_OFF),
894			     LAST(0));
895    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
896				 SRC1_ABS(0),
897				 UPDATE_EXECUTE_MASK(0),
898				 UPDATE_PRED(0),
899				 WRITE_MASK(1),
900				 OMOD(SQ_ALU_OMOD_OFF),
901				 ALU_INST(SQ_OP2_INST_INTERP_XY),
902				 BANK_SWIZZLE(SQ_ALU_VEC_210),
903				 DST_GPR(0),
904				 DST_REL(ABSOLUTE),
905				 DST_ELEM(ELEM_Y),
906				 CLAMP(0));
907    /* 8 */
908    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
909			     SRC0_REL(ABSOLUTE),
910			     SRC0_ELEM(ELEM_Y),
911			     SRC0_NEG(0),
912			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
913			     SRC1_REL(ABSOLUTE),
914			     SRC1_ELEM(ELEM_X),
915			     SRC1_NEG(0),
916			     INDEX_MODE(SQ_INDEX_AR_X),
917			     PRED_SEL(SQ_PRED_SEL_OFF),
918			     LAST(0));
919    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
920				 SRC1_ABS(0),
921				 UPDATE_EXECUTE_MASK(0),
922				 UPDATE_PRED(0),
923				 WRITE_MASK(0),
924				 OMOD(SQ_ALU_OMOD_OFF),
925				 ALU_INST(SQ_OP2_INST_INTERP_XY),
926				 BANK_SWIZZLE(SQ_ALU_VEC_210),
927				 DST_GPR(0),
928				 DST_REL(ABSOLUTE),
929				 DST_ELEM(ELEM_Z),
930				 CLAMP(0));
931    /* 9 */
932    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
933			     SRC0_REL(ABSOLUTE),
934			     SRC0_ELEM(ELEM_X),
935			     SRC0_NEG(0),
936			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
937			     SRC1_REL(ABSOLUTE),
938			     SRC1_ELEM(ELEM_X),
939			     SRC1_NEG(0),
940			     INDEX_MODE(SQ_INDEX_AR_X),
941			     PRED_SEL(SQ_PRED_SEL_OFF),
942			     LAST(1));
943    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
944				 SRC1_ABS(0),
945				 UPDATE_EXECUTE_MASK(0),
946				 UPDATE_PRED(0),
947				 WRITE_MASK(0),
948				 OMOD(SQ_ALU_OMOD_OFF),
949				 ALU_INST(SQ_OP2_INST_INTERP_XY),
950				 BANK_SWIZZLE(SQ_ALU_VEC_210),
951				 DST_GPR(0),
952				 DST_REL(ABSOLUTE),
953				 DST_ELEM(ELEM_W),
954				 CLAMP(0));
955
956    /* 10,11,12,13 */
957    /* r2.x = MAD(c0.w, r1.x, c0.x) */
958    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
959                             SRC0_REL(ABSOLUTE),
960                             SRC0_ELEM(ELEM_W),
961                             SRC0_NEG(0),
962                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
963                             SRC1_REL(ABSOLUTE),
964                             SRC1_ELEM(ELEM_X),
965                             SRC1_NEG(0),
966                             INDEX_MODE(SQ_INDEX_LOOP),
967                             PRED_SEL(SQ_PRED_SEL_OFF),
968                             LAST(0));
969    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
970                                 SRC2_REL(ABSOLUTE),
971                                 SRC2_ELEM(ELEM_X),
972                                 SRC2_NEG(0),
973                                 ALU_INST(SQ_OP3_INST_MULADD),
974                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
975                                 DST_GPR(2),
976                                 DST_REL(ABSOLUTE),
977                                 DST_ELEM(ELEM_X),
978                                 CLAMP(0));
979    /* r2.y = MAD(c0.w, r1.x, c0.y) */
980    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
981                             SRC0_REL(ABSOLUTE),
982                             SRC0_ELEM(ELEM_W),
983                             SRC0_NEG(0),
984                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
985                             SRC1_REL(ABSOLUTE),
986                             SRC1_ELEM(ELEM_X),
987                             SRC1_NEG(0),
988                             INDEX_MODE(SQ_INDEX_LOOP),
989                             PRED_SEL(SQ_PRED_SEL_OFF),
990                             LAST(0));
991    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
992                                 SRC2_REL(ABSOLUTE),
993                                 SRC2_ELEM(ELEM_Y),
994                                 SRC2_NEG(0),
995                                 ALU_INST(SQ_OP3_INST_MULADD),
996                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
997                                 DST_GPR(2),
998                                 DST_REL(ABSOLUTE),
999                                 DST_ELEM(ELEM_Y),
1000                                 CLAMP(0));
1001    /* r2.z = MAD(c0.w, r1.x, c0.z) */
1002    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
1003                             SRC0_REL(ABSOLUTE),
1004                             SRC0_ELEM(ELEM_W),
1005                             SRC0_NEG(0),
1006                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1007                             SRC1_REL(ABSOLUTE),
1008                             SRC1_ELEM(ELEM_X),
1009                             SRC1_NEG(0),
1010                             INDEX_MODE(SQ_INDEX_LOOP),
1011                             PRED_SEL(SQ_PRED_SEL_OFF),
1012                             LAST(0));
1013    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
1014                                 SRC2_REL(ABSOLUTE),
1015                                 SRC2_ELEM(ELEM_Z),
1016                                 SRC2_NEG(0),
1017                                 ALU_INST(SQ_OP3_INST_MULADD),
1018                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1019                                 DST_GPR(2),
1020                                 DST_REL(ABSOLUTE),
1021                                 DST_ELEM(ELEM_Z),
1022                                 CLAMP(0));
1023    /* r2.w = MAD(0, 0, 1) */
1024    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
1025                             SRC0_REL(ABSOLUTE),
1026                             SRC0_ELEM(ELEM_X),
1027                             SRC0_NEG(0),
1028                             SRC1_SEL(SQ_ALU_SRC_0),
1029                             SRC1_REL(ABSOLUTE),
1030                             SRC1_ELEM(ELEM_X),
1031                             SRC1_NEG(0),
1032                             INDEX_MODE(SQ_INDEX_LOOP),
1033                             PRED_SEL(SQ_PRED_SEL_OFF),
1034                             LAST(1));
1035    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
1036                                 SRC2_REL(ABSOLUTE),
1037                                 SRC2_ELEM(ELEM_X),
1038                                 SRC2_NEG(0),
1039                                 ALU_INST(SQ_OP3_INST_MULADD),
1040                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1041                                 DST_GPR(2),
1042                                 DST_REL(ABSOLUTE),
1043                                 DST_ELEM(ELEM_W),
1044                                 CLAMP(0));
1045
1046    /* 14,15,16,17 */
1047    /* r2.x = MAD(c1.x, r1.y, pv.x) */
1048    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
1049                             SRC0_REL(ABSOLUTE),
1050                             SRC0_ELEM(ELEM_X),
1051                             SRC0_NEG(0),
1052                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1053                             SRC1_REL(ABSOLUTE),
1054                             SRC1_ELEM(ELEM_Y),
1055                             SRC1_NEG(0),
1056                             INDEX_MODE(SQ_INDEX_LOOP),
1057                             PRED_SEL(SQ_PRED_SEL_OFF),
1058                             LAST(0));
1059    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1060                                 SRC2_REL(ABSOLUTE),
1061                                 SRC2_ELEM(ELEM_X),
1062                                 SRC2_NEG(0),
1063                                 ALU_INST(SQ_OP3_INST_MULADD),
1064                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1065                                 DST_GPR(2),
1066                                 DST_REL(ABSOLUTE),
1067                                 DST_ELEM(ELEM_X),
1068                                 CLAMP(0));
1069    /* r2.y = MAD(c1.y, r1.y, pv.y) */
1070    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
1071                             SRC0_REL(ABSOLUTE),
1072                             SRC0_ELEM(ELEM_Y),
1073                             SRC0_NEG(0),
1074                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1075                             SRC1_REL(ABSOLUTE),
1076                             SRC1_ELEM(ELEM_Y),
1077                             SRC1_NEG(0),
1078                             INDEX_MODE(SQ_INDEX_LOOP),
1079                             PRED_SEL(SQ_PRED_SEL_OFF),
1080                             LAST(0));
1081    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1082                                 SRC2_REL(ABSOLUTE),
1083                                 SRC2_ELEM(ELEM_Y),
1084                                 SRC2_NEG(0),
1085                                 ALU_INST(SQ_OP3_INST_MULADD),
1086                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1087                                 DST_GPR(2),
1088                                 DST_REL(ABSOLUTE),
1089                                 DST_ELEM(ELEM_Y),
1090                                 CLAMP(0));
1091    /* r2.z = MAD(c1.z, r1.y, pv.z) */
1092    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
1093                             SRC0_REL(ABSOLUTE),
1094                             SRC0_ELEM(ELEM_Z),
1095                             SRC0_NEG(0),
1096                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1097                             SRC1_REL(ABSOLUTE),
1098                             SRC1_ELEM(ELEM_Y),
1099                             SRC1_NEG(0),
1100                             INDEX_MODE(SQ_INDEX_LOOP),
1101                             PRED_SEL(SQ_PRED_SEL_OFF),
1102                             LAST(0));
1103    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1104                                 SRC2_REL(ABSOLUTE),
1105                                 SRC2_ELEM(ELEM_Z),
1106                                 SRC2_NEG(0),
1107                                 ALU_INST(SQ_OP3_INST_MULADD),
1108                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1109                                 DST_GPR(2),
1110                                 DST_REL(ABSOLUTE),
1111                                 DST_ELEM(ELEM_Z),
1112                                 CLAMP(0));
1113    /* r2.w = MAD(0, 0, 1) */
1114    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
1115                             SRC0_REL(ABSOLUTE),
1116                             SRC0_ELEM(ELEM_X),
1117                             SRC0_NEG(0),
1118                             SRC1_SEL(SQ_ALU_SRC_0),
1119                             SRC1_REL(ABSOLUTE),
1120                             SRC1_ELEM(ELEM_X),
1121                             SRC1_NEG(0),
1122                             INDEX_MODE(SQ_INDEX_LOOP),
1123                             PRED_SEL(SQ_PRED_SEL_OFF),
1124                             LAST(1));
1125    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
1126                                 SRC2_REL(ABSOLUTE),
1127                                 SRC2_ELEM(ELEM_W),
1128                                 SRC2_NEG(0),
1129                                 ALU_INST(SQ_OP3_INST_MULADD),
1130                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1131                                 DST_GPR(2),
1132                                 DST_REL(ABSOLUTE),
1133                                 DST_ELEM(ELEM_W),
1134                                 CLAMP(0));
1135    /* 18,19,20,21 */
1136    /* r2.x = MAD(c2.x, r1.z, pv.x) */
1137    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
1138                             SRC0_REL(ABSOLUTE),
1139                             SRC0_ELEM(ELEM_X),
1140                             SRC0_NEG(0),
1141                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1142                             SRC1_REL(ABSOLUTE),
1143                             SRC1_ELEM(ELEM_Z),
1144                             SRC1_NEG(0),
1145                             INDEX_MODE(SQ_INDEX_LOOP),
1146                             PRED_SEL(SQ_PRED_SEL_OFF),
1147                             LAST(0));
1148    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1149                                 SRC2_REL(ABSOLUTE),
1150                                 SRC2_ELEM(ELEM_X),
1151                                 SRC2_NEG(0),
1152                                 ALU_INST(SQ_OP3_INST_MULADD),
1153                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1154                                 DST_GPR(2),
1155                                 DST_REL(ABSOLUTE),
1156                                 DST_ELEM(ELEM_X),
1157                                 CLAMP(1));
1158    /* r2.y = MAD(c2.y, r1.z, pv.y) */
1159    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
1160                             SRC0_REL(ABSOLUTE),
1161                             SRC0_ELEM(ELEM_Y),
1162                             SRC0_NEG(0),
1163                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1164                             SRC1_REL(ABSOLUTE),
1165                             SRC1_ELEM(ELEM_Z),
1166                             SRC1_NEG(0),
1167                             INDEX_MODE(SQ_INDEX_LOOP),
1168                             PRED_SEL(SQ_PRED_SEL_OFF),
1169                             LAST(0));
1170    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1171                                 SRC2_REL(ABSOLUTE),
1172                                 SRC2_ELEM(ELEM_Y),
1173                                 SRC2_NEG(0),
1174                                 ALU_INST(SQ_OP3_INST_MULADD),
1175                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1176                                 DST_GPR(2),
1177                                 DST_REL(ABSOLUTE),
1178                                 DST_ELEM(ELEM_Y),
1179                                 CLAMP(1));
1180    /* r2.z = MAD(c2.z, r1.z, pv.z) */
1181    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
1182                             SRC0_REL(ABSOLUTE),
1183                             SRC0_ELEM(ELEM_Z),
1184                             SRC0_NEG(0),
1185                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
1186                             SRC1_REL(ABSOLUTE),
1187                             SRC1_ELEM(ELEM_Z),
1188                             SRC1_NEG(0),
1189                             INDEX_MODE(SQ_INDEX_LOOP),
1190                             PRED_SEL(SQ_PRED_SEL_OFF),
1191                             LAST(0));
1192    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
1193                                 SRC2_REL(ABSOLUTE),
1194                                 SRC2_ELEM(ELEM_Z),
1195                                 SRC2_NEG(0),
1196                                 ALU_INST(SQ_OP3_INST_MULADD),
1197                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1198                                 DST_GPR(2),
1199                                 DST_REL(ABSOLUTE),
1200                                 DST_ELEM(ELEM_Z),
1201                                 CLAMP(1));
1202    /* r2.w = MAD(0, 0, 1) */
1203    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
1204                             SRC0_REL(ABSOLUTE),
1205                             SRC0_ELEM(ELEM_X),
1206                             SRC0_NEG(0),
1207                             SRC1_SEL(SQ_ALU_SRC_0),
1208                             SRC1_REL(ABSOLUTE),
1209                             SRC1_ELEM(ELEM_X),
1210                             SRC1_NEG(0),
1211                             INDEX_MODE(SQ_INDEX_LOOP),
1212                             PRED_SEL(SQ_PRED_SEL_OFF),
1213                             LAST(1));
1214    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
1215                                 SRC2_REL(ABSOLUTE),
1216                                 SRC2_ELEM(ELEM_X),
1217                                 SRC2_NEG(0),
1218                                 ALU_INST(SQ_OP3_INST_MULADD),
1219                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1220                                 DST_GPR(2),
1221                                 DST_REL(ABSOLUTE),
1222                                 DST_ELEM(ELEM_W),
1223                                 CLAMP(1));
1224
1225    /* 22 */
1226    shader[i++] = CF_DWORD0(ADDR(24),
1227			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1228    shader[i++] = CF_DWORD1(POP_COUNT(0),
1229                            CF_CONST(0),
1230                            COND(SQ_CF_COND_ACTIVE),
1231                            I_COUNT(3),
1232                            VALID_PIXEL_MODE(0),
1233                            CF_INST(SQ_CF_INST_TC),
1234                            BARRIER(1));
1235    /* 23 */
1236    shader[i++] = CF_DWORD0(ADDR(0),
1237			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1238    shader[i++] = CF_DWORD1(POP_COUNT(0),
1239			    CF_CONST(0),
1240			    COND(SQ_CF_COND_ACTIVE),
1241			    I_COUNT(0),
1242			    VALID_PIXEL_MODE(0),
1243			    CF_INST(SQ_CF_INST_RETURN),
1244			    BARRIER(1));
1245    /* 24/25 */
1246    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1247                             INST_MOD(0),
1248                             FETCH_WHOLE_QUAD(0),
1249                             RESOURCE_ID(0),
1250                             SRC_GPR(0),
1251                             SRC_REL(ABSOLUTE),
1252                             ALT_CONST(0),
1253			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1254			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1255    shader[i++] = TEX_DWORD1(DST_GPR(1),
1256                             DST_REL(ABSOLUTE),
1257                             DST_SEL_X(SQ_SEL_X),
1258                             DST_SEL_Y(SQ_SEL_MASK),
1259                             DST_SEL_Z(SQ_SEL_MASK),
1260                             DST_SEL_W(SQ_SEL_1),
1261                             LOD_BIAS(0),
1262                             COORD_TYPE_X(TEX_NORMALIZED),
1263                             COORD_TYPE_Y(TEX_NORMALIZED),
1264                             COORD_TYPE_Z(TEX_NORMALIZED),
1265                             COORD_TYPE_W(TEX_NORMALIZED));
1266    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1267                             OFFSET_Y(0),
1268                             OFFSET_Z(0),
1269                             SAMPLER_ID(0),
1270                             SRC_SEL_X(SQ_SEL_X),
1271                             SRC_SEL_Y(SQ_SEL_Y),
1272                             SRC_SEL_Z(SQ_SEL_0),
1273                             SRC_SEL_W(SQ_SEL_1));
1274    shader[i++] = TEX_DWORD_PAD;
1275    /* 26/27 */
1276    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1277                             INST_MOD(0),
1278                             FETCH_WHOLE_QUAD(0),
1279                             RESOURCE_ID(1),
1280                             SRC_GPR(0),
1281                             SRC_REL(ABSOLUTE),
1282                             ALT_CONST(0),
1283			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1284			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1285    shader[i++] = TEX_DWORD1(DST_GPR(1),
1286                             DST_REL(ABSOLUTE),
1287                             DST_SEL_X(SQ_SEL_MASK),
1288                             DST_SEL_Y(SQ_SEL_MASK),
1289                             DST_SEL_Z(SQ_SEL_X),
1290                             DST_SEL_W(SQ_SEL_MASK),
1291                             LOD_BIAS(0),
1292                             COORD_TYPE_X(TEX_NORMALIZED),
1293                             COORD_TYPE_Y(TEX_NORMALIZED),
1294                             COORD_TYPE_Z(TEX_NORMALIZED),
1295                             COORD_TYPE_W(TEX_NORMALIZED));
1296    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1297                             OFFSET_Y(0),
1298                             OFFSET_Z(0),
1299                             SAMPLER_ID(1),
1300                             SRC_SEL_X(SQ_SEL_X),
1301                             SRC_SEL_Y(SQ_SEL_Y),
1302                             SRC_SEL_Z(SQ_SEL_0),
1303                             SRC_SEL_W(SQ_SEL_1));
1304    shader[i++] = TEX_DWORD_PAD;
1305    /* 28/29 */
1306    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1307                             INST_MOD(0),
1308                             FETCH_WHOLE_QUAD(0),
1309                             RESOURCE_ID(2),
1310                             SRC_GPR(0),
1311                             SRC_REL(ABSOLUTE),
1312                             ALT_CONST(0),
1313			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1314			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1315    shader[i++] = TEX_DWORD1(DST_GPR(1),
1316                             DST_REL(ABSOLUTE),
1317                             DST_SEL_X(SQ_SEL_MASK),
1318                             DST_SEL_Y(SQ_SEL_X),
1319                             DST_SEL_Z(SQ_SEL_MASK),
1320                             DST_SEL_W(SQ_SEL_MASK),
1321                             LOD_BIAS(0),
1322                             COORD_TYPE_X(TEX_NORMALIZED),
1323                             COORD_TYPE_Y(TEX_NORMALIZED),
1324                             COORD_TYPE_Z(TEX_NORMALIZED),
1325                             COORD_TYPE_W(TEX_NORMALIZED));
1326    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1327                             OFFSET_Y(0),
1328                             OFFSET_Z(0),
1329                             SAMPLER_ID(2),
1330                             SRC_SEL_X(SQ_SEL_X),
1331                             SRC_SEL_Y(SQ_SEL_Y),
1332                             SRC_SEL_Z(SQ_SEL_0),
1333                             SRC_SEL_W(SQ_SEL_1));
1334    shader[i++] = TEX_DWORD_PAD;
1335    /* 30 */
1336    shader[i++] = CF_DWORD0(ADDR(32),
1337			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1338    shader[i++] = CF_DWORD1(POP_COUNT(0),
1339                            CF_CONST(0),
1340                            COND(SQ_CF_COND_ACTIVE),
1341                            I_COUNT(1),
1342                            VALID_PIXEL_MODE(0),
1343                            CF_INST(SQ_CF_INST_TC),
1344                            BARRIER(1));
1345    /* 31 */
1346    shader[i++] = CF_DWORD0(ADDR(0),
1347			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1348    shader[i++] = CF_DWORD1(POP_COUNT(0),
1349			    CF_CONST(0),
1350			    COND(SQ_CF_COND_ACTIVE),
1351			    I_COUNT(0),
1352			    VALID_PIXEL_MODE(0),
1353			    CF_INST(SQ_CF_INST_RETURN),
1354			    BARRIER(1));
1355    /* 32/33 */
1356    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
1357                             INST_MOD(0),
1358                             FETCH_WHOLE_QUAD(0),
1359                             RESOURCE_ID(0),
1360                             SRC_GPR(0),
1361                             SRC_REL(ABSOLUTE),
1362                             ALT_CONST(0),
1363                             RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
1364                             SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
1365    shader[i++] = TEX_DWORD1(DST_GPR(1),
1366                             DST_REL(ABSOLUTE),
1367                             DST_SEL_X(SQ_SEL_X),
1368                             DST_SEL_Y(SQ_SEL_Y),
1369                             DST_SEL_Z(SQ_SEL_Z),
1370                             DST_SEL_W(SQ_SEL_1),
1371                             LOD_BIAS(0),
1372                             COORD_TYPE_X(TEX_NORMALIZED),
1373                             COORD_TYPE_Y(TEX_NORMALIZED),
1374                             COORD_TYPE_Z(TEX_NORMALIZED),
1375                             COORD_TYPE_W(TEX_NORMALIZED));
1376    shader[i++] = TEX_DWORD2(OFFSET_X(0),
1377                             OFFSET_Y(0),
1378                             OFFSET_Z(0),
1379                             SAMPLER_ID(0),
1380                             SRC_SEL_X(SQ_SEL_X),
1381                             SRC_SEL_Y(SQ_SEL_Y),
1382                             SRC_SEL_Z(SQ_SEL_0),
1383                             SRC_SEL_W(SQ_SEL_1));
1384    shader[i++] = TEX_DWORD_PAD;
1385
1386    return i;
1387}
1388
1389/* comp vs --------------------------------------- */
1390int cayman_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
1391{
1392    int i = 0;
1393
1394    /* 0 */
1395    shader[i++] = CF_DWORD0(ADDR(3),
1396			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1397    shader[i++] = CF_DWORD1(POP_COUNT(0),
1398                            CF_CONST(0),
1399                            COND(SQ_CF_COND_BOOL),
1400                            I_COUNT(0),
1401                            VALID_PIXEL_MODE(0),
1402                            CF_INST(SQ_CF_INST_CALL),
1403                            BARRIER(0));
1404    /* 1 */
1405    shader[i++] = CF_DWORD0(ADDR(9),
1406			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1407    shader[i++] = CF_DWORD1(POP_COUNT(0),
1408                            CF_CONST(0),
1409                            COND(SQ_CF_COND_NOT_BOOL),
1410                            I_COUNT(0),
1411                            VALID_PIXEL_MODE(0),
1412                            CF_INST(SQ_CF_INST_CALL),
1413                            BARRIER(0));
1414    /* 2 - end */
1415    shader[i++] = CF_DWORD0(ADDR(0),
1416			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1417    shader[i++] = CF_DWORD1(POP_COUNT(0),
1418			    CF_CONST(0),
1419			    COND(SQ_CF_COND_ACTIVE),
1420			    I_COUNT(0),
1421			    VALID_PIXEL_MODE(0),
1422			    CF_INST(SQ_CF_INST_END),
1423			    BARRIER(1));
1424    /* 3 - mask sub */
1425    shader[i++] = CF_DWORD0(ADDR(44),
1426			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1427    shader[i++] = CF_DWORD1(POP_COUNT(0),
1428			    CF_CONST(0),
1429			    COND(SQ_CF_COND_ACTIVE),
1430			    I_COUNT(3),
1431			    VALID_PIXEL_MODE(0),
1432			    CF_INST(SQ_CF_INST_TC),
1433			    BARRIER(1));
1434
1435    /* 4 - ALU */
1436    shader[i++] = CF_ALU_DWORD0(ADDR(14),
1437				KCACHE_BANK0(0),
1438				KCACHE_BANK1(0),
1439				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
1440    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1441				KCACHE_ADDR0(0),
1442				KCACHE_ADDR1(0),
1443				I_COUNT(20),
1444				ALT_CONST(0),
1445				CF_INST(SQ_CF_INST_ALU),
1446				WHOLE_QUAD_MODE(0),
1447				BARRIER(1));
1448
1449    /* 5 - dst */
1450    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1451					  TYPE(SQ_EXPORT_POS),
1452					  RW_GPR(2),
1453					  RW_REL(ABSOLUTE),
1454					  INDEX_GPR(0),
1455					  ELEM_SIZE(0));
1456    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1457					       SRC_SEL_Y(SQ_SEL_Y),
1458					       SRC_SEL_Z(SQ_SEL_0),
1459					       SRC_SEL_W(SQ_SEL_1),
1460					       BURST_COUNT(1),
1461					       VALID_PIXEL_MODE(0),
1462					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1463					       MARK(0),
1464					       BARRIER(1));
1465    /* 6 - src */
1466    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1467					  TYPE(SQ_EXPORT_PARAM),
1468					  RW_GPR(1),
1469					  RW_REL(ABSOLUTE),
1470					  INDEX_GPR(0),
1471					  ELEM_SIZE(0));
1472    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1473					       SRC_SEL_Y(SQ_SEL_Y),
1474					       SRC_SEL_Z(SQ_SEL_0),
1475					       SRC_SEL_W(SQ_SEL_1),
1476					       BURST_COUNT(1),
1477					       VALID_PIXEL_MODE(0),
1478					       CF_INST(SQ_CF_INST_EXPORT),
1479					       MARK(0),
1480					       BARRIER(0));
1481    /* 7 - mask */
1482    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
1483					  TYPE(SQ_EXPORT_PARAM),
1484					  RW_GPR(0),
1485					  RW_REL(ABSOLUTE),
1486					  INDEX_GPR(0),
1487					  ELEM_SIZE(0));
1488    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1489					       SRC_SEL_Y(SQ_SEL_Y),
1490					       SRC_SEL_Z(SQ_SEL_0),
1491					       SRC_SEL_W(SQ_SEL_1),
1492					       BURST_COUNT(1),
1493					       VALID_PIXEL_MODE(0),
1494					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1495					       MARK(0),
1496					       BARRIER(0));
1497    /* 8 */
1498    shader[i++] = CF_DWORD0(ADDR(0),
1499			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1500    shader[i++] = CF_DWORD1(POP_COUNT(0),
1501			    CF_CONST(0),
1502			    COND(SQ_CF_COND_ACTIVE),
1503			    I_COUNT(0),
1504			    VALID_PIXEL_MODE(0),
1505			    CF_INST(SQ_CF_INST_RETURN),
1506			    BARRIER(1));
1507    /* 9 - non-mask sub */
1508    shader[i++] = CF_DWORD0(ADDR(50),
1509			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1510    shader[i++] = CF_DWORD1(POP_COUNT(0),
1511			    CF_CONST(0),
1512			    COND(SQ_CF_COND_ACTIVE),
1513			    I_COUNT(2),
1514			    VALID_PIXEL_MODE(0),
1515			    CF_INST(SQ_CF_INST_TC),
1516			    BARRIER(1));
1517
1518    /* 10 - ALU */
1519    shader[i++] = CF_ALU_DWORD0(ADDR(34),
1520				KCACHE_BANK0(0),
1521				KCACHE_BANK1(0),
1522				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
1523    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
1524				KCACHE_ADDR0(0),
1525				KCACHE_ADDR1(0),
1526				I_COUNT(10),
1527				ALT_CONST(0),
1528				CF_INST(SQ_CF_INST_ALU),
1529				WHOLE_QUAD_MODE(0),
1530				BARRIER(1));
1531
1532    /* 11 - dst */
1533    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
1534					  TYPE(SQ_EXPORT_POS),
1535					  RW_GPR(1),
1536					  RW_REL(ABSOLUTE),
1537					  INDEX_GPR(0),
1538					  ELEM_SIZE(0));
1539    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1540					       SRC_SEL_Y(SQ_SEL_Y),
1541					       SRC_SEL_Z(SQ_SEL_0),
1542					       SRC_SEL_W(SQ_SEL_1),
1543					       BURST_COUNT(0),
1544					       VALID_PIXEL_MODE(0),
1545					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1546					       MARK(0),
1547					       BARRIER(1));
1548    /* 12 - src */
1549    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
1550					  TYPE(SQ_EXPORT_PARAM),
1551					  RW_GPR(0),
1552					  RW_REL(ABSOLUTE),
1553					  INDEX_GPR(0),
1554					  ELEM_SIZE(0));
1555    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
1556					       SRC_SEL_Y(SQ_SEL_Y),
1557					       SRC_SEL_Z(SQ_SEL_0),
1558					       SRC_SEL_W(SQ_SEL_1),
1559					       BURST_COUNT(0),
1560					       VALID_PIXEL_MODE(0),
1561					       CF_INST(SQ_CF_INST_EXPORT_DONE),
1562					       MARK(0),
1563					       BARRIER(0));
1564    /* 13 */
1565    shader[i++] = CF_DWORD0(ADDR(0),
1566			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
1567    shader[i++] = CF_DWORD1(POP_COUNT(0),
1568			    CF_CONST(0),
1569			    COND(SQ_CF_COND_ACTIVE),
1570			    I_COUNT(0),
1571			    VALID_PIXEL_MODE(0),
1572			    CF_INST(SQ_CF_INST_RETURN),
1573			    BARRIER(1));
1574
1575    /* 14 srcX.x DOT4 - mask */
1576    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1577                             SRC0_REL(ABSOLUTE),
1578                             SRC0_ELEM(ELEM_X),
1579                             SRC0_NEG(0),
1580                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1581                             SRC1_REL(ABSOLUTE),
1582                             SRC1_ELEM(ELEM_X),
1583                             SRC1_NEG(0),
1584                             INDEX_MODE(SQ_INDEX_LOOP),
1585                             PRED_SEL(SQ_PRED_SEL_OFF),
1586                             LAST(0));
1587    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1588                                 SRC1_ABS(0),
1589                                 UPDATE_EXECUTE_MASK(0),
1590                                 UPDATE_PRED(0),
1591                                 WRITE_MASK(1),
1592                                 OMOD(SQ_ALU_OMOD_OFF),
1593                                 ALU_INST(SQ_OP2_INST_DOT4),
1594                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1595                                 DST_GPR(3),
1596                                 DST_REL(ABSOLUTE),
1597                                 DST_ELEM(ELEM_X),
1598                                 CLAMP(0));
1599
1600    /* 15 srcX.y DOT4 - mask */
1601    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1602                             SRC0_REL(ABSOLUTE),
1603                             SRC0_ELEM(ELEM_Y),
1604                             SRC0_NEG(0),
1605                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1606                             SRC1_REL(ABSOLUTE),
1607                             SRC1_ELEM(ELEM_Y),
1608                             SRC1_NEG(0),
1609                             INDEX_MODE(SQ_INDEX_LOOP),
1610                             PRED_SEL(SQ_PRED_SEL_OFF),
1611                             LAST(0));
1612    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1613                                 SRC1_ABS(0),
1614                                 UPDATE_EXECUTE_MASK(0),
1615                                 UPDATE_PRED(0),
1616                                 WRITE_MASK(0),
1617                                 OMOD(SQ_ALU_OMOD_OFF),
1618                                 ALU_INST(SQ_OP2_INST_DOT4),
1619                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1620                                 DST_GPR(3),
1621                                 DST_REL(ABSOLUTE),
1622                                 DST_ELEM(ELEM_Y),
1623                                 CLAMP(0));
1624
1625    /* 16 srcX.z DOT4 - mask */
1626    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1627                             SRC0_REL(ABSOLUTE),
1628                             SRC0_ELEM(ELEM_Z),
1629                             SRC0_NEG(0),
1630                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1631                             SRC1_REL(ABSOLUTE),
1632                             SRC1_ELEM(ELEM_Z),
1633                             SRC1_NEG(0),
1634                             INDEX_MODE(SQ_INDEX_LOOP),
1635                             PRED_SEL(SQ_PRED_SEL_OFF),
1636                             LAST(0));
1637    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1638                                 SRC1_ABS(0),
1639                                 UPDATE_EXECUTE_MASK(0),
1640                                 UPDATE_PRED(0),
1641                                 WRITE_MASK(0),
1642                                 OMOD(SQ_ALU_OMOD_OFF),
1643                                 ALU_INST(SQ_OP2_INST_DOT4),
1644                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1645                                 DST_GPR(3),
1646                                 DST_REL(ABSOLUTE),
1647                                 DST_ELEM(ELEM_Z),
1648                                 CLAMP(0));
1649
1650    /* 17 srcX.w DOT4 - mask */
1651    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1652                             SRC0_REL(ABSOLUTE),
1653                             SRC0_ELEM(ELEM_W),
1654                             SRC0_NEG(0),
1655                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1656                             SRC1_REL(ABSOLUTE),
1657                             SRC1_ELEM(ELEM_W),
1658                             SRC1_NEG(0),
1659                             INDEX_MODE(SQ_INDEX_LOOP),
1660                             PRED_SEL(SQ_PRED_SEL_OFF),
1661                             LAST(1));
1662    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1663                                 SRC1_ABS(0),
1664                                 UPDATE_EXECUTE_MASK(0),
1665                                 UPDATE_PRED(0),
1666                                 WRITE_MASK(0),
1667                                 OMOD(SQ_ALU_OMOD_OFF),
1668                                 ALU_INST(SQ_OP2_INST_DOT4),
1669                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1670                                 DST_GPR(3),
1671                                 DST_REL(ABSOLUTE),
1672                                 DST_ELEM(ELEM_W),
1673                                 CLAMP(0));
1674
1675    /* 18 srcY.x DOT4 - mask */
1676    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1677                             SRC0_REL(ABSOLUTE),
1678                             SRC0_ELEM(ELEM_X),
1679                             SRC0_NEG(0),
1680                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1681                             SRC1_REL(ABSOLUTE),
1682                             SRC1_ELEM(ELEM_X),
1683                             SRC1_NEG(0),
1684                             INDEX_MODE(SQ_INDEX_LOOP),
1685                             PRED_SEL(SQ_PRED_SEL_OFF),
1686                             LAST(0));
1687    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1688                                 SRC1_ABS(0),
1689                                 UPDATE_EXECUTE_MASK(0),
1690                                 UPDATE_PRED(0),
1691                                 WRITE_MASK(0),
1692                                 OMOD(SQ_ALU_OMOD_OFF),
1693                                 ALU_INST(SQ_OP2_INST_DOT4),
1694                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1695                                 DST_GPR(3),
1696                                 DST_REL(ABSOLUTE),
1697                                 DST_ELEM(ELEM_X),
1698                                 CLAMP(0));
1699
1700    /* 19 srcY.y DOT4 - mask */
1701    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1702                             SRC0_REL(ABSOLUTE),
1703                             SRC0_ELEM(ELEM_Y),
1704                             SRC0_NEG(0),
1705                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1706                             SRC1_REL(ABSOLUTE),
1707                             SRC1_ELEM(ELEM_Y),
1708                             SRC1_NEG(0),
1709                             INDEX_MODE(SQ_INDEX_LOOP),
1710                             PRED_SEL(SQ_PRED_SEL_OFF),
1711                             LAST(0));
1712    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1713                                 SRC1_ABS(0),
1714                                 UPDATE_EXECUTE_MASK(0),
1715                                 UPDATE_PRED(0),
1716                                 WRITE_MASK(1),
1717                                 OMOD(SQ_ALU_OMOD_OFF),
1718                                 ALU_INST(SQ_OP2_INST_DOT4),
1719                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1720                                 DST_GPR(3),
1721                                 DST_REL(ABSOLUTE),
1722                                 DST_ELEM(ELEM_Y),
1723                                 CLAMP(0));
1724
1725    /* 20 srcY.z DOT4 - mask */
1726    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1727                             SRC0_REL(ABSOLUTE),
1728                             SRC0_ELEM(ELEM_Z),
1729                             SRC0_NEG(0),
1730                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1731                             SRC1_REL(ABSOLUTE),
1732                             SRC1_ELEM(ELEM_Z),
1733                             SRC1_NEG(0),
1734                             INDEX_MODE(SQ_INDEX_LOOP),
1735                             PRED_SEL(SQ_PRED_SEL_OFF),
1736                             LAST(0));
1737    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1738                                 SRC1_ABS(0),
1739                                 UPDATE_EXECUTE_MASK(0),
1740                                 UPDATE_PRED(0),
1741                                 WRITE_MASK(0),
1742                                 OMOD(SQ_ALU_OMOD_OFF),
1743                                 ALU_INST(SQ_OP2_INST_DOT4),
1744                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1745                                 DST_GPR(3),
1746                                 DST_REL(ABSOLUTE),
1747                                 DST_ELEM(ELEM_Z),
1748                                 CLAMP(0));
1749
1750    /* 21 srcY.w DOT4 - mask */
1751    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
1752                             SRC0_REL(ABSOLUTE),
1753                             SRC0_ELEM(ELEM_W),
1754                             SRC0_NEG(0),
1755                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
1756                             SRC1_REL(ABSOLUTE),
1757                             SRC1_ELEM(ELEM_W),
1758                             SRC1_NEG(0),
1759                             INDEX_MODE(SQ_INDEX_LOOP),
1760                             PRED_SEL(SQ_PRED_SEL_OFF),
1761                             LAST(1));
1762    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1763                                 SRC1_ABS(0),
1764                                 UPDATE_EXECUTE_MASK(0),
1765                                 UPDATE_PRED(0),
1766                                 WRITE_MASK(0),
1767                                 OMOD(SQ_ALU_OMOD_OFF),
1768                                 ALU_INST(SQ_OP2_INST_DOT4),
1769                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1770                                 DST_GPR(3),
1771                                 DST_REL(ABSOLUTE),
1772                                 DST_ELEM(ELEM_W),
1773                                 CLAMP(0));
1774
1775    /* 22 maskX.x DOT4 - mask */
1776    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1777                             SRC0_REL(ABSOLUTE),
1778                             SRC0_ELEM(ELEM_X),
1779                             SRC0_NEG(0),
1780                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1781                             SRC1_REL(ABSOLUTE),
1782                             SRC1_ELEM(ELEM_X),
1783                             SRC1_NEG(0),
1784                             INDEX_MODE(SQ_INDEX_LOOP),
1785                             PRED_SEL(SQ_PRED_SEL_OFF),
1786                             LAST(0));
1787    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1788                                 SRC1_ABS(0),
1789                                 UPDATE_EXECUTE_MASK(0),
1790                                 UPDATE_PRED(0),
1791                                 WRITE_MASK(1),
1792                                 OMOD(SQ_ALU_OMOD_OFF),
1793                                 ALU_INST(SQ_OP2_INST_DOT4),
1794                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1795                                 DST_GPR(4),
1796                                 DST_REL(ABSOLUTE),
1797                                 DST_ELEM(ELEM_X),
1798                                 CLAMP(0));
1799
1800    /* 23 maskX.y DOT4 - mask */
1801    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1802                             SRC0_REL(ABSOLUTE),
1803                             SRC0_ELEM(ELEM_Y),
1804                             SRC0_NEG(0),
1805                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1806                             SRC1_REL(ABSOLUTE),
1807                             SRC1_ELEM(ELEM_Y),
1808                             SRC1_NEG(0),
1809                             INDEX_MODE(SQ_INDEX_LOOP),
1810                             PRED_SEL(SQ_PRED_SEL_OFF),
1811                             LAST(0));
1812    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1813                                 SRC1_ABS(0),
1814                                 UPDATE_EXECUTE_MASK(0),
1815                                 UPDATE_PRED(0),
1816                                 WRITE_MASK(0),
1817                                 OMOD(SQ_ALU_OMOD_OFF),
1818                                 ALU_INST(SQ_OP2_INST_DOT4),
1819                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1820                                 DST_GPR(4),
1821                                 DST_REL(ABSOLUTE),
1822                                 DST_ELEM(ELEM_Y),
1823                                 CLAMP(0));
1824
1825    /* 24 maskX.z DOT4 - mask */
1826    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1827                             SRC0_REL(ABSOLUTE),
1828                             SRC0_ELEM(ELEM_Z),
1829                             SRC0_NEG(0),
1830                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1831                             SRC1_REL(ABSOLUTE),
1832                             SRC1_ELEM(ELEM_Z),
1833                             SRC1_NEG(0),
1834                             INDEX_MODE(SQ_INDEX_LOOP),
1835                             PRED_SEL(SQ_PRED_SEL_OFF),
1836                             LAST(0));
1837    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1838                                 SRC1_ABS(0),
1839                                 UPDATE_EXECUTE_MASK(0),
1840                                 UPDATE_PRED(0),
1841                                 WRITE_MASK(0),
1842                                 OMOD(SQ_ALU_OMOD_OFF),
1843                                 ALU_INST(SQ_OP2_INST_DOT4),
1844                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1845                                 DST_GPR(4),
1846                                 DST_REL(ABSOLUTE),
1847                                 DST_ELEM(ELEM_Z),
1848                                 CLAMP(0));
1849
1850    /* 25 maskX.w DOT4 - mask */
1851    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1852                             SRC0_REL(ABSOLUTE),
1853                             SRC0_ELEM(ELEM_W),
1854                             SRC0_NEG(0),
1855                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
1856                             SRC1_REL(ABSOLUTE),
1857                             SRC1_ELEM(ELEM_W),
1858                             SRC1_NEG(0),
1859                             INDEX_MODE(SQ_INDEX_LOOP),
1860                             PRED_SEL(SQ_PRED_SEL_OFF),
1861                             LAST(1));
1862    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1863                                 SRC1_ABS(0),
1864                                 UPDATE_EXECUTE_MASK(0),
1865                                 UPDATE_PRED(0),
1866                                 WRITE_MASK(0),
1867                                 OMOD(SQ_ALU_OMOD_OFF),
1868                                 ALU_INST(SQ_OP2_INST_DOT4),
1869                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1870                                 DST_GPR(4),
1871                                 DST_REL(ABSOLUTE),
1872                                 DST_ELEM(ELEM_W),
1873                                 CLAMP(0));
1874
1875    /* 26 maskY.x DOT4 - mask */
1876    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1877                             SRC0_REL(ABSOLUTE),
1878                             SRC0_ELEM(ELEM_X),
1879                             SRC0_NEG(0),
1880                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1881                             SRC1_REL(ABSOLUTE),
1882                             SRC1_ELEM(ELEM_X),
1883                             SRC1_NEG(0),
1884                             INDEX_MODE(SQ_INDEX_LOOP),
1885                             PRED_SEL(SQ_PRED_SEL_OFF),
1886                             LAST(0));
1887    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1888                                 SRC1_ABS(0),
1889                                 UPDATE_EXECUTE_MASK(0),
1890                                 UPDATE_PRED(0),
1891                                 WRITE_MASK(0),
1892                                 OMOD(SQ_ALU_OMOD_OFF),
1893                                 ALU_INST(SQ_OP2_INST_DOT4),
1894                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1895                                 DST_GPR(4),
1896                                 DST_REL(ABSOLUTE),
1897                                 DST_ELEM(ELEM_X),
1898                                 CLAMP(0));
1899
1900    /* 27 maskY.y DOT4 - mask */
1901    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1902                             SRC0_REL(ABSOLUTE),
1903                             SRC0_ELEM(ELEM_Y),
1904                             SRC0_NEG(0),
1905                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1906                             SRC1_REL(ABSOLUTE),
1907                             SRC1_ELEM(ELEM_Y),
1908                             SRC1_NEG(0),
1909                             INDEX_MODE(SQ_INDEX_LOOP),
1910                             PRED_SEL(SQ_PRED_SEL_OFF),
1911                             LAST(0));
1912    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1913                                 SRC1_ABS(0),
1914                                 UPDATE_EXECUTE_MASK(0),
1915                                 UPDATE_PRED(0),
1916                                 WRITE_MASK(1),
1917                                 OMOD(SQ_ALU_OMOD_OFF),
1918                                 ALU_INST(SQ_OP2_INST_DOT4),
1919                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1920                                 DST_GPR(4),
1921                                 DST_REL(ABSOLUTE),
1922                                 DST_ELEM(ELEM_Y),
1923                                 CLAMP(0));
1924
1925    /* 28 maskY.z DOT4 - mask */
1926    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1927                             SRC0_REL(ABSOLUTE),
1928                             SRC0_ELEM(ELEM_Z),
1929                             SRC0_NEG(0),
1930                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1931                             SRC1_REL(ABSOLUTE),
1932                             SRC1_ELEM(ELEM_Z),
1933                             SRC1_NEG(0),
1934                             INDEX_MODE(SQ_INDEX_LOOP),
1935                             PRED_SEL(SQ_PRED_SEL_OFF),
1936                             LAST(0));
1937    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1938                                 SRC1_ABS(0),
1939                                 UPDATE_EXECUTE_MASK(0),
1940                                 UPDATE_PRED(0),
1941                                 WRITE_MASK(0),
1942                                 OMOD(SQ_ALU_OMOD_OFF),
1943                                 ALU_INST(SQ_OP2_INST_DOT4),
1944                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1945                                 DST_GPR(4),
1946                                 DST_REL(ABSOLUTE),
1947                                 DST_ELEM(ELEM_Z),
1948                                 CLAMP(0));
1949
1950    /* 29 maskY.w DOT4 - mask */
1951    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
1952                             SRC0_REL(ABSOLUTE),
1953                             SRC0_ELEM(ELEM_W),
1954                             SRC0_NEG(0),
1955                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
1956                             SRC1_REL(ABSOLUTE),
1957                             SRC1_ELEM(ELEM_W),
1958                             SRC1_NEG(0),
1959                             INDEX_MODE(SQ_INDEX_LOOP),
1960                             PRED_SEL(SQ_PRED_SEL_OFF),
1961                             LAST(1));
1962    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1963                                 SRC1_ABS(0),
1964                                 UPDATE_EXECUTE_MASK(0),
1965                                 UPDATE_PRED(0),
1966                                 WRITE_MASK(0),
1967                                 OMOD(SQ_ALU_OMOD_OFF),
1968                                 ALU_INST(SQ_OP2_INST_DOT4),
1969                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1970                                 DST_GPR(4),
1971                                 DST_REL(ABSOLUTE),
1972                                 DST_ELEM(ELEM_W),
1973                                 CLAMP(0));
1974
1975    /* 30 srcX / w */
1976    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
1977                             SRC0_REL(ABSOLUTE),
1978                             SRC0_ELEM(ELEM_X),
1979                             SRC0_NEG(0),
1980                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
1981                             SRC1_REL(ABSOLUTE),
1982                             SRC1_ELEM(ELEM_W),
1983                             SRC1_NEG(0),
1984                             INDEX_MODE(SQ_INDEX_AR_X),
1985                             PRED_SEL(SQ_PRED_SEL_OFF),
1986                             LAST(1));
1987    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
1988                                 SRC1_ABS(0),
1989                                 UPDATE_EXECUTE_MASK(0),
1990                                 UPDATE_PRED(0),
1991                                 WRITE_MASK(1),
1992                                 OMOD(SQ_ALU_OMOD_OFF),
1993                                 ALU_INST(SQ_OP2_INST_MUL),
1994                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
1995                                 DST_GPR(1),
1996                                 DST_REL(ABSOLUTE),
1997                                 DST_ELEM(ELEM_X),
1998                                 CLAMP(0));
1999
2000    /* 31 srcY / h */
2001    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
2002                             SRC0_REL(ABSOLUTE),
2003                             SRC0_ELEM(ELEM_Y),
2004                             SRC0_NEG(0),
2005                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2006                             SRC1_REL(ABSOLUTE),
2007                             SRC1_ELEM(ELEM_W),
2008                             SRC1_NEG(0),
2009                             INDEX_MODE(SQ_INDEX_AR_X),
2010                             PRED_SEL(SQ_PRED_SEL_OFF),
2011                             LAST(1));
2012    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2013                                 SRC1_ABS(0),
2014                                 UPDATE_EXECUTE_MASK(0),
2015                                 UPDATE_PRED(0),
2016                                 WRITE_MASK(1),
2017                                 OMOD(SQ_ALU_OMOD_OFF),
2018                                 ALU_INST(SQ_OP2_INST_MUL),
2019                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2020                                 DST_GPR(1),
2021                                 DST_REL(ABSOLUTE),
2022                                 DST_ELEM(ELEM_Y),
2023                                 CLAMP(0));
2024
2025    /* 32 maskX / w */
2026    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
2027                             SRC0_REL(ABSOLUTE),
2028                             SRC0_ELEM(ELEM_X),
2029                             SRC0_NEG(0),
2030                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
2031                             SRC1_REL(ABSOLUTE),
2032                             SRC1_ELEM(ELEM_W),
2033                             SRC1_NEG(0),
2034                             INDEX_MODE(SQ_INDEX_AR_X),
2035                             PRED_SEL(SQ_PRED_SEL_OFF),
2036                             LAST(1));
2037    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2038                                 SRC1_ABS(0),
2039                                 UPDATE_EXECUTE_MASK(0),
2040                                 UPDATE_PRED(0),
2041                                 WRITE_MASK(1),
2042                                 OMOD(SQ_ALU_OMOD_OFF),
2043                                 ALU_INST(SQ_OP2_INST_MUL),
2044                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2045                                 DST_GPR(0),
2046                                 DST_REL(ABSOLUTE),
2047                                 DST_ELEM(ELEM_X),
2048                                 CLAMP(0));
2049
2050    /* 33 maskY / h */
2051    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
2052                             SRC0_REL(ABSOLUTE),
2053                             SRC0_ELEM(ELEM_Y),
2054                             SRC0_NEG(0),
2055                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
2056                             SRC1_REL(ABSOLUTE),
2057                             SRC1_ELEM(ELEM_W),
2058                             SRC1_NEG(0),
2059                             INDEX_MODE(SQ_INDEX_AR_X),
2060                             PRED_SEL(SQ_PRED_SEL_OFF),
2061                             LAST(1));
2062    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2063                                 SRC1_ABS(0),
2064                                 UPDATE_EXECUTE_MASK(0),
2065                                 UPDATE_PRED(0),
2066                                 WRITE_MASK(1),
2067                                 OMOD(SQ_ALU_OMOD_OFF),
2068                                 ALU_INST(SQ_OP2_INST_MUL),
2069                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2070                                 DST_GPR(0),
2071                                 DST_REL(ABSOLUTE),
2072                                 DST_ELEM(ELEM_Y),
2073                                 CLAMP(0));
2074
2075    /* 34 srcX.x DOT4 - non-mask */
2076    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2077                             SRC0_REL(ABSOLUTE),
2078                             SRC0_ELEM(ELEM_X),
2079                             SRC0_NEG(0),
2080                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2081                             SRC1_REL(ABSOLUTE),
2082                             SRC1_ELEM(ELEM_X),
2083                             SRC1_NEG(0),
2084                             INDEX_MODE(SQ_INDEX_LOOP),
2085                             PRED_SEL(SQ_PRED_SEL_OFF),
2086                             LAST(0));
2087    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2088                                 SRC1_ABS(0),
2089                                 UPDATE_EXECUTE_MASK(0),
2090                                 UPDATE_PRED(0),
2091                                 WRITE_MASK(1),
2092                                 OMOD(SQ_ALU_OMOD_OFF),
2093                                 ALU_INST(SQ_OP2_INST_DOT4),
2094                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2095                                 DST_GPR(2),
2096                                 DST_REL(ABSOLUTE),
2097                                 DST_ELEM(ELEM_X),
2098                                 CLAMP(0));
2099
2100    /* 35 srcX.y DOT4 - non-mask */
2101    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2102                             SRC0_REL(ABSOLUTE),
2103                             SRC0_ELEM(ELEM_Y),
2104                             SRC0_NEG(0),
2105                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2106                             SRC1_REL(ABSOLUTE),
2107                             SRC1_ELEM(ELEM_Y),
2108                             SRC1_NEG(0),
2109                             INDEX_MODE(SQ_INDEX_LOOP),
2110                             PRED_SEL(SQ_PRED_SEL_OFF),
2111                             LAST(0));
2112    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2113                                 SRC1_ABS(0),
2114                                 UPDATE_EXECUTE_MASK(0),
2115                                 UPDATE_PRED(0),
2116                                 WRITE_MASK(0),
2117                                 OMOD(SQ_ALU_OMOD_OFF),
2118                                 ALU_INST(SQ_OP2_INST_DOT4),
2119                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2120                                 DST_GPR(2),
2121                                 DST_REL(ABSOLUTE),
2122                                 DST_ELEM(ELEM_Y),
2123                                 CLAMP(0));
2124
2125    /* 36 srcX.z DOT4 - non-mask */
2126    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2127                             SRC0_REL(ABSOLUTE),
2128                             SRC0_ELEM(ELEM_Z),
2129                             SRC0_NEG(0),
2130                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2131                             SRC1_REL(ABSOLUTE),
2132                             SRC1_ELEM(ELEM_Z),
2133                             SRC1_NEG(0),
2134                             INDEX_MODE(SQ_INDEX_LOOP),
2135                             PRED_SEL(SQ_PRED_SEL_OFF),
2136                             LAST(0));
2137    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2138                                 SRC1_ABS(0),
2139                                 UPDATE_EXECUTE_MASK(0),
2140                                 UPDATE_PRED(0),
2141                                 WRITE_MASK(0),
2142                                 OMOD(SQ_ALU_OMOD_OFF),
2143                                 ALU_INST(SQ_OP2_INST_DOT4),
2144                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2145                                 DST_GPR(2),
2146                                 DST_REL(ABSOLUTE),
2147                                 DST_ELEM(ELEM_Z),
2148                                 CLAMP(0));
2149
2150    /* 37 srcX.w DOT4 - non-mask */
2151    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2152                             SRC0_REL(ABSOLUTE),
2153                             SRC0_ELEM(ELEM_W),
2154                             SRC0_NEG(0),
2155                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2156                             SRC1_REL(ABSOLUTE),
2157                             SRC1_ELEM(ELEM_W),
2158                             SRC1_NEG(0),
2159                             INDEX_MODE(SQ_INDEX_LOOP),
2160                             PRED_SEL(SQ_PRED_SEL_OFF),
2161                             LAST(1));
2162    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2163                                 SRC1_ABS(0),
2164                                 UPDATE_EXECUTE_MASK(0),
2165                                 UPDATE_PRED(0),
2166                                 WRITE_MASK(0),
2167                                 OMOD(SQ_ALU_OMOD_OFF),
2168                                 ALU_INST(SQ_OP2_INST_DOT4),
2169                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2170                                 DST_GPR(2),
2171                                 DST_REL(ABSOLUTE),
2172                                 DST_ELEM(ELEM_W),
2173                                 CLAMP(0));
2174
2175    /* 38 srcY.x DOT4 - non-mask */
2176    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2177                             SRC0_REL(ABSOLUTE),
2178                             SRC0_ELEM(ELEM_X),
2179                             SRC0_NEG(0),
2180                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2181                             SRC1_REL(ABSOLUTE),
2182                             SRC1_ELEM(ELEM_X),
2183                             SRC1_NEG(0),
2184                             INDEX_MODE(SQ_INDEX_LOOP),
2185                             PRED_SEL(SQ_PRED_SEL_OFF),
2186                             LAST(0));
2187    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2188                                 SRC1_ABS(0),
2189                                 UPDATE_EXECUTE_MASK(0),
2190                                 UPDATE_PRED(0),
2191                                 WRITE_MASK(0),
2192                                 OMOD(SQ_ALU_OMOD_OFF),
2193                                 ALU_INST(SQ_OP2_INST_DOT4),
2194                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2195                                 DST_GPR(2),
2196                                 DST_REL(ABSOLUTE),
2197                                 DST_ELEM(ELEM_X),
2198                                 CLAMP(0));
2199
2200    /* 39 srcY.y DOT4 - non-mask */
2201    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2202                             SRC0_REL(ABSOLUTE),
2203                             SRC0_ELEM(ELEM_Y),
2204                             SRC0_NEG(0),
2205                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2206                             SRC1_REL(ABSOLUTE),
2207                             SRC1_ELEM(ELEM_Y),
2208                             SRC1_NEG(0),
2209                             INDEX_MODE(SQ_INDEX_LOOP),
2210                             PRED_SEL(SQ_PRED_SEL_OFF),
2211                             LAST(0));
2212    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2213                                 SRC1_ABS(0),
2214                                 UPDATE_EXECUTE_MASK(0),
2215                                 UPDATE_PRED(0),
2216                                 WRITE_MASK(1),
2217                                 OMOD(SQ_ALU_OMOD_OFF),
2218                                 ALU_INST(SQ_OP2_INST_DOT4),
2219                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2220                                 DST_GPR(2),
2221                                 DST_REL(ABSOLUTE),
2222                                 DST_ELEM(ELEM_Y),
2223                                 CLAMP(0));
2224
2225    /* 40 srcY.z DOT4 - non-mask */
2226    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2227                             SRC0_REL(ABSOLUTE),
2228                             SRC0_ELEM(ELEM_Z),
2229                             SRC0_NEG(0),
2230                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2231                             SRC1_REL(ABSOLUTE),
2232                             SRC1_ELEM(ELEM_Z),
2233                             SRC1_NEG(0),
2234                             INDEX_MODE(SQ_INDEX_LOOP),
2235                             PRED_SEL(SQ_PRED_SEL_OFF),
2236                             LAST(0));
2237    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2238                                 SRC1_ABS(0),
2239                                 UPDATE_EXECUTE_MASK(0),
2240                                 UPDATE_PRED(0),
2241                                 WRITE_MASK(0),
2242                                 OMOD(SQ_ALU_OMOD_OFF),
2243                                 ALU_INST(SQ_OP2_INST_DOT4),
2244                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2245                                 DST_GPR(2),
2246                                 DST_REL(ABSOLUTE),
2247                                 DST_ELEM(ELEM_Z),
2248                                 CLAMP(0));
2249
2250    /* 41 srcY.w DOT4 - non-mask */
2251    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2252                             SRC0_REL(ABSOLUTE),
2253                             SRC0_ELEM(ELEM_W),
2254                             SRC0_NEG(0),
2255                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2256                             SRC1_REL(ABSOLUTE),
2257                             SRC1_ELEM(ELEM_W),
2258                             SRC1_NEG(0),
2259                             INDEX_MODE(SQ_INDEX_LOOP),
2260                             PRED_SEL(SQ_PRED_SEL_OFF),
2261                             LAST(1));
2262    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2263                                 SRC1_ABS(0),
2264                                 UPDATE_EXECUTE_MASK(0),
2265                                 UPDATE_PRED(0),
2266                                 WRITE_MASK(0),
2267                                 OMOD(SQ_ALU_OMOD_OFF),
2268                                 ALU_INST(SQ_OP2_INST_DOT4),
2269                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2270                                 DST_GPR(2),
2271                                 DST_REL(ABSOLUTE),
2272                                 DST_ELEM(ELEM_W),
2273                                 CLAMP(0));
2274
2275    /* 42 srcX / w */
2276    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
2277                             SRC0_REL(ABSOLUTE),
2278                             SRC0_ELEM(ELEM_X),
2279                             SRC0_NEG(0),
2280                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
2281                             SRC1_REL(ABSOLUTE),
2282                             SRC1_ELEM(ELEM_W),
2283                             SRC1_NEG(0),
2284                             INDEX_MODE(SQ_INDEX_AR_X),
2285                             PRED_SEL(SQ_PRED_SEL_OFF),
2286                             LAST(1));
2287    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2288                                 SRC1_ABS(0),
2289                                 UPDATE_EXECUTE_MASK(0),
2290                                 UPDATE_PRED(0),
2291                                 WRITE_MASK(1),
2292                                 OMOD(SQ_ALU_OMOD_OFF),
2293                                 ALU_INST(SQ_OP2_INST_MUL),
2294                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2295                                 DST_GPR(0),
2296                                 DST_REL(ABSOLUTE),
2297                                 DST_ELEM(ELEM_X),
2298                                 CLAMP(0));
2299
2300    /* 43 srcY / h */
2301    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
2302                             SRC0_REL(ABSOLUTE),
2303                             SRC0_ELEM(ELEM_Y),
2304                             SRC0_NEG(0),
2305                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
2306                             SRC1_REL(ABSOLUTE),
2307                             SRC1_ELEM(ELEM_W),
2308                             SRC1_NEG(0),
2309                             INDEX_MODE(SQ_INDEX_AR_X),
2310                             PRED_SEL(SQ_PRED_SEL_OFF),
2311                             LAST(1));
2312    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2313                                 SRC1_ABS(0),
2314                                 UPDATE_EXECUTE_MASK(0),
2315                                 UPDATE_PRED(0),
2316                                 WRITE_MASK(1),
2317                                 OMOD(SQ_ALU_OMOD_OFF),
2318                                 ALU_INST(SQ_OP2_INST_MUL),
2319                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
2320                                 DST_GPR(0),
2321                                 DST_REL(ABSOLUTE),
2322                                 DST_ELEM(ELEM_Y),
2323                                 CLAMP(0));
2324    /* mask vfetch - 44/45 - dst */
2325    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2326			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2327			     FETCH_WHOLE_QUAD(0),
2328			     BUFFER_ID(0),
2329			     SRC_GPR(0),
2330			     SRC_REL(ABSOLUTE),
2331			     SRC_SEL_X(SQ_SEL_X),
2332			     SRC_SEL_Y(SQ_SEL_Y),
2333			     STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
2334			     LDS_REQ(0),
2335			     COALESCED_READ(0));
2336    shader[i++] = VTX_DWORD1_GPR(DST_GPR(2),
2337				 DST_REL(0),
2338				 DST_SEL_X(SQ_SEL_X),
2339				 DST_SEL_Y(SQ_SEL_Y),
2340				 DST_SEL_Z(SQ_SEL_0),
2341				 DST_SEL_W(SQ_SEL_1),
2342				 USE_CONST_FIELDS(0),
2343				 DATA_FORMAT(FMT_32_32_FLOAT),
2344				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2345				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2346				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2347    shader[i++] = VTX_DWORD2(OFFSET(0),
2348#if X_BYTE_ORDER == X_BIG_ENDIAN
2349                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2350#else
2351			     ENDIAN_SWAP(ENDIAN_NONE),
2352#endif
2353			     CONST_BUF_NO_STRIDE(0),
2354			     ALT_CONST(0),
2355			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2356    shader[i++] = VTX_DWORD_PAD;
2357    /* 46/47 - src */
2358    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2359			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2360			     FETCH_WHOLE_QUAD(0),
2361			     BUFFER_ID(0),
2362			     SRC_GPR(0),
2363			     SRC_REL(ABSOLUTE),
2364			     SRC_SEL_X(SQ_SEL_X),
2365			     SRC_SEL_Y(SQ_SEL_Y),
2366			     STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
2367			     LDS_REQ(0),
2368			     COALESCED_READ(0));
2369    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
2370				 DST_REL(0),
2371				 DST_SEL_X(SQ_SEL_X),
2372				 DST_SEL_Y(SQ_SEL_Y),
2373				 DST_SEL_Z(SQ_SEL_1),
2374				 DST_SEL_W(SQ_SEL_0),
2375				 USE_CONST_FIELDS(0),
2376				 DATA_FORMAT(FMT_32_32_FLOAT),
2377				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2378				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2379				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2380    shader[i++] = VTX_DWORD2(OFFSET(8),
2381#if X_BYTE_ORDER == X_BIG_ENDIAN
2382                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2383#else
2384			     ENDIAN_SWAP(ENDIAN_NONE),
2385#endif
2386			     CONST_BUF_NO_STRIDE(0),
2387			     ALT_CONST(0),
2388			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2389    shader[i++] = VTX_DWORD_PAD;
2390    /* 48/49 - mask */
2391    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2392			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2393			     FETCH_WHOLE_QUAD(0),
2394			     BUFFER_ID(0),
2395			     SRC_GPR(0),
2396			     SRC_REL(ABSOLUTE),
2397			     SRC_SEL_X(SQ_SEL_X),
2398			     SRC_SEL_Y(SQ_SEL_Y),
2399			     STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
2400			     LDS_REQ(0),
2401			     COALESCED_READ(0));
2402    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
2403				 DST_REL(0),
2404				 DST_SEL_X(SQ_SEL_X),
2405				 DST_SEL_Y(SQ_SEL_Y),
2406				 DST_SEL_Z(SQ_SEL_1),
2407				 DST_SEL_W(SQ_SEL_0),
2408				 USE_CONST_FIELDS(0),
2409				 DATA_FORMAT(FMT_32_32_FLOAT),
2410				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2411				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2412				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2413    shader[i++] = VTX_DWORD2(OFFSET(16),
2414#if X_BYTE_ORDER == X_BIG_ENDIAN
2415                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2416#else
2417			     ENDIAN_SWAP(ENDIAN_NONE),
2418#endif
2419			     CONST_BUF_NO_STRIDE(0),
2420			     ALT_CONST(0),
2421			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2422    shader[i++] = VTX_DWORD_PAD;
2423
2424    /* no mask vfetch - 50/51 - dst */
2425    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2426			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2427			     FETCH_WHOLE_QUAD(0),
2428			     BUFFER_ID(0),
2429			     SRC_GPR(0),
2430			     SRC_REL(ABSOLUTE),
2431			     SRC_SEL_X(SQ_SEL_X),
2432			     SRC_SEL_Y(SQ_SEL_Y),
2433			     STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
2434			     LDS_REQ(0),
2435			     COALESCED_READ(0));
2436    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
2437				 DST_REL(0),
2438				 DST_SEL_X(SQ_SEL_X),
2439				 DST_SEL_Y(SQ_SEL_Y),
2440				 DST_SEL_Z(SQ_SEL_0),
2441				 DST_SEL_W(SQ_SEL_1),
2442				 USE_CONST_FIELDS(0),
2443				 DATA_FORMAT(FMT_32_32_FLOAT),
2444				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2445				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2446				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2447    shader[i++] = VTX_DWORD2(OFFSET(0),
2448#if X_BYTE_ORDER == X_BIG_ENDIAN
2449                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2450#else
2451			     ENDIAN_SWAP(ENDIAN_NONE),
2452#endif
2453			     CONST_BUF_NO_STRIDE(0),
2454			     ALT_CONST(0),
2455			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2456    shader[i++] = VTX_DWORD_PAD;
2457    /* 52/53 - src */
2458    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
2459			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
2460			     FETCH_WHOLE_QUAD(0),
2461			     BUFFER_ID(0),
2462			     SRC_GPR(0),
2463			     SRC_REL(ABSOLUTE),
2464			     SRC_SEL_X(SQ_SEL_X),
2465			     SRC_SEL_Y(SQ_SEL_Y),
2466			     STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
2467			     LDS_REQ(0),
2468			     COALESCED_READ(0));
2469    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
2470				 DST_REL(0),
2471				 DST_SEL_X(SQ_SEL_X),
2472				 DST_SEL_Y(SQ_SEL_Y),
2473				 DST_SEL_Z(SQ_SEL_1),
2474				 DST_SEL_W(SQ_SEL_0),
2475				 USE_CONST_FIELDS(0),
2476				 DATA_FORMAT(FMT_32_32_FLOAT),
2477				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
2478				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
2479				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
2480    shader[i++] = VTX_DWORD2(OFFSET(8),
2481#if X_BYTE_ORDER == X_BIG_ENDIAN
2482                             ENDIAN_SWAP(SQ_ENDIAN_8IN32),
2483#else
2484			     ENDIAN_SWAP(ENDIAN_NONE),
2485#endif
2486			     CONST_BUF_NO_STRIDE(0),
2487                             ALT_CONST(0),
2488                             BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
2489    shader[i++] = VTX_DWORD_PAD;
2490
2491    return i;
2492}
2493
2494/* comp ps --------------------------------------- */
2495int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
2496{
2497    int i = 0;
2498
2499    /* 0 */
2500    shader[i++] = CF_DWORD0(ADDR(3),
2501			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2502    shader[i++] = CF_DWORD1(POP_COUNT(0),
2503                            CF_CONST(0),
2504                            COND(SQ_CF_COND_BOOL),
2505                            I_COUNT(0),
2506                            VALID_PIXEL_MODE(0),
2507                            CF_INST(SQ_CF_INST_CALL),
2508                            BARRIER(0));
2509    /* 1 */
2510    shader[i++] = CF_DWORD0(ADDR(8),
2511			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2512    shader[i++] = CF_DWORD1(POP_COUNT(0),
2513                            CF_CONST(0),
2514                            COND(SQ_CF_COND_NOT_BOOL),
2515                            I_COUNT(0),
2516                            VALID_PIXEL_MODE(0),
2517                            CF_INST(SQ_CF_INST_CALL),
2518                            BARRIER(0));
2519    /* 2 - end */
2520    shader[i++] = CF_DWORD0(ADDR(0),
2521			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2522    shader[i++] = CF_DWORD1(POP_COUNT(0),
2523			    CF_CONST(0),
2524			    COND(SQ_CF_COND_ACTIVE),
2525			    I_COUNT(0),
2526			    VALID_PIXEL_MODE(0),
2527			    CF_INST(SQ_CF_INST_END),
2528			    BARRIER(1));
2529    /* 3 - mask sub */
2530    shader[i++] = CF_ALU_DWORD0(ADDR(12),
2531				KCACHE_BANK0(0),
2532				KCACHE_BANK1(0),
2533				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
2534    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2535				KCACHE_ADDR0(0),
2536				KCACHE_ADDR1(0),
2537				I_COUNT(8),
2538				ALT_CONST(0),
2539				CF_INST(SQ_CF_INST_ALU),
2540				WHOLE_QUAD_MODE(0),
2541				BARRIER(1));
2542
2543    /* 4 */
2544    shader[i++] = CF_DWORD0(ADDR(28),
2545			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2546    shader[i++] = CF_DWORD1(POP_COUNT(0),
2547			    CF_CONST(0),
2548			    COND(SQ_CF_COND_ACTIVE),
2549			    I_COUNT(2),
2550			    VALID_PIXEL_MODE(0),
2551			    CF_INST(SQ_CF_INST_TC),
2552			    BARRIER(1));
2553
2554    /* 5 */
2555    shader[i++] = CF_ALU_DWORD0(ADDR(20),
2556				KCACHE_BANK0(0),
2557				KCACHE_BANK1(0),
2558				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
2559    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2560				KCACHE_ADDR0(0),
2561				KCACHE_ADDR1(0),
2562				I_COUNT(4),
2563				ALT_CONST(0),
2564				CF_INST(SQ_CF_INST_ALU),
2565				WHOLE_QUAD_MODE(0),
2566				BARRIER(1));
2567
2568    /* 6 */
2569    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
2570					  TYPE(SQ_EXPORT_PIXEL),
2571					  RW_GPR(2),
2572					  RW_REL(ABSOLUTE),
2573					  INDEX_GPR(0),
2574					  ELEM_SIZE(1));
2575
2576    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
2577					       SRC_SEL_Y(SQ_SEL_Y),
2578					       SRC_SEL_Z(SQ_SEL_Z),
2579					       SRC_SEL_W(SQ_SEL_W),
2580					       BURST_COUNT(1),
2581					       VALID_PIXEL_MODE(0),
2582					       CF_INST(SQ_CF_INST_EXPORT_DONE),
2583					       MARK(0),
2584					       BARRIER(1));
2585    /* 7 */
2586    shader[i++] = CF_DWORD0(ADDR(0),
2587			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2588    shader[i++] = CF_DWORD1(POP_COUNT(0),
2589			    CF_CONST(0),
2590			    COND(SQ_CF_COND_ACTIVE),
2591			    I_COUNT(0),
2592			    VALID_PIXEL_MODE(0),
2593			    CF_INST(SQ_CF_INST_RETURN),
2594			    BARRIER(1));
2595
2596    /* 8 - non-mask sub */
2597    shader[i++] = CF_ALU_DWORD0(ADDR(24),
2598				KCACHE_BANK0(0),
2599				KCACHE_BANK1(0),
2600				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
2601    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
2602				KCACHE_ADDR0(0),
2603				KCACHE_ADDR1(0),
2604				I_COUNT(4),
2605				ALT_CONST(0),
2606				CF_INST(SQ_CF_INST_ALU),
2607				WHOLE_QUAD_MODE(0),
2608				BARRIER(1));
2609    /* 9 */
2610    shader[i++] = CF_DWORD0(ADDR(32),
2611			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2612    shader[i++] = CF_DWORD1(POP_COUNT(0),
2613			    CF_CONST(0),
2614			    COND(SQ_CF_COND_ACTIVE),
2615			    I_COUNT(1),
2616			    VALID_PIXEL_MODE(0),
2617			    CF_INST(SQ_CF_INST_TC),
2618			    BARRIER(1));
2619
2620    /* 10 */
2621    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
2622					  TYPE(SQ_EXPORT_PIXEL),
2623					  RW_GPR(0),
2624					  RW_REL(ABSOLUTE),
2625					  INDEX_GPR(0),
2626					  ELEM_SIZE(1));
2627    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
2628					       SRC_SEL_Y(SQ_SEL_Y),
2629					       SRC_SEL_Z(SQ_SEL_Z),
2630					       SRC_SEL_W(SQ_SEL_W),
2631					       BURST_COUNT(1),
2632					       VALID_PIXEL_MODE(0),
2633					       CF_INST(SQ_CF_INST_EXPORT_DONE),
2634					       MARK(0),
2635					       BARRIER(1));
2636
2637    /* 11 */
2638    shader[i++] = CF_DWORD0(ADDR(0),
2639			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
2640    shader[i++] = CF_DWORD1(POP_COUNT(0),
2641			    CF_CONST(0),
2642			    COND(SQ_CF_COND_ACTIVE),
2643			    I_COUNT(0),
2644			    VALID_PIXEL_MODE(0),
2645			    CF_INST(SQ_CF_INST_RETURN),
2646			    BARRIER(1));
2647
2648    /* 12 interpolate src tex coords - mask */
2649    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2650			     SRC0_REL(ABSOLUTE),
2651			     SRC0_ELEM(ELEM_Y),
2652			     SRC0_NEG(0),
2653			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2654			     SRC1_REL(ABSOLUTE),
2655			     SRC1_ELEM(ELEM_X),
2656			     SRC1_NEG(0),
2657			     INDEX_MODE(SQ_INDEX_AR_X),
2658			     PRED_SEL(SQ_PRED_SEL_OFF),
2659			     LAST(0));
2660    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2661				 SRC1_ABS(0),
2662				 UPDATE_EXECUTE_MASK(0),
2663				 UPDATE_PRED(0),
2664				 WRITE_MASK(1),
2665				 OMOD(SQ_ALU_OMOD_OFF),
2666				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2667				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2668				 DST_GPR(1),
2669				 DST_REL(ABSOLUTE),
2670				 DST_ELEM(ELEM_X),
2671				 CLAMP(0));
2672    /* 13 */
2673    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2674			     SRC0_REL(ABSOLUTE),
2675			     SRC0_ELEM(ELEM_X),
2676			     SRC0_NEG(0),
2677			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2678			     SRC1_REL(ABSOLUTE),
2679			     SRC1_ELEM(ELEM_X),
2680			     SRC1_NEG(0),
2681			     INDEX_MODE(SQ_INDEX_AR_X),
2682			     PRED_SEL(SQ_PRED_SEL_OFF),
2683			     LAST(0));
2684    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2685				 SRC1_ABS(0),
2686				 UPDATE_EXECUTE_MASK(0),
2687				 UPDATE_PRED(0),
2688				 WRITE_MASK(1),
2689				 OMOD(SQ_ALU_OMOD_OFF),
2690				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2691				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2692				 DST_GPR(1),
2693				 DST_REL(ABSOLUTE),
2694				 DST_ELEM(ELEM_Y),
2695				 CLAMP(0));
2696    /* 14 */
2697    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2698			     SRC0_REL(ABSOLUTE),
2699			     SRC0_ELEM(ELEM_Y),
2700			     SRC0_NEG(0),
2701			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2702			     SRC1_REL(ABSOLUTE),
2703			     SRC1_ELEM(ELEM_X),
2704			     SRC1_NEG(0),
2705			     INDEX_MODE(SQ_INDEX_AR_X),
2706			     PRED_SEL(SQ_PRED_SEL_OFF),
2707			     LAST(0));
2708    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2709				 SRC1_ABS(0),
2710				 UPDATE_EXECUTE_MASK(0),
2711				 UPDATE_PRED(0),
2712				 WRITE_MASK(0),
2713				 OMOD(SQ_ALU_OMOD_OFF),
2714				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2715				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2716				 DST_GPR(1),
2717				 DST_REL(ABSOLUTE),
2718				 DST_ELEM(ELEM_Z),
2719				 CLAMP(0));
2720    /* 15 */
2721    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2722			     SRC0_REL(ABSOLUTE),
2723			     SRC0_ELEM(ELEM_X),
2724			     SRC0_NEG(0),
2725			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2726			     SRC1_REL(ABSOLUTE),
2727			     SRC1_ELEM(ELEM_X),
2728			     SRC1_NEG(0),
2729			     INDEX_MODE(SQ_INDEX_AR_X),
2730			     PRED_SEL(SQ_PRED_SEL_OFF),
2731			     LAST(1));
2732    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2733				 SRC1_ABS(0),
2734				 UPDATE_EXECUTE_MASK(0),
2735				 UPDATE_PRED(0),
2736				 WRITE_MASK(0),
2737				 OMOD(SQ_ALU_OMOD_OFF),
2738				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2739				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2740				 DST_GPR(1),
2741				 DST_REL(ABSOLUTE),
2742				 DST_ELEM(ELEM_W),
2743				 CLAMP(0));
2744
2745    /* 16 interpolate mask tex coords */
2746    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2747			     SRC0_REL(ABSOLUTE),
2748			     SRC0_ELEM(ELEM_Y),
2749			     SRC0_NEG(0),
2750			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
2751			     SRC1_REL(ABSOLUTE),
2752			     SRC1_ELEM(ELEM_X),
2753			     SRC1_NEG(0),
2754			     INDEX_MODE(SQ_INDEX_AR_X),
2755			     PRED_SEL(SQ_PRED_SEL_OFF),
2756			     LAST(0));
2757    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2758				 SRC1_ABS(0),
2759				 UPDATE_EXECUTE_MASK(0),
2760				 UPDATE_PRED(0),
2761				 WRITE_MASK(1),
2762				 OMOD(SQ_ALU_OMOD_OFF),
2763				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2764				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2765				 DST_GPR(0),
2766				 DST_REL(ABSOLUTE),
2767				 DST_ELEM(ELEM_X),
2768				 CLAMP(0));
2769    /* 17 */
2770    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2771			     SRC0_REL(ABSOLUTE),
2772			     SRC0_ELEM(ELEM_X),
2773			     SRC0_NEG(0),
2774			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
2775			     SRC1_REL(ABSOLUTE),
2776			     SRC1_ELEM(ELEM_X),
2777			     SRC1_NEG(0),
2778			     INDEX_MODE(SQ_INDEX_AR_X),
2779			     PRED_SEL(SQ_PRED_SEL_OFF),
2780			     LAST(0));
2781    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2782				 SRC1_ABS(0),
2783				 UPDATE_EXECUTE_MASK(0),
2784				 UPDATE_PRED(0),
2785				 WRITE_MASK(1),
2786				 OMOD(SQ_ALU_OMOD_OFF),
2787				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2788				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2789				 DST_GPR(0),
2790				 DST_REL(ABSOLUTE),
2791				 DST_ELEM(ELEM_Y),
2792				 CLAMP(0));
2793    /* 18 */
2794    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2795			     SRC0_REL(ABSOLUTE),
2796			     SRC0_ELEM(ELEM_Y),
2797			     SRC0_NEG(0),
2798			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
2799			     SRC1_REL(ABSOLUTE),
2800			     SRC1_ELEM(ELEM_X),
2801			     SRC1_NEG(0),
2802			     INDEX_MODE(SQ_INDEX_AR_X),
2803			     PRED_SEL(SQ_PRED_SEL_OFF),
2804			     LAST(0));
2805    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2806				 SRC1_ABS(0),
2807				 UPDATE_EXECUTE_MASK(0),
2808				 UPDATE_PRED(0),
2809				 WRITE_MASK(0),
2810				 OMOD(SQ_ALU_OMOD_OFF),
2811				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2812				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2813				 DST_GPR(0),
2814				 DST_REL(ABSOLUTE),
2815				 DST_ELEM(ELEM_Z),
2816				 CLAMP(0));
2817    /* 19 */
2818    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2819			     SRC0_REL(ABSOLUTE),
2820			     SRC0_ELEM(ELEM_X),
2821			     SRC0_NEG(0),
2822			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
2823			     SRC1_REL(ABSOLUTE),
2824			     SRC1_ELEM(ELEM_X),
2825			     SRC1_NEG(0),
2826			     INDEX_MODE(SQ_INDEX_AR_X),
2827			     PRED_SEL(SQ_PRED_SEL_OFF),
2828			     LAST(1));
2829    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2830				 SRC1_ABS(0),
2831				 UPDATE_EXECUTE_MASK(0),
2832				 UPDATE_PRED(0),
2833				 WRITE_MASK(0),
2834				 OMOD(SQ_ALU_OMOD_OFF),
2835				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2836				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2837				 DST_GPR(0),
2838				 DST_REL(ABSOLUTE),
2839				 DST_ELEM(ELEM_W),
2840				 CLAMP(0));
2841
2842    /* 20 - alu 0 */
2843    /* MUL gpr[2].x gpr[0].x gpr[1].x */
2844    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2845			     SRC0_REL(ABSOLUTE),
2846			     SRC0_ELEM(ELEM_X),
2847			     SRC0_NEG(0),
2848			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
2849			     SRC1_REL(ABSOLUTE),
2850			     SRC1_ELEM(ELEM_X),
2851			     SRC1_NEG(0),
2852			     INDEX_MODE(SQ_INDEX_LOOP),
2853			     PRED_SEL(SQ_PRED_SEL_OFF),
2854			     LAST(0));
2855    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2856				 SRC1_ABS(0),
2857				 UPDATE_EXECUTE_MASK(0),
2858				 UPDATE_PRED(0),
2859				 WRITE_MASK(1),
2860				 OMOD(SQ_ALU_OMOD_OFF),
2861				 ALU_INST(SQ_OP2_INST_MUL),
2862				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2863				 DST_GPR(2),
2864				 DST_REL(ABSOLUTE),
2865				 DST_ELEM(ELEM_X),
2866				 CLAMP(1));
2867    /* 21 - alu 1 */
2868    /* MUL gpr[2].y gpr[0].y gpr[1].y */
2869    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2870			     SRC0_REL(ABSOLUTE),
2871			     SRC0_ELEM(ELEM_Y),
2872			     SRC0_NEG(0),
2873			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
2874			     SRC1_REL(ABSOLUTE),
2875			     SRC1_ELEM(ELEM_Y),
2876			     SRC1_NEG(0),
2877			     INDEX_MODE(SQ_INDEX_LOOP),
2878			     PRED_SEL(SQ_PRED_SEL_OFF),
2879			     LAST(0));
2880    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2881				 SRC1_ABS(0),
2882				 UPDATE_EXECUTE_MASK(0),
2883				 UPDATE_PRED(0),
2884				 WRITE_MASK(1),
2885				 OMOD(SQ_ALU_OMOD_OFF),
2886				 ALU_INST(SQ_OP2_INST_MUL),
2887				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2888				 DST_GPR(2),
2889				 DST_REL(ABSOLUTE),
2890				 DST_ELEM(ELEM_Y),
2891				 CLAMP(1));
2892    /* 22 - alu 2 */
2893    /* MUL gpr[2].z gpr[0].z gpr[1].z */
2894    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2895			     SRC0_REL(ABSOLUTE),
2896			     SRC0_ELEM(ELEM_Z),
2897			     SRC0_NEG(0),
2898			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
2899			     SRC1_REL(ABSOLUTE),
2900			     SRC1_ELEM(ELEM_Z),
2901			     SRC1_NEG(0),
2902			     INDEX_MODE(SQ_INDEX_LOOP),
2903			     PRED_SEL(SQ_PRED_SEL_OFF),
2904			     LAST(0));
2905    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2906				 SRC1_ABS(0),
2907				 UPDATE_EXECUTE_MASK(0),
2908				 UPDATE_PRED(0),
2909				 WRITE_MASK(1),
2910				 OMOD(SQ_ALU_OMOD_OFF),
2911				 ALU_INST(SQ_OP2_INST_MUL),
2912				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2913				 DST_GPR(2),
2914				 DST_REL(ABSOLUTE),
2915				 DST_ELEM(ELEM_Z),
2916				 CLAMP(1));
2917    /* 23 - alu 3 */
2918    /* MUL gpr[2].w gpr[0].w gpr[1].w */
2919    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2920			     SRC0_REL(ABSOLUTE),
2921			     SRC0_ELEM(ELEM_W),
2922			     SRC0_NEG(0),
2923			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
2924			     SRC1_REL(ABSOLUTE),
2925			     SRC1_ELEM(ELEM_W),
2926			     SRC1_NEG(0),
2927			     INDEX_MODE(SQ_INDEX_LOOP),
2928			     PRED_SEL(SQ_PRED_SEL_OFF),
2929			     LAST(1));
2930    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2931				 SRC1_ABS(0),
2932				 UPDATE_EXECUTE_MASK(0),
2933				 UPDATE_PRED(0),
2934				 WRITE_MASK(1),
2935				 OMOD(SQ_ALU_OMOD_OFF),
2936				 ALU_INST(SQ_OP2_INST_MUL),
2937				 BANK_SWIZZLE(SQ_ALU_VEC_012),
2938				 DST_GPR(2),
2939				 DST_REL(ABSOLUTE),
2940				 DST_ELEM(ELEM_W),
2941				 CLAMP(1));
2942
2943    /* 24 - interpolate tex coords - non-mask */
2944    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2945			     SRC0_REL(ABSOLUTE),
2946			     SRC0_ELEM(ELEM_Y),
2947			     SRC0_NEG(0),
2948			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2949			     SRC1_REL(ABSOLUTE),
2950			     SRC1_ELEM(ELEM_X),
2951			     SRC1_NEG(0),
2952			     INDEX_MODE(SQ_INDEX_AR_X),
2953			     PRED_SEL(SQ_PRED_SEL_OFF),
2954			     LAST(0));
2955    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2956				 SRC1_ABS(0),
2957				 UPDATE_EXECUTE_MASK(0),
2958				 UPDATE_PRED(0),
2959				 WRITE_MASK(1),
2960				 OMOD(SQ_ALU_OMOD_OFF),
2961				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2962				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2963				 DST_GPR(0),
2964				 DST_REL(ABSOLUTE),
2965				 DST_ELEM(ELEM_X),
2966				 CLAMP(0));
2967    /* 25 */
2968    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2969			     SRC0_REL(ABSOLUTE),
2970			     SRC0_ELEM(ELEM_X),
2971			     SRC0_NEG(0),
2972			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2973			     SRC1_REL(ABSOLUTE),
2974			     SRC1_ELEM(ELEM_X),
2975			     SRC1_NEG(0),
2976			     INDEX_MODE(SQ_INDEX_AR_X),
2977			     PRED_SEL(SQ_PRED_SEL_OFF),
2978			     LAST(0));
2979    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
2980				 SRC1_ABS(0),
2981				 UPDATE_EXECUTE_MASK(0),
2982				 UPDATE_PRED(0),
2983				 WRITE_MASK(1),
2984				 OMOD(SQ_ALU_OMOD_OFF),
2985				 ALU_INST(SQ_OP2_INST_INTERP_XY),
2986				 BANK_SWIZZLE(SQ_ALU_VEC_210),
2987				 DST_GPR(0),
2988				 DST_REL(ABSOLUTE),
2989				 DST_ELEM(ELEM_Y),
2990				 CLAMP(0));
2991    /* 26 */
2992    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
2993			     SRC0_REL(ABSOLUTE),
2994			     SRC0_ELEM(ELEM_Y),
2995			     SRC0_NEG(0),
2996			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
2997			     SRC1_REL(ABSOLUTE),
2998			     SRC1_ELEM(ELEM_X),
2999			     SRC1_NEG(0),
3000			     INDEX_MODE(SQ_INDEX_AR_X),
3001			     PRED_SEL(SQ_PRED_SEL_OFF),
3002			     LAST(0));
3003    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3004				 SRC1_ABS(0),
3005				 UPDATE_EXECUTE_MASK(0),
3006				 UPDATE_PRED(0),
3007				 WRITE_MASK(0),
3008				 OMOD(SQ_ALU_OMOD_OFF),
3009				 ALU_INST(SQ_OP2_INST_INTERP_XY),
3010				 BANK_SWIZZLE(SQ_ALU_VEC_210),
3011				 DST_GPR(0),
3012				 DST_REL(ABSOLUTE),
3013				 DST_ELEM(ELEM_Z),
3014				 CLAMP(0));
3015    /* 27 */
3016    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
3017			     SRC0_REL(ABSOLUTE),
3018			     SRC0_ELEM(ELEM_X),
3019			     SRC0_NEG(0),
3020			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
3021			     SRC1_REL(ABSOLUTE),
3022			     SRC1_ELEM(ELEM_X),
3023			     SRC1_NEG(0),
3024			     INDEX_MODE(SQ_INDEX_AR_X),
3025			     PRED_SEL(SQ_PRED_SEL_OFF),
3026			     LAST(1));
3027    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
3028				 SRC1_ABS(0),
3029				 UPDATE_EXECUTE_MASK(0),
3030				 UPDATE_PRED(0),
3031				 WRITE_MASK(0),
3032				 OMOD(SQ_ALU_OMOD_OFF),
3033				 ALU_INST(SQ_OP2_INST_INTERP_XY),
3034				 BANK_SWIZZLE(SQ_ALU_VEC_210),
3035				 DST_GPR(0),
3036				 DST_REL(ABSOLUTE),
3037				 DST_ELEM(ELEM_W),
3038				 CLAMP(0));
3039
3040    /* 28/29 - src - mask */
3041    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
3042			     INST_MOD(0),
3043			     FETCH_WHOLE_QUAD(0),
3044			     RESOURCE_ID(0),
3045			     SRC_GPR(1),
3046			     SRC_REL(ABSOLUTE),
3047			     ALT_CONST(0),
3048			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
3049			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
3050    shader[i++] = TEX_DWORD1(DST_GPR(1),
3051			     DST_REL(ABSOLUTE),
3052			     DST_SEL_X(SQ_SEL_X),
3053			     DST_SEL_Y(SQ_SEL_Y),
3054			     DST_SEL_Z(SQ_SEL_Z),
3055			     DST_SEL_W(SQ_SEL_W),
3056			     LOD_BIAS(0),
3057			     COORD_TYPE_X(TEX_NORMALIZED),
3058			     COORD_TYPE_Y(TEX_NORMALIZED),
3059			     COORD_TYPE_Z(TEX_NORMALIZED),
3060			     COORD_TYPE_W(TEX_NORMALIZED));
3061    shader[i++] = TEX_DWORD2(OFFSET_X(0),
3062			     OFFSET_Y(0),
3063			     OFFSET_Z(0),
3064			     SAMPLER_ID(0),
3065			     SRC_SEL_X(SQ_SEL_X),
3066			     SRC_SEL_Y(SQ_SEL_Y),
3067			     SRC_SEL_Z(SQ_SEL_0),
3068			     SRC_SEL_W(SQ_SEL_1));
3069    shader[i++] = TEX_DWORD_PAD;
3070    /* 30/31 - mask */
3071    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
3072			     INST_MOD(0),
3073			     FETCH_WHOLE_QUAD(0),
3074			     RESOURCE_ID(1),
3075			     SRC_GPR(0),
3076			     SRC_REL(ABSOLUTE),
3077                             ALT_CONST(0),
3078                             RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
3079                             SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
3080    shader[i++] = TEX_DWORD1(DST_GPR(0),
3081			     DST_REL(ABSOLUTE),
3082			     DST_SEL_X(SQ_SEL_X),
3083			     DST_SEL_Y(SQ_SEL_Y),
3084			     DST_SEL_Z(SQ_SEL_Z),
3085			     DST_SEL_W(SQ_SEL_W),
3086			     LOD_BIAS(0),
3087			     COORD_TYPE_X(TEX_NORMALIZED),
3088			     COORD_TYPE_Y(TEX_NORMALIZED),
3089			     COORD_TYPE_Z(TEX_NORMALIZED),
3090			     COORD_TYPE_W(TEX_NORMALIZED));
3091    shader[i++] = TEX_DWORD2(OFFSET_X(0),
3092			     OFFSET_Y(0),
3093			     OFFSET_Z(0),
3094			     SAMPLER_ID(1),
3095			     SRC_SEL_X(SQ_SEL_X),
3096			     SRC_SEL_Y(SQ_SEL_Y),
3097			     SRC_SEL_Z(SQ_SEL_0),
3098			     SRC_SEL_W(SQ_SEL_1));
3099    shader[i++] = TEX_DWORD_PAD;
3100
3101    /* 32/33 - src - non-mask */
3102    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
3103			     INST_MOD(0),
3104			     FETCH_WHOLE_QUAD(0),
3105			     RESOURCE_ID(0),
3106			     SRC_GPR(0),
3107			     SRC_REL(ABSOLUTE),
3108			     ALT_CONST(0),
3109			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
3110			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
3111    shader[i++] = TEX_DWORD1(DST_GPR(0),
3112			     DST_REL(ABSOLUTE),
3113			     DST_SEL_X(SQ_SEL_X),
3114			     DST_SEL_Y(SQ_SEL_Y),
3115			     DST_SEL_Z(SQ_SEL_Z),
3116			     DST_SEL_W(SQ_SEL_W),
3117			     LOD_BIAS(0),
3118			     COORD_TYPE_X(TEX_NORMALIZED),
3119			     COORD_TYPE_Y(TEX_NORMALIZED),
3120			     COORD_TYPE_Z(TEX_NORMALIZED),
3121			     COORD_TYPE_W(TEX_NORMALIZED));
3122    shader[i++] = TEX_DWORD2(OFFSET_X(0),
3123			     OFFSET_Y(0),
3124			     OFFSET_Z(0),
3125			     SAMPLER_ID(0),
3126			     SRC_SEL_X(SQ_SEL_X),
3127			     SRC_SEL_Y(SQ_SEL_Y),
3128			     SRC_SEL_Z(SQ_SEL_0),
3129			     SRC_SEL_W(SQ_SEL_1));
3130    shader[i++] = TEX_DWORD_PAD;
3131
3132    return i;
3133}
3134
3135#endif
3136