r600_shader.c revision b13dfe66
1b7e1c893Smrg/* 2b7e1c893Smrg * Copyright 2008 Advanced Micro Devices, Inc. 3b7e1c893Smrg * 4b7e1c893Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b7e1c893Smrg * copy of this software and associated documentation files (the "Software"), 6b7e1c893Smrg * to deal in the Software without restriction, including without limitation 7b7e1c893Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b7e1c893Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b7e1c893Smrg * Software is furnished to do so, subject to the following conditions: 10b7e1c893Smrg * 11b7e1c893Smrg * The above copyright notice and this permission notice (including the next 12b7e1c893Smrg * paragraph) shall be included in all copies or substantial portions of the 13b7e1c893Smrg * Software. 14b7e1c893Smrg * 15b7e1c893Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b7e1c893Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b7e1c893Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b7e1c893Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b7e1c893Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20b7e1c893Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21b7e1c893Smrg * SOFTWARE. 
22b7e1c893Smrg * 23b7e1c893Smrg * Author: Alex Deucher <alexander.deucher@amd.com> 24b7e1c893Smrg * 25b7e1c893Smrg */ 26b7e1c893Smrg 27b7e1c893Smrg#ifdef HAVE_CONFIG_H 28b7e1c893Smrg#include "config.h" 29b7e1c893Smrg#endif 30b7e1c893Smrg 31b7e1c893Smrg#include "xf86.h" 32b7e1c893Smrg 33b7e1c893Smrg#include "radeon.h" 34b7e1c893Smrg#include "r600_shader.h" 35b7e1c893Smrg#include "r600_reg.h" 36b7e1c893Smrg 37b7e1c893Smrg/* solid vs --------------------------------------- */ 38b7e1c893Smrgint R600_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader) 39b7e1c893Smrg{ 40b7e1c893Smrg int i = 0; 41b7e1c893Smrg 42b7e1c893Smrg /* 0 */ 43b7e1c893Smrg shader[i++] = CF_DWORD0(ADDR(4)); 44b7e1c893Smrg shader[i++] = CF_DWORD1(POP_COUNT(0), 45b7e1c893Smrg CF_CONST(0), 46b7e1c893Smrg COND(SQ_CF_COND_ACTIVE), 47b7e1c893Smrg I_COUNT(1), 48b7e1c893Smrg CALL_COUNT(0), 49b7e1c893Smrg END_OF_PROGRAM(0), 50b7e1c893Smrg VALID_PIXEL_MODE(0), 51b7e1c893Smrg CF_INST(SQ_CF_INST_VTX), 52b7e1c893Smrg WHOLE_QUAD_MODE(0), 53b7e1c893Smrg BARRIER(1)); 54b7e1c893Smrg /* 1 */ 55b7e1c893Smrg shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 56b7e1c893Smrg TYPE(SQ_EXPORT_POS), 57b7e1c893Smrg RW_GPR(1), 58b7e1c893Smrg RW_REL(ABSOLUTE), 59b7e1c893Smrg INDEX_GPR(0), 60b7e1c893Smrg ELEM_SIZE(0)); 61b7e1c893Smrg shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 62b7e1c893Smrg SRC_SEL_Y(SQ_SEL_Y), 63b7e1c893Smrg SRC_SEL_Z(SQ_SEL_Z), 64b7e1c893Smrg SRC_SEL_W(SQ_SEL_W), 65b7e1c893Smrg R6xx_ELEM_LOOP(0), 66b7e1c893Smrg BURST_COUNT(1), 67b7e1c893Smrg END_OF_PROGRAM(0), 68b7e1c893Smrg VALID_PIXEL_MODE(0), 69b7e1c893Smrg CF_INST(SQ_CF_INST_EXPORT_DONE), 70b7e1c893Smrg WHOLE_QUAD_MODE(0), 71b7e1c893Smrg BARRIER(1)); 72b7e1c893Smrg /* 2 - always export a param whether it's used or not */ 73b7e1c893Smrg shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 74b7e1c893Smrg TYPE(SQ_EXPORT_PARAM), 75b7e1c893Smrg RW_GPR(0), 76b7e1c893Smrg RW_REL(ABSOLUTE), 77b7e1c893Smrg INDEX_GPR(0), 78b7e1c893Smrg 
ELEM_SIZE(0)); 79b7e1c893Smrg shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 80b7e1c893Smrg SRC_SEL_Y(SQ_SEL_Y), 81b7e1c893Smrg SRC_SEL_Z(SQ_SEL_Z), 82b7e1c893Smrg SRC_SEL_W(SQ_SEL_W), 83b7e1c893Smrg R6xx_ELEM_LOOP(0), 84b7e1c893Smrg BURST_COUNT(0), 85b7e1c893Smrg END_OF_PROGRAM(1), 86b7e1c893Smrg VALID_PIXEL_MODE(0), 87b7e1c893Smrg CF_INST(SQ_CF_INST_EXPORT_DONE), 88b7e1c893Smrg WHOLE_QUAD_MODE(0), 89b7e1c893Smrg BARRIER(0)); 90b7e1c893Smrg /* 3 - padding */ 91b7e1c893Smrg shader[i++] = 0x00000000; 92b7e1c893Smrg shader[i++] = 0x00000000; 93b7e1c893Smrg /* 4/5 */ 94b7e1c893Smrg shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 95b7e1c893Smrg FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 96b7e1c893Smrg FETCH_WHOLE_QUAD(0), 97b7e1c893Smrg BUFFER_ID(0), 98b7e1c893Smrg SRC_GPR(0), 99b7e1c893Smrg SRC_REL(ABSOLUTE), 100b7e1c893Smrg SRC_SEL_X(SQ_SEL_X), 101b7e1c893Smrg MEGA_FETCH_COUNT(8)); 102b7e1c893Smrg shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 103b7e1c893Smrg DST_REL(0), 104b7e1c893Smrg DST_SEL_X(SQ_SEL_X), 105b7e1c893Smrg DST_SEL_Y(SQ_SEL_Y), 106b7e1c893Smrg DST_SEL_Z(SQ_SEL_0), 107b7e1c893Smrg DST_SEL_W(SQ_SEL_1), 108b7e1c893Smrg USE_CONST_FIELDS(0), 109ad43ddacSmrg DATA_FORMAT(FMT_32_32_FLOAT), 110ad43ddacSmrg NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 111ad43ddacSmrg FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 112b7e1c893Smrg SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 113b7e1c893Smrg shader[i++] = VTX_DWORD2(OFFSET(0), 114b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN 115b13dfe66Smrg ENDIAN_SWAP(SQ_ENDIAN_8IN32), 116b13dfe66Smrg#else 117b13dfe66Smrg ENDIAN_SWAP(SQ_ENDIAN_NONE), 118b13dfe66Smrg#endif 119b7e1c893Smrg CONST_BUF_NO_STRIDE(0), 120b7e1c893Smrg MEGA_FETCH(1)); 121b7e1c893Smrg shader[i++] = VTX_DWORD_PAD; 122b7e1c893Smrg 123b7e1c893Smrg return i; 124b7e1c893Smrg} 125b7e1c893Smrg 126b7e1c893Smrg/* solid ps --------------------------------------- */ 127b7e1c893Smrgint R600_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader) 128b7e1c893Smrg{ 
129b7e1c893Smrg int i = 0; 130b7e1c893Smrg 131b7e1c893Smrg /* 0 */ 132b7e1c893Smrg shader[i++] = CF_ALU_DWORD0(ADDR(2), 133b7e1c893Smrg KCACHE_BANK0(0), 134b7e1c893Smrg KCACHE_BANK1(0), 135b7e1c893Smrg KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 136b7e1c893Smrg shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 137b7e1c893Smrg KCACHE_ADDR0(0), 138b7e1c893Smrg KCACHE_ADDR1(0), 139b7e1c893Smrg I_COUNT(4), 140b7e1c893Smrg USES_WATERFALL(0), 141b7e1c893Smrg CF_INST(SQ_CF_INST_ALU), 142b7e1c893Smrg WHOLE_QUAD_MODE(0), 143b7e1c893Smrg BARRIER(1)); 144b7e1c893Smrg /* 1 */ 145b7e1c893Smrg shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 146b7e1c893Smrg TYPE(SQ_EXPORT_PIXEL), 147b7e1c893Smrg RW_GPR(0), 148b7e1c893Smrg RW_REL(ABSOLUTE), 149b7e1c893Smrg INDEX_GPR(0), 150b7e1c893Smrg ELEM_SIZE(1)); 151b7e1c893Smrg shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 152b7e1c893Smrg SRC_SEL_Y(SQ_SEL_Y), 153b7e1c893Smrg SRC_SEL_Z(SQ_SEL_Z), 154b7e1c893Smrg SRC_SEL_W(SQ_SEL_W), 155b7e1c893Smrg R6xx_ELEM_LOOP(0), 156b7e1c893Smrg BURST_COUNT(1), 157b7e1c893Smrg END_OF_PROGRAM(1), 158b7e1c893Smrg VALID_PIXEL_MODE(0), 159b7e1c893Smrg CF_INST(SQ_CF_INST_EXPORT_DONE), 160b7e1c893Smrg WHOLE_QUAD_MODE(0), 161b7e1c893Smrg BARRIER(1)); 162b7e1c893Smrg 163b7e1c893Smrg /* 2 */ 164921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), 165b7e1c893Smrg SRC0_REL(ABSOLUTE), 166b7e1c893Smrg SRC0_ELEM(ELEM_X), 167b7e1c893Smrg SRC0_NEG(0), 168921a55d8Smrg SRC1_SEL(ALU_SRC_GPR_BASE + 0), 169b7e1c893Smrg SRC1_REL(ABSOLUTE), 170b7e1c893Smrg SRC1_ELEM(ELEM_X), 171b7e1c893Smrg SRC1_NEG(0), 172b7e1c893Smrg INDEX_MODE(SQ_INDEX_AR_X), 173b7e1c893Smrg PRED_SEL(SQ_PRED_SEL_OFF), 174b7e1c893Smrg LAST(0)); 175b7e1c893Smrg shader[i++] = ALU_DWORD1_OP2(ChipSet, 176b7e1c893Smrg SRC0_ABS(0), 177b7e1c893Smrg SRC1_ABS(0), 178b7e1c893Smrg UPDATE_EXECUTE_MASK(0), 179b7e1c893Smrg UPDATE_PRED(0), 180b7e1c893Smrg WRITE_MASK(1), 181b7e1c893Smrg FOG_MERGE(0), 182b7e1c893Smrg 
OMOD(SQ_ALU_OMOD_OFF), 183b7e1c893Smrg ALU_INST(SQ_OP2_INST_MOV), 184b7e1c893Smrg BANK_SWIZZLE(SQ_ALU_VEC_012), 185b7e1c893Smrg DST_GPR(0), 186b7e1c893Smrg DST_REL(ABSOLUTE), 187b7e1c893Smrg DST_ELEM(ELEM_X), 188b7e1c893Smrg CLAMP(1)); 189b7e1c893Smrg /* 3 */ 190921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), 191b7e1c893Smrg SRC0_REL(ABSOLUTE), 192b7e1c893Smrg SRC0_ELEM(ELEM_Y), 193b7e1c893Smrg SRC0_NEG(0), 194921a55d8Smrg SRC1_SEL(ALU_SRC_GPR_BASE + 0), 195b7e1c893Smrg SRC1_REL(ABSOLUTE), 196b7e1c893Smrg SRC1_ELEM(ELEM_Y), 197b7e1c893Smrg SRC1_NEG(0), 198b7e1c893Smrg INDEX_MODE(SQ_INDEX_AR_X), 199b7e1c893Smrg PRED_SEL(SQ_PRED_SEL_OFF), 200b7e1c893Smrg LAST(0)); 201b7e1c893Smrg shader[i++] = ALU_DWORD1_OP2(ChipSet, 202b7e1c893Smrg SRC0_ABS(0), 203b7e1c893Smrg SRC1_ABS(0), 204b7e1c893Smrg UPDATE_EXECUTE_MASK(0), 205b7e1c893Smrg UPDATE_PRED(0), 206b7e1c893Smrg WRITE_MASK(1), 207b7e1c893Smrg FOG_MERGE(0), 208b7e1c893Smrg OMOD(SQ_ALU_OMOD_OFF), 209b7e1c893Smrg ALU_INST(SQ_OP2_INST_MOV), 210b7e1c893Smrg BANK_SWIZZLE(SQ_ALU_VEC_012), 211b7e1c893Smrg DST_GPR(0), 212b7e1c893Smrg DST_REL(ABSOLUTE), 213b7e1c893Smrg DST_ELEM(ELEM_Y), 214b7e1c893Smrg CLAMP(1)); 215b7e1c893Smrg /* 4 */ 216921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), 217b7e1c893Smrg SRC0_REL(ABSOLUTE), 218b7e1c893Smrg SRC0_ELEM(ELEM_Z), 219b7e1c893Smrg SRC0_NEG(0), 220921a55d8Smrg SRC1_SEL(ALU_SRC_GPR_BASE + 0), 221b7e1c893Smrg SRC1_REL(ABSOLUTE), 222b7e1c893Smrg SRC1_ELEM(ELEM_Z), 223b7e1c893Smrg SRC1_NEG(0), 224b7e1c893Smrg INDEX_MODE(SQ_INDEX_AR_X), 225b7e1c893Smrg PRED_SEL(SQ_PRED_SEL_OFF), 226b7e1c893Smrg LAST(0)); 227b7e1c893Smrg shader[i++] = ALU_DWORD1_OP2(ChipSet, 228b7e1c893Smrg SRC0_ABS(0), 229b7e1c893Smrg SRC1_ABS(0), 230b7e1c893Smrg UPDATE_EXECUTE_MASK(0), 231b7e1c893Smrg UPDATE_PRED(0), 232b7e1c893Smrg WRITE_MASK(1), 233b7e1c893Smrg FOG_MERGE(0), 234b7e1c893Smrg OMOD(SQ_ALU_OMOD_OFF), 235b7e1c893Smrg ALU_INST(SQ_OP2_INST_MOV), 236b7e1c893Smrg 
BANK_SWIZZLE(SQ_ALU_VEC_012), 237b7e1c893Smrg DST_GPR(0), 238b7e1c893Smrg DST_REL(ABSOLUTE), 239b7e1c893Smrg DST_ELEM(ELEM_Z), 240b7e1c893Smrg CLAMP(1)); 241b7e1c893Smrg /* 5 */ 242921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), 243b7e1c893Smrg SRC0_REL(ABSOLUTE), 244b7e1c893Smrg SRC0_ELEM(ELEM_W), 245b7e1c893Smrg SRC0_NEG(0), 246921a55d8Smrg SRC1_SEL(ALU_SRC_GPR_BASE + 0), 247b7e1c893Smrg SRC1_REL(ABSOLUTE), 248b7e1c893Smrg SRC1_ELEM(ELEM_W), 249b7e1c893Smrg SRC1_NEG(0), 250b7e1c893Smrg INDEX_MODE(SQ_INDEX_AR_X), 251b7e1c893Smrg PRED_SEL(SQ_PRED_SEL_OFF), 252b7e1c893Smrg LAST(1)); 253b7e1c893Smrg shader[i++] = ALU_DWORD1_OP2(ChipSet, 254b7e1c893Smrg SRC0_ABS(0), 255b7e1c893Smrg SRC1_ABS(0), 256b7e1c893Smrg UPDATE_EXECUTE_MASK(0), 257b7e1c893Smrg UPDATE_PRED(0), 258b7e1c893Smrg WRITE_MASK(1), 259b7e1c893Smrg FOG_MERGE(0), 260b7e1c893Smrg OMOD(SQ_ALU_OMOD_OFF), 261b7e1c893Smrg ALU_INST(SQ_OP2_INST_MOV), 262b7e1c893Smrg BANK_SWIZZLE(SQ_ALU_VEC_012), 263b7e1c893Smrg DST_GPR(0), 264b7e1c893Smrg DST_REL(ABSOLUTE), 265b7e1c893Smrg DST_ELEM(ELEM_W), 266b7e1c893Smrg CLAMP(1)); 267b7e1c893Smrg 268b7e1c893Smrg return i; 269b7e1c893Smrg} 270b7e1c893Smrg 271b7e1c893Smrg/* copy vs --------------------------------------- */ 272b7e1c893Smrgint R600_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader) 273b7e1c893Smrg{ 274b7e1c893Smrg int i = 0; 275b7e1c893Smrg 276b7e1c893Smrg /* 0 */ 277b7e1c893Smrg shader[i++] = CF_DWORD0(ADDR(4)); 278b7e1c893Smrg shader[i++] = CF_DWORD1(POP_COUNT(0), 279b7e1c893Smrg CF_CONST(0), 280b7e1c893Smrg COND(SQ_CF_COND_ACTIVE), 281b7e1c893Smrg I_COUNT(2), 282b7e1c893Smrg CALL_COUNT(0), 283b7e1c893Smrg END_OF_PROGRAM(0), 284b7e1c893Smrg VALID_PIXEL_MODE(0), 285b7e1c893Smrg CF_INST(SQ_CF_INST_VTX), 286b7e1c893Smrg WHOLE_QUAD_MODE(0), 287b7e1c893Smrg BARRIER(1)); 288b7e1c893Smrg /* 1 */ 289b7e1c893Smrg shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 290b7e1c893Smrg TYPE(SQ_EXPORT_POS), 291b7e1c893Smrg RW_GPR(1), 
292b7e1c893Smrg RW_REL(ABSOLUTE), 293b7e1c893Smrg INDEX_GPR(0), 294b7e1c893Smrg ELEM_SIZE(0)); 295b7e1c893Smrg shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 296b7e1c893Smrg SRC_SEL_Y(SQ_SEL_Y), 297b7e1c893Smrg SRC_SEL_Z(SQ_SEL_Z), 298b7e1c893Smrg SRC_SEL_W(SQ_SEL_W), 299b7e1c893Smrg R6xx_ELEM_LOOP(0), 300b7e1c893Smrg BURST_COUNT(0), 301b7e1c893Smrg END_OF_PROGRAM(0), 302b7e1c893Smrg VALID_PIXEL_MODE(0), 303b7e1c893Smrg CF_INST(SQ_CF_INST_EXPORT_DONE), 304b7e1c893Smrg WHOLE_QUAD_MODE(0), 305b7e1c893Smrg BARRIER(1)); 306b7e1c893Smrg /* 2 */ 307b7e1c893Smrg shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 308b7e1c893Smrg TYPE(SQ_EXPORT_PARAM), 309b7e1c893Smrg RW_GPR(0), 310b7e1c893Smrg RW_REL(ABSOLUTE), 311b7e1c893Smrg INDEX_GPR(0), 312b7e1c893Smrg ELEM_SIZE(0)); 313b7e1c893Smrg shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 314b7e1c893Smrg SRC_SEL_Y(SQ_SEL_Y), 315b7e1c893Smrg SRC_SEL_Z(SQ_SEL_Z), 316b7e1c893Smrg SRC_SEL_W(SQ_SEL_W), 317b7e1c893Smrg R6xx_ELEM_LOOP(0), 318b7e1c893Smrg BURST_COUNT(0), 319b7e1c893Smrg END_OF_PROGRAM(1), 320b7e1c893Smrg VALID_PIXEL_MODE(0), 321b7e1c893Smrg CF_INST(SQ_CF_INST_EXPORT_DONE), 322b7e1c893Smrg WHOLE_QUAD_MODE(0), 323b7e1c893Smrg BARRIER(0)); 324b7e1c893Smrg /* 3 */ 325b7e1c893Smrg shader[i++] = 0x00000000; 326b7e1c893Smrg shader[i++] = 0x00000000; 327b7e1c893Smrg /* 4/5 */ 328b7e1c893Smrg shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 329b7e1c893Smrg FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 330b7e1c893Smrg FETCH_WHOLE_QUAD(0), 331b7e1c893Smrg BUFFER_ID(0), 332b7e1c893Smrg SRC_GPR(0), 333b7e1c893Smrg SRC_REL(ABSOLUTE), 334b7e1c893Smrg SRC_SEL_X(SQ_SEL_X), 335b7e1c893Smrg MEGA_FETCH_COUNT(16)); 336b7e1c893Smrg shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 337b7e1c893Smrg DST_REL(0), 338b7e1c893Smrg DST_SEL_X(SQ_SEL_X), 339b7e1c893Smrg DST_SEL_Y(SQ_SEL_Y), 340b7e1c893Smrg DST_SEL_Z(SQ_SEL_0), 341b7e1c893Smrg DST_SEL_W(SQ_SEL_1), 342b7e1c893Smrg USE_CONST_FIELDS(0), 343ad43ddacSmrg 
DATA_FORMAT(FMT_32_32_FLOAT), 344ad43ddacSmrg NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 345ad43ddacSmrg FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 346b7e1c893Smrg SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 347b7e1c893Smrg shader[i++] = VTX_DWORD2(OFFSET(0), 348b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN 349b13dfe66Smrg ENDIAN_SWAP(SQ_ENDIAN_8IN32), 350b13dfe66Smrg#else 351b13dfe66Smrg ENDIAN_SWAP(SQ_ENDIAN_NONE), 352b13dfe66Smrg#endif 353b7e1c893Smrg CONST_BUF_NO_STRIDE(0), 354b7e1c893Smrg MEGA_FETCH(1)); 355b7e1c893Smrg shader[i++] = VTX_DWORD_PAD; 356b7e1c893Smrg /* 6/7 */ 357b7e1c893Smrg shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 358b7e1c893Smrg FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 359b7e1c893Smrg FETCH_WHOLE_QUAD(0), 360b7e1c893Smrg BUFFER_ID(0), 361b7e1c893Smrg SRC_GPR(0), 362b7e1c893Smrg SRC_REL(ABSOLUTE), 363b7e1c893Smrg SRC_SEL_X(SQ_SEL_X), 364b7e1c893Smrg MEGA_FETCH_COUNT(8)); 365b7e1c893Smrg shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), 366b7e1c893Smrg DST_REL(0), 367b7e1c893Smrg DST_SEL_X(SQ_SEL_X), 368b7e1c893Smrg DST_SEL_Y(SQ_SEL_Y), 369b7e1c893Smrg DST_SEL_Z(SQ_SEL_0), 370b7e1c893Smrg DST_SEL_W(SQ_SEL_1), 371b7e1c893Smrg USE_CONST_FIELDS(0), 372ad43ddacSmrg DATA_FORMAT(FMT_32_32_FLOAT), 373ad43ddacSmrg NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 374ad43ddacSmrg FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 375b7e1c893Smrg SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 376b7e1c893Smrg shader[i++] = VTX_DWORD2(OFFSET(8), 377b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN 378b13dfe66Smrg ENDIAN_SWAP(SQ_ENDIAN_8IN32), 379b13dfe66Smrg#else 380b13dfe66Smrg ENDIAN_SWAP(SQ_ENDIAN_NONE), 381b13dfe66Smrg#endif 382b7e1c893Smrg CONST_BUF_NO_STRIDE(0), 383b7e1c893Smrg MEGA_FETCH(0)); 384b7e1c893Smrg shader[i++] = VTX_DWORD_PAD; 385b7e1c893Smrg 386b7e1c893Smrg return i; 387b7e1c893Smrg} 388b7e1c893Smrg 389b7e1c893Smrg/* copy ps --------------------------------------- */ 390b7e1c893Smrgint R600_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader) 391b7e1c893Smrg{ 392b7e1c893Smrg 
int i=0; 393b7e1c893Smrg 394b7e1c893Smrg /* CF INST 0 */ 395b7e1c893Smrg shader[i++] = CF_DWORD0(ADDR(2)); 396b7e1c893Smrg shader[i++] = CF_DWORD1(POP_COUNT(0), 397b7e1c893Smrg CF_CONST(0), 398b7e1c893Smrg COND(SQ_CF_COND_ACTIVE), 399b7e1c893Smrg I_COUNT(1), 400b7e1c893Smrg CALL_COUNT(0), 401b7e1c893Smrg END_OF_PROGRAM(0), 402b7e1c893Smrg VALID_PIXEL_MODE(0), 403b7e1c893Smrg CF_INST(SQ_CF_INST_TEX), 404b7e1c893Smrg WHOLE_QUAD_MODE(0), 405b7e1c893Smrg BARRIER(1)); 406b7e1c893Smrg /* CF INST 1 */ 407b7e1c893Smrg shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 408b7e1c893Smrg TYPE(SQ_EXPORT_PIXEL), 409b7e1c893Smrg RW_GPR(0), 410b7e1c893Smrg RW_REL(ABSOLUTE), 411b7e1c893Smrg INDEX_GPR(0), 412b7e1c893Smrg ELEM_SIZE(1)); 413b7e1c893Smrg shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 414b7e1c893Smrg SRC_SEL_Y(SQ_SEL_Y), 415b7e1c893Smrg SRC_SEL_Z(SQ_SEL_Z), 416b7e1c893Smrg SRC_SEL_W(SQ_SEL_W), 417b7e1c893Smrg R6xx_ELEM_LOOP(0), 418b7e1c893Smrg BURST_COUNT(1), 419b7e1c893Smrg END_OF_PROGRAM(1), 420b7e1c893Smrg VALID_PIXEL_MODE(0), 421b7e1c893Smrg CF_INST(SQ_CF_INST_EXPORT_DONE), 422b7e1c893Smrg WHOLE_QUAD_MODE(0), 423b7e1c893Smrg BARRIER(1)); 424b7e1c893Smrg /* TEX INST 0 */ 425b7e1c893Smrg shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 426b7e1c893Smrg BC_FRAC_MODE(0), 427b7e1c893Smrg FETCH_WHOLE_QUAD(0), 428b7e1c893Smrg RESOURCE_ID(0), 429b7e1c893Smrg SRC_GPR(0), 430b7e1c893Smrg SRC_REL(ABSOLUTE), 431b7e1c893Smrg R7xx_ALT_CONST(0)); 432b7e1c893Smrg shader[i++] = TEX_DWORD1(DST_GPR(0), 433b7e1c893Smrg DST_REL(ABSOLUTE), 434b7e1c893Smrg DST_SEL_X(SQ_SEL_X), /* R */ 435b7e1c893Smrg DST_SEL_Y(SQ_SEL_Y), /* G */ 436b7e1c893Smrg DST_SEL_Z(SQ_SEL_Z), /* B */ 437b7e1c893Smrg DST_SEL_W(SQ_SEL_W), /* A */ 438b7e1c893Smrg LOD_BIAS(0), 439b7e1c893Smrg COORD_TYPE_X(TEX_UNNORMALIZED), 440b7e1c893Smrg COORD_TYPE_Y(TEX_UNNORMALIZED), 441b7e1c893Smrg COORD_TYPE_Z(TEX_UNNORMALIZED), 442b7e1c893Smrg COORD_TYPE_W(TEX_UNNORMALIZED)); 443b7e1c893Smrg 
shader[i++] = TEX_DWORD2(OFFSET_X(0), 444b7e1c893Smrg OFFSET_Y(0), 445b7e1c893Smrg OFFSET_Z(0), 446b7e1c893Smrg SAMPLER_ID(0), 447b7e1c893Smrg SRC_SEL_X(SQ_SEL_X), 448b7e1c893Smrg SRC_SEL_Y(SQ_SEL_Y), 449b7e1c893Smrg SRC_SEL_Z(SQ_SEL_0), 450b7e1c893Smrg SRC_SEL_W(SQ_SEL_1)); 451b7e1c893Smrg shader[i++] = TEX_DWORD_PAD; 452b7e1c893Smrg 453b7e1c893Smrg return i; 454b7e1c893Smrg} 455b7e1c893Smrg 456b7e1c893Smrg/* 457b7e1c893Smrg * ; xv vertex shader 458b7e1c893Smrg * 00 VTX: ADDR(4) CNT(2) 459b7e1c893Smrg * 0 VFETCH R1.xy01, R0.x, fc0 MEGA(16) FORMAT(32_32_FLOAT) 460b7e1c893Smrg * FORMAT_COMP(SIGNED) 461b7e1c893Smrg * 1 VFETCH R0.xy01, R0.x, fc0 MINI(8) OFFSET(8) FORMAT(32_32_FLOAT) 462b7e1c893Smrg * FORMAT_COMP(SIGNED) 463b7e1c893Smrg * 01 EXP_DONE: POS0, R1 464b7e1c893Smrg * 02 EXP_DONE: PARAM0, R0 NO_BARRIER 465b7e1c893Smrg * END_OF_PROGRAM 466b7e1c893Smrg */ 467b7e1c893Smrgint R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader) 468b7e1c893Smrg{ 469b7e1c893Smrg int i = 0; 470b7e1c893Smrg 471b7e1c893Smrg /* 0 */ 472ad43ddacSmrg shader[i++] = CF_DWORD0(ADDR(6)); 473b7e1c893Smrg shader[i++] = CF_DWORD1(POP_COUNT(0), 474b7e1c893Smrg CF_CONST(0), 475b7e1c893Smrg COND(SQ_CF_COND_ACTIVE), 476b7e1c893Smrg I_COUNT(2), 477b7e1c893Smrg CALL_COUNT(0), 478b7e1c893Smrg END_OF_PROGRAM(0), 479b7e1c893Smrg VALID_PIXEL_MODE(0), 480b7e1c893Smrg CF_INST(SQ_CF_INST_VTX), 481b7e1c893Smrg WHOLE_QUAD_MODE(0), 482b7e1c893Smrg BARRIER(1)); 483ad43ddacSmrg 484ad43ddacSmrg /* 1 - ALU */ 485ad43ddacSmrg shader[i++] = CF_ALU_DWORD0(ADDR(4), 486ad43ddacSmrg KCACHE_BANK0(0), 487ad43ddacSmrg KCACHE_BANK1(0), 488ad43ddacSmrg KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 489ad43ddacSmrg shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 490ad43ddacSmrg KCACHE_ADDR0(0), 491ad43ddacSmrg KCACHE_ADDR1(0), 492ad43ddacSmrg I_COUNT(2), 493ad43ddacSmrg USES_WATERFALL(0), 494ad43ddacSmrg CF_INST(SQ_CF_INST_ALU), 495ad43ddacSmrg WHOLE_QUAD_MODE(0), 496ad43ddacSmrg BARRIER(1)); 497ad43ddacSmrg 498ad43ddacSmrg 
/* 2 */ 499b7e1c893Smrg shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 500b7e1c893Smrg TYPE(SQ_EXPORT_POS), 501b7e1c893Smrg RW_GPR(1), 502b7e1c893Smrg RW_REL(ABSOLUTE), 503b7e1c893Smrg INDEX_GPR(0), 504b7e1c893Smrg ELEM_SIZE(3)); 505b7e1c893Smrg shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 506b7e1c893Smrg SRC_SEL_Y(SQ_SEL_Y), 507b7e1c893Smrg SRC_SEL_Z(SQ_SEL_Z), 508b7e1c893Smrg SRC_SEL_W(SQ_SEL_W), 509b7e1c893Smrg R6xx_ELEM_LOOP(0), 510b7e1c893Smrg BURST_COUNT(1), 511b7e1c893Smrg END_OF_PROGRAM(0), 512b7e1c893Smrg VALID_PIXEL_MODE(0), 513b7e1c893Smrg CF_INST(SQ_CF_INST_EXPORT_DONE), 514b7e1c893Smrg WHOLE_QUAD_MODE(0), 515b7e1c893Smrg BARRIER(1)); 516ad43ddacSmrg /* 3 */ 517b7e1c893Smrg shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 518b7e1c893Smrg TYPE(SQ_EXPORT_PARAM), 519b7e1c893Smrg RW_GPR(0), 520b7e1c893Smrg RW_REL(ABSOLUTE), 521b7e1c893Smrg INDEX_GPR(0), 522b7e1c893Smrg ELEM_SIZE(3)); 523b7e1c893Smrg shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 524b7e1c893Smrg SRC_SEL_Y(SQ_SEL_Y), 525b7e1c893Smrg SRC_SEL_Z(SQ_SEL_Z), 526b7e1c893Smrg SRC_SEL_W(SQ_SEL_W), 527b7e1c893Smrg R6xx_ELEM_LOOP(0), 528b7e1c893Smrg BURST_COUNT(1), 529b7e1c893Smrg END_OF_PROGRAM(1), 530b7e1c893Smrg VALID_PIXEL_MODE(0), 531b7e1c893Smrg CF_INST(SQ_CF_INST_EXPORT_DONE), 532b7e1c893Smrg WHOLE_QUAD_MODE(0), 533b7e1c893Smrg BARRIER(0)); 534ad43ddacSmrg 535ad43ddacSmrg 536ad43ddacSmrg /* 4 texX / w */ 537921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 538ad43ddacSmrg SRC0_REL(ABSOLUTE), 539ad43ddacSmrg SRC0_ELEM(ELEM_X), 540ad43ddacSmrg SRC0_NEG(0), 541921a55d8Smrg SRC1_SEL(ALU_SRC_CFILE_BASE + 0), 542ad43ddacSmrg SRC1_REL(ABSOLUTE), 543ad43ddacSmrg SRC1_ELEM(ELEM_X), 544ad43ddacSmrg SRC1_NEG(0), 545ad43ddacSmrg INDEX_MODE(SQ_INDEX_AR_X), 546ad43ddacSmrg PRED_SEL(SQ_PRED_SEL_OFF), 547ad43ddacSmrg LAST(0)); 548ad43ddacSmrg shader[i++] = ALU_DWORD1_OP2(ChipSet, 549ad43ddacSmrg SRC0_ABS(0), 550ad43ddacSmrg SRC1_ABS(0), 
551ad43ddacSmrg UPDATE_EXECUTE_MASK(0), 552ad43ddacSmrg UPDATE_PRED(0), 553ad43ddacSmrg WRITE_MASK(1), 554ad43ddacSmrg FOG_MERGE(0), 555ad43ddacSmrg OMOD(SQ_ALU_OMOD_OFF), 556ad43ddacSmrg ALU_INST(SQ_OP2_INST_MUL), 557ad43ddacSmrg BANK_SWIZZLE(SQ_ALU_VEC_012), 558ad43ddacSmrg DST_GPR(0), 559ad43ddacSmrg DST_REL(ABSOLUTE), 560ad43ddacSmrg DST_ELEM(ELEM_X), 561ad43ddacSmrg CLAMP(0)); 562ad43ddacSmrg 563ad43ddacSmrg /* 5 texY / h */ 564921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 565ad43ddacSmrg SRC0_REL(ABSOLUTE), 566ad43ddacSmrg SRC0_ELEM(ELEM_Y), 567ad43ddacSmrg SRC0_NEG(0), 568921a55d8Smrg SRC1_SEL(ALU_SRC_CFILE_BASE + 0), 569ad43ddacSmrg SRC1_REL(ABSOLUTE), 570ad43ddacSmrg SRC1_ELEM(ELEM_Y), 571ad43ddacSmrg SRC1_NEG(0), 572ad43ddacSmrg INDEX_MODE(SQ_INDEX_AR_X), 573ad43ddacSmrg PRED_SEL(SQ_PRED_SEL_OFF), 574ad43ddacSmrg LAST(1)); 575ad43ddacSmrg shader[i++] = ALU_DWORD1_OP2(ChipSet, 576ad43ddacSmrg SRC0_ABS(0), 577ad43ddacSmrg SRC1_ABS(0), 578ad43ddacSmrg UPDATE_EXECUTE_MASK(0), 579ad43ddacSmrg UPDATE_PRED(0), 580ad43ddacSmrg WRITE_MASK(1), 581ad43ddacSmrg FOG_MERGE(0), 582ad43ddacSmrg OMOD(SQ_ALU_OMOD_OFF), 583ad43ddacSmrg ALU_INST(SQ_OP2_INST_MUL), 584ad43ddacSmrg BANK_SWIZZLE(SQ_ALU_VEC_012), 585ad43ddacSmrg DST_GPR(0), 586ad43ddacSmrg DST_REL(ABSOLUTE), 587ad43ddacSmrg DST_ELEM(ELEM_Y), 588ad43ddacSmrg CLAMP(0)); 589ad43ddacSmrg 590ad43ddacSmrg /* 6/7 */ 591b7e1c893Smrg shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 592b7e1c893Smrg FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 593b7e1c893Smrg FETCH_WHOLE_QUAD(0), 594b7e1c893Smrg BUFFER_ID(0), 595b7e1c893Smrg SRC_GPR(0), 596b7e1c893Smrg SRC_REL(ABSOLUTE), 597b7e1c893Smrg SRC_SEL_X(SQ_SEL_X), 598b7e1c893Smrg MEGA_FETCH_COUNT(16)); 599b7e1c893Smrg shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 600b7e1c893Smrg DST_REL(ABSOLUTE), 601b7e1c893Smrg DST_SEL_X(SQ_SEL_X), 602b7e1c893Smrg DST_SEL_Y(SQ_SEL_Y), 603b7e1c893Smrg DST_SEL_Z(SQ_SEL_0), 604b7e1c893Smrg DST_SEL_W(SQ_SEL_1), 605b7e1c893Smrg 
USE_CONST_FIELDS(0), 606b7e1c893Smrg DATA_FORMAT(FMT_32_32_FLOAT), 607ad43ddacSmrg NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 608b7e1c893Smrg FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 609b7e1c893Smrg SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 610b7e1c893Smrg shader[i++] = VTX_DWORD2(OFFSET(0), 611b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN 612b13dfe66Smrg ENDIAN_SWAP(SQ_ENDIAN_8IN32), 613b13dfe66Smrg#else 614b13dfe66Smrg ENDIAN_SWAP(SQ_ENDIAN_NONE), 615b13dfe66Smrg#endif 616b7e1c893Smrg CONST_BUF_NO_STRIDE(0), 617b7e1c893Smrg MEGA_FETCH(1)); 618b7e1c893Smrg shader[i++] = VTX_DWORD_PAD; 619ad43ddacSmrg /* 8/9 */ 620b7e1c893Smrg shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 621b7e1c893Smrg FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 622b7e1c893Smrg FETCH_WHOLE_QUAD(0), 623b7e1c893Smrg BUFFER_ID(0), 624b7e1c893Smrg SRC_GPR(0), 625b7e1c893Smrg SRC_REL(ABSOLUTE), 626b7e1c893Smrg SRC_SEL_X(SQ_SEL_X), 627b7e1c893Smrg MEGA_FETCH_COUNT(8)); 628b7e1c893Smrg shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), 629b7e1c893Smrg DST_REL(ABSOLUTE), 630b7e1c893Smrg DST_SEL_X(SQ_SEL_X), 631b7e1c893Smrg DST_SEL_Y(SQ_SEL_Y), 632b7e1c893Smrg DST_SEL_Z(SQ_SEL_0), 633b7e1c893Smrg DST_SEL_W(SQ_SEL_1), 634b7e1c893Smrg USE_CONST_FIELDS(0), 635b7e1c893Smrg DATA_FORMAT(FMT_32_32_FLOAT), 636ad43ddacSmrg NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 637b7e1c893Smrg FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 638b7e1c893Smrg SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 639b7e1c893Smrg shader[i++] = VTX_DWORD2(OFFSET(8), 640b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN 641b13dfe66Smrg ENDIAN_SWAP(SQ_ENDIAN_8IN32), 642b13dfe66Smrg#else 643b13dfe66Smrg ENDIAN_SWAP(SQ_ENDIAN_NONE), 644b13dfe66Smrg#endif 645b7e1c893Smrg CONST_BUF_NO_STRIDE(0), 646b7e1c893Smrg MEGA_FETCH(0)); 647b7e1c893Smrg shader[i++] = VTX_DWORD_PAD; 648b7e1c893Smrg 649b7e1c893Smrg return i; 650b7e1c893Smrg} 651b7e1c893Smrg 652b7e1c893Smrgint R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) 653b7e1c893Smrg{ 654b7e1c893Smrg int i = 0; 655b7e1c893Smrg 
656b7e1c893Smrg /* 0 */ 657ad43ddacSmrg shader[i++] = CF_DWORD0(ADDR(16)); 658b7e1c893Smrg shader[i++] = CF_DWORD1(POP_COUNT(0), 659b7e1c893Smrg CF_CONST(0), 660b7e1c893Smrg COND(SQ_CF_COND_BOOL), 661b7e1c893Smrg I_COUNT(0), 662b7e1c893Smrg CALL_COUNT(0), 663b7e1c893Smrg END_OF_PROGRAM(0), 664b7e1c893Smrg VALID_PIXEL_MODE(0), 665b7e1c893Smrg CF_INST(SQ_CF_INST_CALL), 666b7e1c893Smrg WHOLE_QUAD_MODE(0), 667b7e1c893Smrg BARRIER(0)); 668b7e1c893Smrg /* 1 */ 669ad43ddacSmrg shader[i++] = CF_DWORD0(ADDR(24)); 670b7e1c893Smrg shader[i++] = CF_DWORD1(POP_COUNT(0), 671b7e1c893Smrg CF_CONST(0), 672b7e1c893Smrg COND(SQ_CF_COND_NOT_BOOL), 673b7e1c893Smrg I_COUNT(0), 674b7e1c893Smrg CALL_COUNT(0), 675b7e1c893Smrg END_OF_PROGRAM(0), 676b7e1c893Smrg VALID_PIXEL_MODE(0), 677b7e1c893Smrg CF_INST(SQ_CF_INST_CALL), 678b7e1c893Smrg WHOLE_QUAD_MODE(0), 679b7e1c893Smrg BARRIER(0)); 680b7e1c893Smrg /* 2 */ 681b7e1c893Smrg shader[i++] = CF_ALU_DWORD0(ADDR(4), 682b7e1c893Smrg KCACHE_BANK0(0), 683b7e1c893Smrg KCACHE_BANK1(0), 684b7e1c893Smrg KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 685b7e1c893Smrg shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 686b7e1c893Smrg KCACHE_ADDR0(0), 687b7e1c893Smrg KCACHE_ADDR1(0), 688ad43ddacSmrg I_COUNT(12), 689b7e1c893Smrg USES_WATERFALL(0), 690b7e1c893Smrg CF_INST(SQ_CF_INST_ALU), 691b7e1c893Smrg WHOLE_QUAD_MODE(0), 692b7e1c893Smrg BARRIER(1)); 693b7e1c893Smrg /* 3 */ 694b7e1c893Smrg shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 695b7e1c893Smrg TYPE(SQ_EXPORT_PIXEL), 696b7e1c893Smrg RW_GPR(2), 697b7e1c893Smrg RW_REL(ABSOLUTE), 698b7e1c893Smrg INDEX_GPR(0), 699b7e1c893Smrg ELEM_SIZE(3)); 700b7e1c893Smrg shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 701b7e1c893Smrg SRC_SEL_Y(SQ_SEL_Y), 702b7e1c893Smrg SRC_SEL_Z(SQ_SEL_Z), 703b7e1c893Smrg SRC_SEL_W(SQ_SEL_W), 704b7e1c893Smrg R6xx_ELEM_LOOP(0), 705b7e1c893Smrg BURST_COUNT(1), 706b7e1c893Smrg END_OF_PROGRAM(1), 707b7e1c893Smrg VALID_PIXEL_MODE(0), 708b7e1c893Smrg 
CF_INST(SQ_CF_INST_EXPORT_DONE), 709b7e1c893Smrg WHOLE_QUAD_MODE(0), 710b7e1c893Smrg BARRIER(1)); 711ad43ddacSmrg /* 4,5,6,7 */ 712ad43ddacSmrg /* r2.x = MAD(c0.w, r1.x, c0.x) */ 713921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), 714b7e1c893Smrg SRC0_REL(ABSOLUTE), 715ad43ddacSmrg SRC0_ELEM(ELEM_W), 716b7e1c893Smrg SRC0_NEG(0), 717921a55d8Smrg SRC1_SEL(ALU_SRC_GPR_BASE + 1), 718b7e1c893Smrg SRC1_REL(ABSOLUTE), 719b7e1c893Smrg SRC1_ELEM(ELEM_X), 720b7e1c893Smrg SRC1_NEG(0), 721b7e1c893Smrg INDEX_MODE(SQ_INDEX_LOOP), 722b7e1c893Smrg PRED_SEL(SQ_PRED_SEL_OFF), 723b7e1c893Smrg LAST(0)); 724921a55d8Smrg shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0), 725b7e1c893Smrg SRC2_REL(ABSOLUTE), 726ad43ddacSmrg SRC2_ELEM(ELEM_X), 727b7e1c893Smrg SRC2_NEG(0), 728b7e1c893Smrg ALU_INST(SQ_OP3_INST_MULADD), 729b7e1c893Smrg BANK_SWIZZLE(SQ_ALU_VEC_012), 730ad43ddacSmrg DST_GPR(2), 731b7e1c893Smrg DST_REL(ABSOLUTE), 732b7e1c893Smrg DST_ELEM(ELEM_X), 733ad43ddacSmrg CLAMP(0)); 734ad43ddacSmrg /* r2.y = MAD(c0.w, r1.x, c0.y) */ 735921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), 736b7e1c893Smrg SRC0_REL(ABSOLUTE), 737ad43ddacSmrg SRC0_ELEM(ELEM_W), 738b7e1c893Smrg SRC0_NEG(0), 739921a55d8Smrg SRC1_SEL(ALU_SRC_GPR_BASE + 1), 740b7e1c893Smrg SRC1_REL(ABSOLUTE), 741ad43ddacSmrg SRC1_ELEM(ELEM_X), 742b7e1c893Smrg SRC1_NEG(0), 743b7e1c893Smrg INDEX_MODE(SQ_INDEX_LOOP), 744b7e1c893Smrg PRED_SEL(SQ_PRED_SEL_OFF), 745b7e1c893Smrg LAST(0)); 746921a55d8Smrg shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0), 747b7e1c893Smrg SRC2_REL(ABSOLUTE), 748ad43ddacSmrg SRC2_ELEM(ELEM_Y), 749b7e1c893Smrg SRC2_NEG(0), 750b7e1c893Smrg ALU_INST(SQ_OP3_INST_MULADD), 751b7e1c893Smrg BANK_SWIZZLE(SQ_ALU_VEC_012), 752ad43ddacSmrg DST_GPR(2), 753b7e1c893Smrg DST_REL(ABSOLUTE), 754b7e1c893Smrg DST_ELEM(ELEM_Y), 755b7e1c893Smrg CLAMP(0)); 756ad43ddacSmrg /* r2.z = MAD(c0.w, r1.x, c0.z) */ 757921a55d8Smrg shader[i++] = 
ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), 758b7e1c893Smrg SRC0_REL(ABSOLUTE), 759ad43ddacSmrg SRC0_ELEM(ELEM_W), 760b7e1c893Smrg SRC0_NEG(0), 761921a55d8Smrg SRC1_SEL(ALU_SRC_GPR_BASE + 1), 762b7e1c893Smrg SRC1_REL(ABSOLUTE), 763ad43ddacSmrg SRC1_ELEM(ELEM_X), 764b7e1c893Smrg SRC1_NEG(0), 765b7e1c893Smrg INDEX_MODE(SQ_INDEX_LOOP), 766b7e1c893Smrg PRED_SEL(SQ_PRED_SEL_OFF), 767b7e1c893Smrg LAST(0)); 768921a55d8Smrg shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0), 769b7e1c893Smrg SRC2_REL(ABSOLUTE), 770ad43ddacSmrg SRC2_ELEM(ELEM_Z), 771b7e1c893Smrg SRC2_NEG(0), 772b7e1c893Smrg ALU_INST(SQ_OP3_INST_MULADD), 773b7e1c893Smrg BANK_SWIZZLE(SQ_ALU_VEC_012), 774ad43ddacSmrg DST_GPR(2), 775b7e1c893Smrg DST_REL(ABSOLUTE), 776b7e1c893Smrg DST_ELEM(ELEM_Z), 777b7e1c893Smrg CLAMP(0)); 778ad43ddacSmrg /* r2.w = MAD(0, 0, 1) */ 779b7e1c893Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), 780b7e1c893Smrg SRC0_REL(ABSOLUTE), 781b7e1c893Smrg SRC0_ELEM(ELEM_X), 782b7e1c893Smrg SRC0_NEG(0), 783b7e1c893Smrg SRC1_SEL(SQ_ALU_SRC_0), 784b7e1c893Smrg SRC1_REL(ABSOLUTE), 785b7e1c893Smrg SRC1_ELEM(ELEM_X), 786b7e1c893Smrg SRC1_NEG(0), 787b7e1c893Smrg INDEX_MODE(SQ_INDEX_LOOP), 788b7e1c893Smrg PRED_SEL(SQ_PRED_SEL_OFF), 789b7e1c893Smrg LAST(1)); 790ad43ddacSmrg shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), 791ad43ddacSmrg SRC2_REL(ABSOLUTE), 792ad43ddacSmrg SRC2_ELEM(ELEM_X), 793ad43ddacSmrg SRC2_NEG(0), 794ad43ddacSmrg ALU_INST(SQ_OP3_INST_MULADD), 795b7e1c893Smrg BANK_SWIZZLE(SQ_ALU_VEC_012), 796ad43ddacSmrg DST_GPR(2), 797b7e1c893Smrg DST_REL(ABSOLUTE), 798b7e1c893Smrg DST_ELEM(ELEM_W), 799b7e1c893Smrg CLAMP(0)); 800ad43ddacSmrg 801ad43ddacSmrg /* 8,9,10,11 */ 802ad43ddacSmrg /* r2.x = MAD(c1.x, r1.y, pv.x) */ 803921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1), 804b7e1c893Smrg SRC0_REL(ABSOLUTE), 805b7e1c893Smrg SRC0_ELEM(ELEM_X), 806b7e1c893Smrg SRC0_NEG(0), 807921a55d8Smrg SRC1_SEL(ALU_SRC_GPR_BASE + 1), 808b7e1c893Smrg 
SRC1_REL(ABSOLUTE), 809ad43ddacSmrg SRC1_ELEM(ELEM_Y), 810b7e1c893Smrg SRC1_NEG(0), 811b7e1c893Smrg INDEX_MODE(SQ_INDEX_LOOP), 812b7e1c893Smrg PRED_SEL(SQ_PRED_SEL_OFF), 813b7e1c893Smrg LAST(0)); 814ad43ddacSmrg shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 815ad43ddacSmrg SRC2_REL(ABSOLUTE), 816ad43ddacSmrg SRC2_ELEM(ELEM_X), 817ad43ddacSmrg SRC2_NEG(0), 818ad43ddacSmrg ALU_INST(SQ_OP3_INST_MULADD), 819ad43ddacSmrg BANK_SWIZZLE(SQ_ALU_VEC_012), 820b7e1c893Smrg DST_GPR(2), 821b7e1c893Smrg DST_REL(ABSOLUTE), 822b7e1c893Smrg DST_ELEM(ELEM_X), 823ad43ddacSmrg CLAMP(0)); 824ad43ddacSmrg /* r2.y = MAD(c1.y, r1.y, pv.y) */ 825921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1), 826b7e1c893Smrg SRC0_REL(ABSOLUTE), 827b7e1c893Smrg SRC0_ELEM(ELEM_Y), 828b7e1c893Smrg SRC0_NEG(0), 829921a55d8Smrg SRC1_SEL(ALU_SRC_GPR_BASE + 1), 830b7e1c893Smrg SRC1_REL(ABSOLUTE), 831b7e1c893Smrg SRC1_ELEM(ELEM_Y), 832b7e1c893Smrg SRC1_NEG(0), 833b7e1c893Smrg INDEX_MODE(SQ_INDEX_LOOP), 834b7e1c893Smrg PRED_SEL(SQ_PRED_SEL_OFF), 835b7e1c893Smrg LAST(0)); 836ad43ddacSmrg shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 837ad43ddacSmrg SRC2_REL(ABSOLUTE), 838ad43ddacSmrg SRC2_ELEM(ELEM_Y), 839ad43ddacSmrg SRC2_NEG(0), 840ad43ddacSmrg ALU_INST(SQ_OP3_INST_MULADD), 841ad43ddacSmrg BANK_SWIZZLE(SQ_ALU_VEC_012), 842ad43ddacSmrg DST_GPR(2), 843b7e1c893Smrg DST_REL(ABSOLUTE), 844b7e1c893Smrg DST_ELEM(ELEM_Y), 845ad43ddacSmrg CLAMP(0)); 846ad43ddacSmrg /* r2.z = MAD(c1.z, r1.y, pv.z) */ 847921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1), 848b7e1c893Smrg SRC0_REL(ABSOLUTE), 849b7e1c893Smrg SRC0_ELEM(ELEM_Z), 850b7e1c893Smrg SRC0_NEG(0), 851921a55d8Smrg SRC1_SEL(ALU_SRC_GPR_BASE + 1), 852b7e1c893Smrg SRC1_REL(ABSOLUTE), 853ad43ddacSmrg SRC1_ELEM(ELEM_Y), 854b7e1c893Smrg SRC1_NEG(0), 855b7e1c893Smrg INDEX_MODE(SQ_INDEX_LOOP), 856b7e1c893Smrg PRED_SEL(SQ_PRED_SEL_OFF), 857b7e1c893Smrg LAST(0)); 858ad43ddacSmrg shader[i++] = 
ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 859ad43ddacSmrg SRC2_REL(ABSOLUTE), 860ad43ddacSmrg SRC2_ELEM(ELEM_Z), 861ad43ddacSmrg SRC2_NEG(0), 862ad43ddacSmrg ALU_INST(SQ_OP3_INST_MULADD), 863ad43ddacSmrg BANK_SWIZZLE(SQ_ALU_VEC_012), 864ad43ddacSmrg DST_GPR(2), 865b7e1c893Smrg DST_REL(ABSOLUTE), 866b7e1c893Smrg DST_ELEM(ELEM_Z), 867ad43ddacSmrg CLAMP(0)); 868ad43ddacSmrg /* r2.w = MAD(0, 0, 1) */ 869ad43ddacSmrg shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), 870b7e1c893Smrg SRC0_REL(ABSOLUTE), 871ad43ddacSmrg SRC0_ELEM(ELEM_X), 872b7e1c893Smrg SRC0_NEG(0), 873ad43ddacSmrg SRC1_SEL(SQ_ALU_SRC_0), 874b7e1c893Smrg SRC1_REL(ABSOLUTE), 875ad43ddacSmrg SRC1_ELEM(ELEM_X), 876b7e1c893Smrg SRC1_NEG(0), 877b7e1c893Smrg INDEX_MODE(SQ_INDEX_LOOP), 878b7e1c893Smrg PRED_SEL(SQ_PRED_SEL_OFF), 879b7e1c893Smrg LAST(1)); 880ad43ddacSmrg shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), 881ad43ddacSmrg SRC2_REL(ABSOLUTE), 882ad43ddacSmrg SRC2_ELEM(ELEM_W), 883ad43ddacSmrg SRC2_NEG(0), 884ad43ddacSmrg ALU_INST(SQ_OP3_INST_MULADD), 885ad43ddacSmrg BANK_SWIZZLE(SQ_ALU_VEC_012), 886ad43ddacSmrg DST_GPR(2), 887b7e1c893Smrg DST_REL(ABSOLUTE), 888b7e1c893Smrg DST_ELEM(ELEM_W), 889ad43ddacSmrg CLAMP(0)); 890ad43ddacSmrg /* 12,13,14,15 */ 891ad43ddacSmrg /* r2.x = MAD(c2.x, r1.z, pv.x) */ 892921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2), 893b7e1c893Smrg SRC0_REL(ABSOLUTE), 894b7e1c893Smrg SRC0_ELEM(ELEM_X), 895b7e1c893Smrg SRC0_NEG(0), 896921a55d8Smrg SRC1_SEL(ALU_SRC_GPR_BASE + 1), 897b7e1c893Smrg SRC1_REL(ABSOLUTE), 898ad43ddacSmrg SRC1_ELEM(ELEM_Z), 899b7e1c893Smrg SRC1_NEG(0), 900b7e1c893Smrg INDEX_MODE(SQ_INDEX_LOOP), 901b7e1c893Smrg PRED_SEL(SQ_PRED_SEL_OFF), 902b7e1c893Smrg LAST(0)); 903ad43ddacSmrg shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 904ad43ddacSmrg SRC2_REL(ABSOLUTE), 905ad43ddacSmrg SRC2_ELEM(ELEM_X), 906ad43ddacSmrg SRC2_NEG(0), 907ad43ddacSmrg ALU_INST(SQ_OP3_INST_MULADD), 908ad43ddacSmrg BANK_SWIZZLE(SQ_ALU_VEC_012), 909ad43ddacSmrg 
DST_GPR(2), 910b7e1c893Smrg DST_REL(ABSOLUTE), 911b7e1c893Smrg DST_ELEM(ELEM_X), 912b7e1c893Smrg CLAMP(1)); 913ad43ddacSmrg /* r2.y = MAD(c2.y, r1.z, pv.y) */ 914921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2), 915b7e1c893Smrg SRC0_REL(ABSOLUTE), 916b7e1c893Smrg SRC0_ELEM(ELEM_Y), 917b7e1c893Smrg SRC0_NEG(0), 918921a55d8Smrg SRC1_SEL(ALU_SRC_GPR_BASE + 1), 919b7e1c893Smrg SRC1_REL(ABSOLUTE), 920ad43ddacSmrg SRC1_ELEM(ELEM_Z), 921b7e1c893Smrg SRC1_NEG(0), 922b7e1c893Smrg INDEX_MODE(SQ_INDEX_LOOP), 923b7e1c893Smrg PRED_SEL(SQ_PRED_SEL_OFF), 924b7e1c893Smrg LAST(0)); 925ad43ddacSmrg shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 926ad43ddacSmrg SRC2_REL(ABSOLUTE), 927ad43ddacSmrg SRC2_ELEM(ELEM_Y), 928ad43ddacSmrg SRC2_NEG(0), 929ad43ddacSmrg ALU_INST(SQ_OP3_INST_MULADD), 930ad43ddacSmrg BANK_SWIZZLE(SQ_ALU_VEC_012), 931b7e1c893Smrg DST_GPR(2), 932b7e1c893Smrg DST_REL(ABSOLUTE), 933b7e1c893Smrg DST_ELEM(ELEM_Y), 934b7e1c893Smrg CLAMP(1)); 935ad43ddacSmrg /* r2.z = MAD(c2.z, r1.z, pv.z) */ 936921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2), 937b7e1c893Smrg SRC0_REL(ABSOLUTE), 938b7e1c893Smrg SRC0_ELEM(ELEM_Z), 939b7e1c893Smrg SRC0_NEG(0), 940921a55d8Smrg SRC1_SEL(ALU_SRC_GPR_BASE + 1), 941b7e1c893Smrg SRC1_REL(ABSOLUTE), 942b7e1c893Smrg SRC1_ELEM(ELEM_Z), 943b7e1c893Smrg SRC1_NEG(0), 944b7e1c893Smrg INDEX_MODE(SQ_INDEX_LOOP), 945b7e1c893Smrg PRED_SEL(SQ_PRED_SEL_OFF), 946b7e1c893Smrg LAST(0)); 947ad43ddacSmrg shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 948ad43ddacSmrg SRC2_REL(ABSOLUTE), 949ad43ddacSmrg SRC2_ELEM(ELEM_Z), 950ad43ddacSmrg SRC2_NEG(0), 951ad43ddacSmrg ALU_INST(SQ_OP3_INST_MULADD), 952ad43ddacSmrg BANK_SWIZZLE(SQ_ALU_VEC_012), 953ad43ddacSmrg DST_GPR(2), 954b7e1c893Smrg DST_REL(ABSOLUTE), 955b7e1c893Smrg DST_ELEM(ELEM_Z), 956b7e1c893Smrg CLAMP(1)); 957ad43ddacSmrg /* r2.w = MAD(0, 0, 1) */ 958ad43ddacSmrg shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), 959b7e1c893Smrg SRC0_REL(ABSOLUTE), 
960b7e1c893Smrg SRC0_ELEM(ELEM_X), 961b7e1c893Smrg SRC0_NEG(0), 962ad43ddacSmrg SRC1_SEL(SQ_ALU_SRC_0), 963b7e1c893Smrg SRC1_REL(ABSOLUTE), 964b7e1c893Smrg SRC1_ELEM(ELEM_X), 965b7e1c893Smrg SRC1_NEG(0), 966b7e1c893Smrg INDEX_MODE(SQ_INDEX_LOOP), 967b7e1c893Smrg PRED_SEL(SQ_PRED_SEL_OFF), 968b7e1c893Smrg LAST(1)); 969ad43ddacSmrg shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), 970ad43ddacSmrg SRC2_REL(ABSOLUTE), 971ad43ddacSmrg SRC2_ELEM(ELEM_X), 972ad43ddacSmrg SRC2_NEG(0), 973ad43ddacSmrg ALU_INST(SQ_OP3_INST_MULADD), 974ad43ddacSmrg BANK_SWIZZLE(SQ_ALU_VEC_012), 975ad43ddacSmrg DST_GPR(2), 976b7e1c893Smrg DST_REL(ABSOLUTE), 977b7e1c893Smrg DST_ELEM(ELEM_W), 978b7e1c893Smrg CLAMP(1)); 979ad43ddacSmrg 980ad43ddacSmrg /* 16 */ 981ad43ddacSmrg shader[i++] = CF_DWORD0(ADDR(18)); 982b7e1c893Smrg shader[i++] = CF_DWORD1(POP_COUNT(0), 983b7e1c893Smrg CF_CONST(0), 984b7e1c893Smrg COND(SQ_CF_COND_ACTIVE), 985b7e1c893Smrg I_COUNT(3), 986b7e1c893Smrg CALL_COUNT(0), 987b7e1c893Smrg END_OF_PROGRAM(0), 988b7e1c893Smrg VALID_PIXEL_MODE(0), 989b7e1c893Smrg CF_INST(SQ_CF_INST_TEX), 990b7e1c893Smrg WHOLE_QUAD_MODE(0), 991b7e1c893Smrg BARRIER(1)); 992ad43ddacSmrg /* 17 */ 993b7e1c893Smrg shader[i++] = CF_DWORD0(ADDR(0)); 994b7e1c893Smrg shader[i++] = CF_DWORD1(POP_COUNT(0), 995b7e1c893Smrg CF_CONST(0), 996b7e1c893Smrg COND(SQ_CF_COND_ACTIVE), 997b7e1c893Smrg I_COUNT(0), 998b7e1c893Smrg CALL_COUNT(0), 999b7e1c893Smrg END_OF_PROGRAM(0), 1000b7e1c893Smrg VALID_PIXEL_MODE(0), 1001b7e1c893Smrg CF_INST(SQ_CF_INST_RETURN), 1002b7e1c893Smrg WHOLE_QUAD_MODE(0), 1003b7e1c893Smrg BARRIER(1)); 1004ad43ddacSmrg /* 18/19 */ 1005b7e1c893Smrg shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1006b7e1c893Smrg BC_FRAC_MODE(0), 1007b7e1c893Smrg FETCH_WHOLE_QUAD(0), 1008b7e1c893Smrg RESOURCE_ID(0), 1009b7e1c893Smrg SRC_GPR(0), 1010b7e1c893Smrg SRC_REL(ABSOLUTE), 1011b7e1c893Smrg R7xx_ALT_CONST(0)); 1012b7e1c893Smrg shader[i++] = TEX_DWORD1(DST_GPR(1), 1013b7e1c893Smrg DST_REL(ABSOLUTE), 
1014b7e1c893Smrg DST_SEL_X(SQ_SEL_X), 1015b7e1c893Smrg DST_SEL_Y(SQ_SEL_MASK), 1016b7e1c893Smrg DST_SEL_Z(SQ_SEL_MASK), 1017b7e1c893Smrg DST_SEL_W(SQ_SEL_1), 1018b7e1c893Smrg LOD_BIAS(0), 1019b7e1c893Smrg COORD_TYPE_X(TEX_NORMALIZED), 1020b7e1c893Smrg COORD_TYPE_Y(TEX_NORMALIZED), 1021b7e1c893Smrg COORD_TYPE_Z(TEX_NORMALIZED), 1022b7e1c893Smrg COORD_TYPE_W(TEX_NORMALIZED)); 1023b7e1c893Smrg shader[i++] = TEX_DWORD2(OFFSET_X(0), 1024b7e1c893Smrg OFFSET_Y(0), 1025b7e1c893Smrg OFFSET_Z(0), 1026b7e1c893Smrg SAMPLER_ID(0), 1027b7e1c893Smrg SRC_SEL_X(SQ_SEL_X), 1028b7e1c893Smrg SRC_SEL_Y(SQ_SEL_Y), 1029b7e1c893Smrg SRC_SEL_Z(SQ_SEL_0), 1030b7e1c893Smrg SRC_SEL_W(SQ_SEL_1)); 1031b7e1c893Smrg shader[i++] = TEX_DWORD_PAD; 1032ad43ddacSmrg /* 20/21 */ 1033b7e1c893Smrg shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1034b7e1c893Smrg BC_FRAC_MODE(0), 1035b7e1c893Smrg FETCH_WHOLE_QUAD(0), 1036b7e1c893Smrg RESOURCE_ID(1), 1037b7e1c893Smrg SRC_GPR(0), 1038b7e1c893Smrg SRC_REL(ABSOLUTE), 1039b7e1c893Smrg R7xx_ALT_CONST(0)); 1040b7e1c893Smrg shader[i++] = TEX_DWORD1(DST_GPR(1), 1041b7e1c893Smrg DST_REL(ABSOLUTE), 1042b7e1c893Smrg DST_SEL_X(SQ_SEL_MASK), 1043b7e1c893Smrg DST_SEL_Y(SQ_SEL_MASK), 1044b7e1c893Smrg DST_SEL_Z(SQ_SEL_X), 1045b7e1c893Smrg DST_SEL_W(SQ_SEL_MASK), 1046b7e1c893Smrg LOD_BIAS(0), 1047b7e1c893Smrg COORD_TYPE_X(TEX_NORMALIZED), 1048b7e1c893Smrg COORD_TYPE_Y(TEX_NORMALIZED), 1049b7e1c893Smrg COORD_TYPE_Z(TEX_NORMALIZED), 1050b7e1c893Smrg COORD_TYPE_W(TEX_NORMALIZED)); 1051b7e1c893Smrg shader[i++] = TEX_DWORD2(OFFSET_X(0), 1052b7e1c893Smrg OFFSET_Y(0), 1053b7e1c893Smrg OFFSET_Z(0), 1054b7e1c893Smrg SAMPLER_ID(1), 1055b7e1c893Smrg SRC_SEL_X(SQ_SEL_X), 1056b7e1c893Smrg SRC_SEL_Y(SQ_SEL_Y), 1057b7e1c893Smrg SRC_SEL_Z(SQ_SEL_0), 1058b7e1c893Smrg SRC_SEL_W(SQ_SEL_1)); 1059b7e1c893Smrg shader[i++] = TEX_DWORD_PAD; 1060ad43ddacSmrg /* 22/23 */ 1061b7e1c893Smrg shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1062b7e1c893Smrg BC_FRAC_MODE(0), 1063b7e1c893Smrg 
FETCH_WHOLE_QUAD(0), 1064b7e1c893Smrg RESOURCE_ID(2), 1065b7e1c893Smrg SRC_GPR(0), 1066b7e1c893Smrg SRC_REL(ABSOLUTE), 1067b7e1c893Smrg R7xx_ALT_CONST(0)); 1068b7e1c893Smrg shader[i++] = TEX_DWORD1(DST_GPR(1), 1069b7e1c893Smrg DST_REL(ABSOLUTE), 1070b7e1c893Smrg DST_SEL_X(SQ_SEL_MASK), 1071b7e1c893Smrg DST_SEL_Y(SQ_SEL_X), 1072b7e1c893Smrg DST_SEL_Z(SQ_SEL_MASK), 1073b7e1c893Smrg DST_SEL_W(SQ_SEL_MASK), 1074b7e1c893Smrg LOD_BIAS(0), 1075b7e1c893Smrg COORD_TYPE_X(TEX_NORMALIZED), 1076b7e1c893Smrg COORD_TYPE_Y(TEX_NORMALIZED), 1077b7e1c893Smrg COORD_TYPE_Z(TEX_NORMALIZED), 1078b7e1c893Smrg COORD_TYPE_W(TEX_NORMALIZED)); 1079b7e1c893Smrg shader[i++] = TEX_DWORD2(OFFSET_X(0), 1080b7e1c893Smrg OFFSET_Y(0), 1081b7e1c893Smrg OFFSET_Z(0), 1082b7e1c893Smrg SAMPLER_ID(2), 1083b7e1c893Smrg SRC_SEL_X(SQ_SEL_X), 1084b7e1c893Smrg SRC_SEL_Y(SQ_SEL_Y), 1085b7e1c893Smrg SRC_SEL_Z(SQ_SEL_0), 1086b7e1c893Smrg SRC_SEL_W(SQ_SEL_1)); 1087b7e1c893Smrg shader[i++] = TEX_DWORD_PAD; 1088ad43ddacSmrg /* 24 */ 1089ad43ddacSmrg shader[i++] = CF_DWORD0(ADDR(26)); 1090b7e1c893Smrg shader[i++] = CF_DWORD1(POP_COUNT(0), 1091b7e1c893Smrg CF_CONST(0), 1092b7e1c893Smrg COND(SQ_CF_COND_ACTIVE), 1093b7e1c893Smrg I_COUNT(2), 1094b7e1c893Smrg CALL_COUNT(0), 1095b7e1c893Smrg END_OF_PROGRAM(0), 1096b7e1c893Smrg VALID_PIXEL_MODE(0), 1097b7e1c893Smrg CF_INST(SQ_CF_INST_TEX), 1098b7e1c893Smrg WHOLE_QUAD_MODE(0), 1099b7e1c893Smrg BARRIER(1)); 1100ad43ddacSmrg /* 25 */ 1101b7e1c893Smrg shader[i++] = CF_DWORD0(ADDR(0)); 1102b7e1c893Smrg shader[i++] = CF_DWORD1(POP_COUNT(0), 1103b7e1c893Smrg CF_CONST(0), 1104b7e1c893Smrg COND(SQ_CF_COND_ACTIVE), 1105b7e1c893Smrg I_COUNT(0), 1106b7e1c893Smrg CALL_COUNT(0), 1107b7e1c893Smrg END_OF_PROGRAM(0), 1108b7e1c893Smrg VALID_PIXEL_MODE(0), 1109b7e1c893Smrg CF_INST(SQ_CF_INST_RETURN), 1110b7e1c893Smrg WHOLE_QUAD_MODE(0), 1111b7e1c893Smrg BARRIER(1)); 1112ad43ddacSmrg /* 26/27 */ 1113b7e1c893Smrg shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1114b7e1c893Smrg 
BC_FRAC_MODE(0), 1115b7e1c893Smrg FETCH_WHOLE_QUAD(0), 1116b7e1c893Smrg RESOURCE_ID(0), 1117b7e1c893Smrg SRC_GPR(0), 1118b7e1c893Smrg SRC_REL(ABSOLUTE), 1119b7e1c893Smrg R7xx_ALT_CONST(0)); 1120b7e1c893Smrg shader[i++] = TEX_DWORD1(DST_GPR(1), 1121b7e1c893Smrg DST_REL(ABSOLUTE), 1122b7e1c893Smrg DST_SEL_X(SQ_SEL_X), 1123b7e1c893Smrg DST_SEL_Y(SQ_SEL_MASK), 1124b7e1c893Smrg DST_SEL_Z(SQ_SEL_MASK), 1125b7e1c893Smrg DST_SEL_W(SQ_SEL_1), 1126b7e1c893Smrg LOD_BIAS(0), 1127b7e1c893Smrg COORD_TYPE_X(TEX_NORMALIZED), 1128b7e1c893Smrg COORD_TYPE_Y(TEX_NORMALIZED), 1129b7e1c893Smrg COORD_TYPE_Z(TEX_NORMALIZED), 1130b7e1c893Smrg COORD_TYPE_W(TEX_NORMALIZED)); 1131b7e1c893Smrg shader[i++] = TEX_DWORD2(OFFSET_X(0), 1132b7e1c893Smrg OFFSET_Y(0), 1133b7e1c893Smrg OFFSET_Z(0), 1134b7e1c893Smrg SAMPLER_ID(0), 1135b7e1c893Smrg SRC_SEL_X(SQ_SEL_X), 1136b7e1c893Smrg SRC_SEL_Y(SQ_SEL_Y), 1137b7e1c893Smrg SRC_SEL_Z(SQ_SEL_0), 1138b7e1c893Smrg SRC_SEL_W(SQ_SEL_1)); 1139b7e1c893Smrg shader[i++] = TEX_DWORD_PAD; 1140ad43ddacSmrg /* 28/29 */ 1141b7e1c893Smrg shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1142b7e1c893Smrg BC_FRAC_MODE(0), 1143b7e1c893Smrg FETCH_WHOLE_QUAD(0), 1144b7e1c893Smrg RESOURCE_ID(1), 1145b7e1c893Smrg SRC_GPR(0), 1146b7e1c893Smrg SRC_REL(ABSOLUTE), 1147b7e1c893Smrg R7xx_ALT_CONST(0)); 1148b7e1c893Smrg shader[i++] = TEX_DWORD1(DST_GPR(1), 1149b7e1c893Smrg DST_REL(ABSOLUTE), 1150b7e1c893Smrg DST_SEL_X(SQ_SEL_MASK), 1151b7e1c893Smrg DST_SEL_Y(SQ_SEL_X), 1152b7e1c893Smrg DST_SEL_Z(SQ_SEL_Y), 1153b7e1c893Smrg DST_SEL_W(SQ_SEL_MASK), 1154b7e1c893Smrg LOD_BIAS(0), 1155b7e1c893Smrg COORD_TYPE_X(TEX_NORMALIZED), 1156b7e1c893Smrg COORD_TYPE_Y(TEX_NORMALIZED), 1157b7e1c893Smrg COORD_TYPE_Z(TEX_NORMALIZED), 1158b7e1c893Smrg COORD_TYPE_W(TEX_NORMALIZED)); 1159b7e1c893Smrg shader[i++] = TEX_DWORD2(OFFSET_X(0), 1160b7e1c893Smrg OFFSET_Y(0), 1161b7e1c893Smrg OFFSET_Z(0), 1162b7e1c893Smrg SAMPLER_ID(1), 1163b7e1c893Smrg SRC_SEL_X(SQ_SEL_X), 1164b7e1c893Smrg 
SRC_SEL_Y(SQ_SEL_Y), 1165b7e1c893Smrg SRC_SEL_Z(SQ_SEL_0), 1166b7e1c893Smrg SRC_SEL_W(SQ_SEL_1)); 1167b7e1c893Smrg shader[i++] = TEX_DWORD_PAD; 1168b7e1c893Smrg 1169b7e1c893Smrg return i; 1170b7e1c893Smrg} 1171b7e1c893Smrg 1172b7e1c893Smrg/* comp vs --------------------------------------- */ 1173b7e1c893Smrgint R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) 1174b7e1c893Smrg{ 1175b7e1c893Smrg int i = 0; 1176b7e1c893Smrg 1177b7e1c893Smrg /* 0 */ 1178b7e1c893Smrg shader[i++] = CF_DWORD0(ADDR(3)); 1179b7e1c893Smrg shader[i++] = CF_DWORD1(POP_COUNT(0), 1180b7e1c893Smrg CF_CONST(0), 1181b7e1c893Smrg COND(SQ_CF_COND_BOOL), 1182b7e1c893Smrg I_COUNT(0), 1183b7e1c893Smrg CALL_COUNT(0), 1184b7e1c893Smrg END_OF_PROGRAM(0), 1185b7e1c893Smrg VALID_PIXEL_MODE(0), 1186b7e1c893Smrg CF_INST(SQ_CF_INST_CALL), 1187b7e1c893Smrg WHOLE_QUAD_MODE(0), 1188b7e1c893Smrg BARRIER(0)); 1189b7e1c893Smrg /* 1 */ 11900974d292Smrg shader[i++] = CF_DWORD0(ADDR(9)); 1191b7e1c893Smrg shader[i++] = CF_DWORD1(POP_COUNT(0), 1192b7e1c893Smrg CF_CONST(0), 1193b7e1c893Smrg COND(SQ_CF_COND_NOT_BOOL), 1194b7e1c893Smrg I_COUNT(0), 1195b7e1c893Smrg CALL_COUNT(0), 1196b7e1c893Smrg END_OF_PROGRAM(0), 1197b7e1c893Smrg VALID_PIXEL_MODE(0), 1198b7e1c893Smrg CF_INST(SQ_CF_INST_CALL), 1199b7e1c893Smrg WHOLE_QUAD_MODE(0), 1200b7e1c893Smrg BARRIER(0)); 1201b7e1c893Smrg /* 2 */ 12022f39173dSmrg shader[i++] = CF_DWORD0(ADDR(0)); 1203b7e1c893Smrg shader[i++] = CF_DWORD1(POP_COUNT(0), 1204b7e1c893Smrg CF_CONST(0), 1205b7e1c893Smrg COND(SQ_CF_COND_ACTIVE), 1206b7e1c893Smrg I_COUNT(0), 1207b7e1c893Smrg CALL_COUNT(0), 1208b7e1c893Smrg END_OF_PROGRAM(1), 1209b7e1c893Smrg VALID_PIXEL_MODE(0), 1210b7e1c893Smrg CF_INST(SQ_CF_INST_NOP), 1211b7e1c893Smrg WHOLE_QUAD_MODE(0), 1212b7e1c893Smrg BARRIER(1)); 1213b7e1c893Smrg /* 3 - mask sub */ 1214921a55d8Smrg shader[i++] = CF_DWORD0(ADDR(44)); 1215b7e1c893Smrg shader[i++] = CF_DWORD1(POP_COUNT(0), 1216b7e1c893Smrg CF_CONST(0), 1217b7e1c893Smrg COND(SQ_CF_COND_ACTIVE), 
1218b7e1c893Smrg I_COUNT(3), 1219b7e1c893Smrg CALL_COUNT(0), 1220b7e1c893Smrg END_OF_PROGRAM(0), 1221b7e1c893Smrg VALID_PIXEL_MODE(0), 1222b7e1c893Smrg CF_INST(SQ_CF_INST_VTX), 1223b7e1c893Smrg WHOLE_QUAD_MODE(0), 1224b7e1c893Smrg BARRIER(1)); 1225ad43ddacSmrg 1226ad43ddacSmrg /* 4 - ALU */ 12270974d292Smrg shader[i++] = CF_ALU_DWORD0(ADDR(14), 1228ad43ddacSmrg KCACHE_BANK0(0), 1229ad43ddacSmrg KCACHE_BANK1(0), 1230ad43ddacSmrg KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 1231ad43ddacSmrg shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 1232ad43ddacSmrg KCACHE_ADDR0(0), 1233ad43ddacSmrg KCACHE_ADDR1(0), 1234921a55d8Smrg I_COUNT(20), 1235ad43ddacSmrg USES_WATERFALL(0), 1236ad43ddacSmrg CF_INST(SQ_CF_INST_ALU), 1237ad43ddacSmrg WHOLE_QUAD_MODE(0), 1238ad43ddacSmrg BARRIER(1)); 1239ad43ddacSmrg 1240ad43ddacSmrg /* 5 - dst */ 1241b7e1c893Smrg shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 1242b7e1c893Smrg TYPE(SQ_EXPORT_POS), 1243b7e1c893Smrg RW_GPR(2), 1244b7e1c893Smrg RW_REL(ABSOLUTE), 1245b7e1c893Smrg INDEX_GPR(0), 1246b7e1c893Smrg ELEM_SIZE(0)); 1247b7e1c893Smrg shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1248b7e1c893Smrg SRC_SEL_Y(SQ_SEL_Y), 1249ad43ddacSmrg SRC_SEL_Z(SQ_SEL_0), 1250ad43ddacSmrg SRC_SEL_W(SQ_SEL_1), 1251b7e1c893Smrg R6xx_ELEM_LOOP(0), 1252b7e1c893Smrg BURST_COUNT(1), 1253b7e1c893Smrg END_OF_PROGRAM(0), 1254b7e1c893Smrg VALID_PIXEL_MODE(0), 1255b7e1c893Smrg CF_INST(SQ_CF_INST_EXPORT_DONE), 1256b7e1c893Smrg WHOLE_QUAD_MODE(0), 1257b7e1c893Smrg BARRIER(1)); 1258ad43ddacSmrg /* 6 - src */ 1259b7e1c893Smrg shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 1260b7e1c893Smrg TYPE(SQ_EXPORT_PARAM), 1261b7e1c893Smrg RW_GPR(1), 1262b7e1c893Smrg RW_REL(ABSOLUTE), 1263b7e1c893Smrg INDEX_GPR(0), 1264b7e1c893Smrg ELEM_SIZE(0)); 1265b7e1c893Smrg shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1266b7e1c893Smrg SRC_SEL_Y(SQ_SEL_Y), 1267ad43ddacSmrg SRC_SEL_Z(SQ_SEL_0), 1268ad43ddacSmrg SRC_SEL_W(SQ_SEL_1), 
1269b7e1c893Smrg R6xx_ELEM_LOOP(0), 1270b7e1c893Smrg BURST_COUNT(1), 1271b7e1c893Smrg END_OF_PROGRAM(0), 1272b7e1c893Smrg VALID_PIXEL_MODE(0), 1273b7e1c893Smrg CF_INST(SQ_CF_INST_EXPORT), 1274b7e1c893Smrg WHOLE_QUAD_MODE(0), 1275b7e1c893Smrg BARRIER(0)); 1276ad43ddacSmrg /* 7 - mask */ 1277b7e1c893Smrg shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1), 1278b7e1c893Smrg TYPE(SQ_EXPORT_PARAM), 1279b7e1c893Smrg RW_GPR(0), 1280b7e1c893Smrg RW_REL(ABSOLUTE), 1281b7e1c893Smrg INDEX_GPR(0), 1282b7e1c893Smrg ELEM_SIZE(0)); 1283b7e1c893Smrg shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1284b7e1c893Smrg SRC_SEL_Y(SQ_SEL_Y), 1285ad43ddacSmrg SRC_SEL_Z(SQ_SEL_0), 1286ad43ddacSmrg SRC_SEL_W(SQ_SEL_1), 1287b7e1c893Smrg R6xx_ELEM_LOOP(0), 1288b7e1c893Smrg BURST_COUNT(1), 1289b7e1c893Smrg END_OF_PROGRAM(0), 1290b7e1c893Smrg VALID_PIXEL_MODE(0), 1291b7e1c893Smrg CF_INST(SQ_CF_INST_EXPORT_DONE), 1292b7e1c893Smrg WHOLE_QUAD_MODE(0), 1293b7e1c893Smrg BARRIER(0)); 1294ad43ddacSmrg /* 8 */ 1295b7e1c893Smrg shader[i++] = CF_DWORD0(ADDR(0)); 1296b7e1c893Smrg shader[i++] = CF_DWORD1(POP_COUNT(0), 1297b7e1c893Smrg CF_CONST(0), 1298b7e1c893Smrg COND(SQ_CF_COND_ACTIVE), 1299b7e1c893Smrg I_COUNT(0), 1300b7e1c893Smrg CALL_COUNT(0), 1301b7e1c893Smrg END_OF_PROGRAM(0), 1302b7e1c893Smrg VALID_PIXEL_MODE(0), 1303b7e1c893Smrg CF_INST(SQ_CF_INST_RETURN), 1304b7e1c893Smrg WHOLE_QUAD_MODE(0), 1305b7e1c893Smrg BARRIER(1)); 13060974d292Smrg /* 9 - non-mask sub */ 1307921a55d8Smrg shader[i++] = CF_DWORD0(ADDR(50)); 13080974d292Smrg shader[i++] = CF_DWORD1(POP_COUNT(0), 13090974d292Smrg CF_CONST(0), 13100974d292Smrg COND(SQ_CF_COND_ACTIVE), 13110974d292Smrg I_COUNT(2), 13120974d292Smrg CALL_COUNT(0), 13130974d292Smrg END_OF_PROGRAM(0), 13140974d292Smrg VALID_PIXEL_MODE(0), 13150974d292Smrg CF_INST(SQ_CF_INST_VTX), 13160974d292Smrg WHOLE_QUAD_MODE(0), 13170974d292Smrg BARRIER(1)); 1318b7e1c893Smrg 13190974d292Smrg /* 10 - ALU */ 1320921a55d8Smrg shader[i++] = CF_ALU_DWORD0(ADDR(34), 
13210974d292Smrg KCACHE_BANK0(0), 13220974d292Smrg KCACHE_BANK1(0), 13230974d292Smrg KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 13240974d292Smrg shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 13250974d292Smrg KCACHE_ADDR0(0), 13260974d292Smrg KCACHE_ADDR1(0), 1327921a55d8Smrg I_COUNT(10), 13280974d292Smrg USES_WATERFALL(0), 13290974d292Smrg CF_INST(SQ_CF_INST_ALU), 13300974d292Smrg WHOLE_QUAD_MODE(0), 13310974d292Smrg BARRIER(1)); 1332ad43ddacSmrg 13330974d292Smrg /* 11 - dst */ 13340974d292Smrg shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 13350974d292Smrg TYPE(SQ_EXPORT_POS), 13360974d292Smrg RW_GPR(1), 13370974d292Smrg RW_REL(ABSOLUTE), 13380974d292Smrg INDEX_GPR(0), 13390974d292Smrg ELEM_SIZE(0)); 13400974d292Smrg shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 13410974d292Smrg SRC_SEL_Y(SQ_SEL_Y), 13420974d292Smrg SRC_SEL_Z(SQ_SEL_0), 13430974d292Smrg SRC_SEL_W(SQ_SEL_1), 13440974d292Smrg R6xx_ELEM_LOOP(0), 13450974d292Smrg BURST_COUNT(0), 13460974d292Smrg END_OF_PROGRAM(0), 13470974d292Smrg VALID_PIXEL_MODE(0), 13480974d292Smrg CF_INST(SQ_CF_INST_EXPORT_DONE), 13490974d292Smrg WHOLE_QUAD_MODE(0), 13500974d292Smrg BARRIER(1)); 13510974d292Smrg /* 12 - src */ 13520974d292Smrg shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 13530974d292Smrg TYPE(SQ_EXPORT_PARAM), 13540974d292Smrg RW_GPR(0), 13550974d292Smrg RW_REL(ABSOLUTE), 13560974d292Smrg INDEX_GPR(0), 13570974d292Smrg ELEM_SIZE(0)); 13580974d292Smrg shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 13590974d292Smrg SRC_SEL_Y(SQ_SEL_Y), 13600974d292Smrg SRC_SEL_Z(SQ_SEL_0), 13610974d292Smrg SRC_SEL_W(SQ_SEL_1), 13620974d292Smrg R6xx_ELEM_LOOP(0), 13630974d292Smrg BURST_COUNT(0), 13640974d292Smrg END_OF_PROGRAM(0), 13650974d292Smrg VALID_PIXEL_MODE(0), 13660974d292Smrg CF_INST(SQ_CF_INST_EXPORT_DONE), 13670974d292Smrg WHOLE_QUAD_MODE(0), 13680974d292Smrg BARRIER(0)); 13690974d292Smrg /* 13 */ 13700974d292Smrg shader[i++] = CF_DWORD0(ADDR(0)); 13710974d292Smrg 
shader[i++] = CF_DWORD1(POP_COUNT(0), 13720974d292Smrg CF_CONST(0), 13730974d292Smrg COND(SQ_CF_COND_ACTIVE), 13740974d292Smrg I_COUNT(0), 13750974d292Smrg CALL_COUNT(0), 13760974d292Smrg END_OF_PROGRAM(0), 13770974d292Smrg VALID_PIXEL_MODE(0), 13780974d292Smrg CF_INST(SQ_CF_INST_RETURN), 13790974d292Smrg WHOLE_QUAD_MODE(0), 13800974d292Smrg BARRIER(1)); 13810974d292Smrg 13820974d292Smrg 1383921a55d8Smrg /* 14 srcX.x DOT4 - mask */ 1384921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1385921a55d8Smrg SRC0_REL(ABSOLUTE), 1386921a55d8Smrg SRC0_ELEM(ELEM_X), 1387921a55d8Smrg SRC0_NEG(0), 1388921a55d8Smrg SRC1_SEL(ALU_SRC_CFILE_BASE + 0), 1389921a55d8Smrg SRC1_REL(ABSOLUTE), 1390921a55d8Smrg SRC1_ELEM(ELEM_X), 1391921a55d8Smrg SRC1_NEG(0), 1392921a55d8Smrg INDEX_MODE(SQ_INDEX_LOOP), 1393921a55d8Smrg PRED_SEL(SQ_PRED_SEL_OFF), 1394921a55d8Smrg LAST(0)); 1395921a55d8Smrg shader[i++] = ALU_DWORD1_OP2(ChipSet, 1396921a55d8Smrg SRC0_ABS(0), 1397921a55d8Smrg SRC1_ABS(0), 1398921a55d8Smrg UPDATE_EXECUTE_MASK(0), 1399921a55d8Smrg UPDATE_PRED(0), 1400921a55d8Smrg WRITE_MASK(1), 1401921a55d8Smrg FOG_MERGE(0), 1402921a55d8Smrg OMOD(SQ_ALU_OMOD_OFF), 1403921a55d8Smrg ALU_INST(SQ_OP2_INST_DOT4), 1404921a55d8Smrg BANK_SWIZZLE(SQ_ALU_VEC_012), 1405921a55d8Smrg DST_GPR(3), 1406921a55d8Smrg DST_REL(ABSOLUTE), 1407921a55d8Smrg DST_ELEM(ELEM_X), 1408921a55d8Smrg CLAMP(0)); 1409921a55d8Smrg 1410921a55d8Smrg /* 15 srcX.y DOT4 - mask */ 1411921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 14120974d292Smrg SRC0_REL(ABSOLUTE), 14130974d292Smrg SRC0_ELEM(ELEM_Y), 14140974d292Smrg SRC0_NEG(0), 1415921a55d8Smrg SRC1_SEL(ALU_SRC_CFILE_BASE + 0), 14160974d292Smrg SRC1_REL(ABSOLUTE), 14170974d292Smrg SRC1_ELEM(ELEM_Y), 14180974d292Smrg SRC1_NEG(0), 14190974d292Smrg INDEX_MODE(SQ_INDEX_LOOP), 14200974d292Smrg PRED_SEL(SQ_PRED_SEL_OFF), 1421921a55d8Smrg LAST(0)); 1422921a55d8Smrg shader[i++] = ALU_DWORD1_OP2(ChipSet, 1423921a55d8Smrg SRC0_ABS(0), 
1424921a55d8Smrg SRC1_ABS(0), 1425921a55d8Smrg UPDATE_EXECUTE_MASK(0), 1426921a55d8Smrg UPDATE_PRED(0), 1427921a55d8Smrg WRITE_MASK(0), 1428921a55d8Smrg FOG_MERGE(0), 1429921a55d8Smrg OMOD(SQ_ALU_OMOD_OFF), 1430921a55d8Smrg ALU_INST(SQ_OP2_INST_DOT4), 14310974d292Smrg BANK_SWIZZLE(SQ_ALU_VEC_012), 1432921a55d8Smrg DST_GPR(3), 1433921a55d8Smrg DST_REL(ABSOLUTE), 1434921a55d8Smrg DST_ELEM(ELEM_Y), 1435921a55d8Smrg CLAMP(0)); 1436921a55d8Smrg 1437921a55d8Smrg /* 16 srcX.z DOT4 - mask */ 1438921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1439921a55d8Smrg SRC0_REL(ABSOLUTE), 1440921a55d8Smrg SRC0_ELEM(ELEM_Z), 1441921a55d8Smrg SRC0_NEG(0), 1442921a55d8Smrg SRC1_SEL(ALU_SRC_CFILE_BASE + 0), 1443921a55d8Smrg SRC1_REL(ABSOLUTE), 1444921a55d8Smrg SRC1_ELEM(ELEM_Z), 1445921a55d8Smrg SRC1_NEG(0), 1446921a55d8Smrg INDEX_MODE(SQ_INDEX_LOOP), 1447921a55d8Smrg PRED_SEL(SQ_PRED_SEL_OFF), 1448921a55d8Smrg LAST(0)); 1449921a55d8Smrg shader[i++] = ALU_DWORD1_OP2(ChipSet, 1450921a55d8Smrg SRC0_ABS(0), 1451921a55d8Smrg SRC1_ABS(0), 1452921a55d8Smrg UPDATE_EXECUTE_MASK(0), 1453921a55d8Smrg UPDATE_PRED(0), 1454921a55d8Smrg WRITE_MASK(0), 1455921a55d8Smrg FOG_MERGE(0), 1456921a55d8Smrg OMOD(SQ_ALU_OMOD_OFF), 1457921a55d8Smrg ALU_INST(SQ_OP2_INST_DOT4), 1458921a55d8Smrg BANK_SWIZZLE(SQ_ALU_VEC_012), 1459921a55d8Smrg DST_GPR(3), 14600974d292Smrg DST_REL(ABSOLUTE), 14610974d292Smrg DST_ELEM(ELEM_Z), 14620974d292Smrg CLAMP(0)); 1463921a55d8Smrg 1464921a55d8Smrg /* 17 srcX.w DOT4 - mask */ 1465921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1466ad43ddacSmrg SRC0_REL(ABSOLUTE), 1467921a55d8Smrg SRC0_ELEM(ELEM_W), 1468ad43ddacSmrg SRC0_NEG(0), 1469921a55d8Smrg SRC1_SEL(ALU_SRC_CFILE_BASE + 0), 1470ad43ddacSmrg SRC1_REL(ABSOLUTE), 1471921a55d8Smrg SRC1_ELEM(ELEM_W), 1472ad43ddacSmrg SRC1_NEG(0), 1473ad43ddacSmrg INDEX_MODE(SQ_INDEX_LOOP), 1474ad43ddacSmrg PRED_SEL(SQ_PRED_SEL_OFF), 1475ad43ddacSmrg LAST(1)); 1476921a55d8Smrg shader[i++] = 
ALU_DWORD1_OP2(ChipSet, 1477921a55d8Smrg SRC0_ABS(0), 1478921a55d8Smrg SRC1_ABS(0), 1479921a55d8Smrg UPDATE_EXECUTE_MASK(0), 1480921a55d8Smrg UPDATE_PRED(0), 1481921a55d8Smrg WRITE_MASK(0), 1482921a55d8Smrg FOG_MERGE(0), 1483921a55d8Smrg OMOD(SQ_ALU_OMOD_OFF), 1484921a55d8Smrg ALU_INST(SQ_OP2_INST_DOT4), 1485ad43ddacSmrg BANK_SWIZZLE(SQ_ALU_VEC_012), 1486921a55d8Smrg DST_GPR(3), 1487ad43ddacSmrg DST_REL(ABSOLUTE), 1488ad43ddacSmrg DST_ELEM(ELEM_W), 1489ad43ddacSmrg CLAMP(0)); 1490ad43ddacSmrg 1491921a55d8Smrg /* 18 srcY.x DOT4 - mask */ 1492921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1493ad43ddacSmrg SRC0_REL(ABSOLUTE), 1494ad43ddacSmrg SRC0_ELEM(ELEM_X), 1495ad43ddacSmrg SRC0_NEG(0), 1496921a55d8Smrg SRC1_SEL(ALU_SRC_CFILE_BASE + 1), 1497ad43ddacSmrg SRC1_REL(ABSOLUTE), 1498ad43ddacSmrg SRC1_ELEM(ELEM_X), 1499ad43ddacSmrg SRC1_NEG(0), 1500ad43ddacSmrg INDEX_MODE(SQ_INDEX_LOOP), 1501ad43ddacSmrg PRED_SEL(SQ_PRED_SEL_OFF), 1502ad43ddacSmrg LAST(0)); 1503921a55d8Smrg shader[i++] = ALU_DWORD1_OP2(ChipSet, 1504921a55d8Smrg SRC0_ABS(0), 1505921a55d8Smrg SRC1_ABS(0), 1506921a55d8Smrg UPDATE_EXECUTE_MASK(0), 1507921a55d8Smrg UPDATE_PRED(0), 1508921a55d8Smrg WRITE_MASK(0), 1509921a55d8Smrg FOG_MERGE(0), 1510921a55d8Smrg OMOD(SQ_ALU_OMOD_OFF), 1511921a55d8Smrg ALU_INST(SQ_OP2_INST_DOT4), 1512ad43ddacSmrg BANK_SWIZZLE(SQ_ALU_VEC_012), 1513921a55d8Smrg DST_GPR(3), 1514ad43ddacSmrg DST_REL(ABSOLUTE), 1515ad43ddacSmrg DST_ELEM(ELEM_X), 1516ad43ddacSmrg CLAMP(0)); 1517921a55d8Smrg 1518921a55d8Smrg /* 19 srcY.y DOT4 - mask */ 1519921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1520921a55d8Smrg SRC0_REL(ABSOLUTE), 1521921a55d8Smrg SRC0_ELEM(ELEM_Y), 1522921a55d8Smrg SRC0_NEG(0), 1523921a55d8Smrg SRC1_SEL(ALU_SRC_CFILE_BASE + 1), 1524921a55d8Smrg SRC1_REL(ABSOLUTE), 1525921a55d8Smrg SRC1_ELEM(ELEM_Y), 1526921a55d8Smrg SRC1_NEG(0), 1527921a55d8Smrg INDEX_MODE(SQ_INDEX_LOOP), 1528921a55d8Smrg PRED_SEL(SQ_PRED_SEL_OFF), 1529921a55d8Smrg 
LAST(0)); 1530921a55d8Smrg shader[i++] = ALU_DWORD1_OP2(ChipSet, 1531921a55d8Smrg SRC0_ABS(0), 1532921a55d8Smrg SRC1_ABS(0), 1533921a55d8Smrg UPDATE_EXECUTE_MASK(0), 1534921a55d8Smrg UPDATE_PRED(0), 1535921a55d8Smrg WRITE_MASK(1), 1536921a55d8Smrg FOG_MERGE(0), 1537921a55d8Smrg OMOD(SQ_ALU_OMOD_OFF), 1538921a55d8Smrg ALU_INST(SQ_OP2_INST_DOT4), 1539921a55d8Smrg BANK_SWIZZLE(SQ_ALU_VEC_012), 1540921a55d8Smrg DST_GPR(3), 1541921a55d8Smrg DST_REL(ABSOLUTE), 1542921a55d8Smrg DST_ELEM(ELEM_Y), 1543921a55d8Smrg CLAMP(0)); 1544921a55d8Smrg 1545921a55d8Smrg /* 20 srcY.z DOT4 - mask */ 1546921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1547921a55d8Smrg SRC0_REL(ABSOLUTE), 1548921a55d8Smrg SRC0_ELEM(ELEM_Z), 1549921a55d8Smrg SRC0_NEG(0), 1550921a55d8Smrg SRC1_SEL(ALU_SRC_CFILE_BASE + 1), 1551921a55d8Smrg SRC1_REL(ABSOLUTE), 1552921a55d8Smrg SRC1_ELEM(ELEM_Z), 1553921a55d8Smrg SRC1_NEG(0), 1554921a55d8Smrg INDEX_MODE(SQ_INDEX_LOOP), 1555921a55d8Smrg PRED_SEL(SQ_PRED_SEL_OFF), 1556921a55d8Smrg LAST(0)); 1557921a55d8Smrg shader[i++] = ALU_DWORD1_OP2(ChipSet, 1558921a55d8Smrg SRC0_ABS(0), 1559921a55d8Smrg SRC1_ABS(0), 1560921a55d8Smrg UPDATE_EXECUTE_MASK(0), 1561921a55d8Smrg UPDATE_PRED(0), 1562921a55d8Smrg WRITE_MASK(0), 1563921a55d8Smrg FOG_MERGE(0), 1564921a55d8Smrg OMOD(SQ_ALU_OMOD_OFF), 1565921a55d8Smrg ALU_INST(SQ_OP2_INST_DOT4), 1566921a55d8Smrg BANK_SWIZZLE(SQ_ALU_VEC_012), 1567921a55d8Smrg DST_GPR(3), 1568921a55d8Smrg DST_REL(ABSOLUTE), 1569921a55d8Smrg DST_ELEM(ELEM_Z), 1570921a55d8Smrg CLAMP(0)); 1571921a55d8Smrg 1572921a55d8Smrg /* 21 srcY.w DOT4 - mask */ 1573921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1574921a55d8Smrg SRC0_REL(ABSOLUTE), 1575921a55d8Smrg SRC0_ELEM(ELEM_W), 1576921a55d8Smrg SRC0_NEG(0), 1577921a55d8Smrg SRC1_SEL(ALU_SRC_CFILE_BASE + 1), 1578921a55d8Smrg SRC1_REL(ABSOLUTE), 1579921a55d8Smrg SRC1_ELEM(ELEM_W), 1580921a55d8Smrg SRC1_NEG(0), 1581921a55d8Smrg INDEX_MODE(SQ_INDEX_LOOP), 1582921a55d8Smrg 
PRED_SEL(SQ_PRED_SEL_OFF), 1583921a55d8Smrg LAST(1)); 1584921a55d8Smrg shader[i++] = ALU_DWORD1_OP2(ChipSet, 1585921a55d8Smrg SRC0_ABS(0), 1586921a55d8Smrg SRC1_ABS(0), 1587921a55d8Smrg UPDATE_EXECUTE_MASK(0), 1588921a55d8Smrg UPDATE_PRED(0), 1589921a55d8Smrg WRITE_MASK(0), 1590921a55d8Smrg FOG_MERGE(0), 1591921a55d8Smrg OMOD(SQ_ALU_OMOD_OFF), 1592921a55d8Smrg ALU_INST(SQ_OP2_INST_DOT4), 1593921a55d8Smrg BANK_SWIZZLE(SQ_ALU_VEC_012), 1594921a55d8Smrg DST_GPR(3), 1595921a55d8Smrg DST_REL(ABSOLUTE), 1596921a55d8Smrg DST_ELEM(ELEM_W), 1597921a55d8Smrg CLAMP(0)); 1598921a55d8Smrg 1599921a55d8Smrg /* 22 maskX.x DOT4 - mask */ 1600921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1601ad43ddacSmrg SRC0_REL(ABSOLUTE), 1602ad43ddacSmrg SRC0_ELEM(ELEM_X), 1603ad43ddacSmrg SRC0_NEG(0), 1604921a55d8Smrg SRC1_SEL(ALU_SRC_CFILE_BASE + 2), 1605ad43ddacSmrg SRC1_REL(ABSOLUTE), 1606ad43ddacSmrg SRC1_ELEM(ELEM_X), 1607ad43ddacSmrg SRC1_NEG(0), 1608ad43ddacSmrg INDEX_MODE(SQ_INDEX_LOOP), 1609ad43ddacSmrg PRED_SEL(SQ_PRED_SEL_OFF), 1610921a55d8Smrg LAST(0)); 1611921a55d8Smrg shader[i++] = ALU_DWORD1_OP2(ChipSet, 1612921a55d8Smrg SRC0_ABS(0), 1613921a55d8Smrg SRC1_ABS(0), 1614921a55d8Smrg UPDATE_EXECUTE_MASK(0), 1615921a55d8Smrg UPDATE_PRED(0), 1616921a55d8Smrg WRITE_MASK(1), 1617921a55d8Smrg FOG_MERGE(0), 1618921a55d8Smrg OMOD(SQ_ALU_OMOD_OFF), 1619921a55d8Smrg ALU_INST(SQ_OP2_INST_DOT4), 1620ad43ddacSmrg BANK_SWIZZLE(SQ_ALU_VEC_012), 1621921a55d8Smrg DST_GPR(4), 1622ad43ddacSmrg DST_REL(ABSOLUTE), 1623921a55d8Smrg DST_ELEM(ELEM_X), 1624ad43ddacSmrg CLAMP(0)); 1625ad43ddacSmrg 1626921a55d8Smrg /* 23 maskX.y DOT4 - mask */ 1627921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1628ad43ddacSmrg SRC0_REL(ABSOLUTE), 1629ad43ddacSmrg SRC0_ELEM(ELEM_Y), 1630ad43ddacSmrg SRC0_NEG(0), 1631921a55d8Smrg SRC1_SEL(ALU_SRC_CFILE_BASE + 2), 1632ad43ddacSmrg SRC1_REL(ABSOLUTE), 1633ad43ddacSmrg SRC1_ELEM(ELEM_Y), 1634ad43ddacSmrg SRC1_NEG(0), 1635ad43ddacSmrg 
INDEX_MODE(SQ_INDEX_LOOP), 1636ad43ddacSmrg PRED_SEL(SQ_PRED_SEL_OFF), 1637921a55d8Smrg LAST(0)); 1638921a55d8Smrg shader[i++] = ALU_DWORD1_OP2(ChipSet, 1639921a55d8Smrg SRC0_ABS(0), 1640921a55d8Smrg SRC1_ABS(0), 1641921a55d8Smrg UPDATE_EXECUTE_MASK(0), 1642921a55d8Smrg UPDATE_PRED(0), 1643921a55d8Smrg WRITE_MASK(0), 1644921a55d8Smrg FOG_MERGE(0), 1645921a55d8Smrg OMOD(SQ_ALU_OMOD_OFF), 1646921a55d8Smrg ALU_INST(SQ_OP2_INST_DOT4), 1647ad43ddacSmrg BANK_SWIZZLE(SQ_ALU_VEC_012), 1648921a55d8Smrg DST_GPR(4), 1649921a55d8Smrg DST_REL(ABSOLUTE), 1650921a55d8Smrg DST_ELEM(ELEM_Y), 1651921a55d8Smrg CLAMP(0)); 1652921a55d8Smrg 1653921a55d8Smrg /* 24 maskX.z DOT4 - mask */ 1654921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1655921a55d8Smrg SRC0_REL(ABSOLUTE), 1656921a55d8Smrg SRC0_ELEM(ELEM_Z), 1657921a55d8Smrg SRC0_NEG(0), 1658921a55d8Smrg SRC1_SEL(ALU_SRC_CFILE_BASE + 2), 1659921a55d8Smrg SRC1_REL(ABSOLUTE), 1660921a55d8Smrg SRC1_ELEM(ELEM_Z), 1661921a55d8Smrg SRC1_NEG(0), 1662921a55d8Smrg INDEX_MODE(SQ_INDEX_LOOP), 1663921a55d8Smrg PRED_SEL(SQ_PRED_SEL_OFF), 1664921a55d8Smrg LAST(0)); 1665921a55d8Smrg shader[i++] = ALU_DWORD1_OP2(ChipSet, 1666921a55d8Smrg SRC0_ABS(0), 1667921a55d8Smrg SRC1_ABS(0), 1668921a55d8Smrg UPDATE_EXECUTE_MASK(0), 1669921a55d8Smrg UPDATE_PRED(0), 1670921a55d8Smrg WRITE_MASK(0), 1671921a55d8Smrg FOG_MERGE(0), 1672921a55d8Smrg OMOD(SQ_ALU_OMOD_OFF), 1673921a55d8Smrg ALU_INST(SQ_OP2_INST_DOT4), 1674921a55d8Smrg BANK_SWIZZLE(SQ_ALU_VEC_012), 1675921a55d8Smrg DST_GPR(4), 1676ad43ddacSmrg DST_REL(ABSOLUTE), 1677ad43ddacSmrg DST_ELEM(ELEM_Z), 1678ad43ddacSmrg CLAMP(0)); 1679ad43ddacSmrg 1680921a55d8Smrg /* 25 maskX.w DOT4 - mask */ 1681921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1682ad43ddacSmrg SRC0_REL(ABSOLUTE), 1683921a55d8Smrg SRC0_ELEM(ELEM_W), 1684ad43ddacSmrg SRC0_NEG(0), 1685921a55d8Smrg SRC1_SEL(ALU_SRC_CFILE_BASE + 2), 1686ad43ddacSmrg SRC1_REL(ABSOLUTE), 1687921a55d8Smrg SRC1_ELEM(ELEM_W), 
1688ad43ddacSmrg SRC1_NEG(0), 1689ad43ddacSmrg INDEX_MODE(SQ_INDEX_LOOP), 1690ad43ddacSmrg PRED_SEL(SQ_PRED_SEL_OFF), 1691ad43ddacSmrg LAST(1)); 1692921a55d8Smrg shader[i++] = ALU_DWORD1_OP2(ChipSet, 1693921a55d8Smrg SRC0_ABS(0), 1694921a55d8Smrg SRC1_ABS(0), 1695921a55d8Smrg UPDATE_EXECUTE_MASK(0), 1696921a55d8Smrg UPDATE_PRED(0), 1697921a55d8Smrg WRITE_MASK(0), 1698921a55d8Smrg FOG_MERGE(0), 1699921a55d8Smrg OMOD(SQ_ALU_OMOD_OFF), 1700921a55d8Smrg ALU_INST(SQ_OP2_INST_DOT4), 1701ad43ddacSmrg BANK_SWIZZLE(SQ_ALU_VEC_012), 1702921a55d8Smrg DST_GPR(4), 1703ad43ddacSmrg DST_REL(ABSOLUTE), 1704ad43ddacSmrg DST_ELEM(ELEM_W), 1705ad43ddacSmrg CLAMP(0)); 1706ad43ddacSmrg 1707921a55d8Smrg /* 26 maskY.x DOT4 - mask */ 1708921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1709ad43ddacSmrg SRC0_REL(ABSOLUTE), 1710ad43ddacSmrg SRC0_ELEM(ELEM_X), 1711ad43ddacSmrg SRC0_NEG(0), 1712921a55d8Smrg SRC1_SEL(ALU_SRC_CFILE_BASE + 3), 1713ad43ddacSmrg SRC1_REL(ABSOLUTE), 1714ad43ddacSmrg SRC1_ELEM(ELEM_X), 1715ad43ddacSmrg SRC1_NEG(0), 1716ad43ddacSmrg INDEX_MODE(SQ_INDEX_LOOP), 1717ad43ddacSmrg PRED_SEL(SQ_PRED_SEL_OFF), 1718ad43ddacSmrg LAST(0)); 1719921a55d8Smrg shader[i++] = ALU_DWORD1_OP2(ChipSet, 1720921a55d8Smrg SRC0_ABS(0), 1721921a55d8Smrg SRC1_ABS(0), 1722921a55d8Smrg UPDATE_EXECUTE_MASK(0), 1723921a55d8Smrg UPDATE_PRED(0), 1724921a55d8Smrg WRITE_MASK(0), 1725921a55d8Smrg FOG_MERGE(0), 1726921a55d8Smrg OMOD(SQ_ALU_OMOD_OFF), 1727921a55d8Smrg ALU_INST(SQ_OP2_INST_DOT4), 1728ad43ddacSmrg BANK_SWIZZLE(SQ_ALU_VEC_012), 1729921a55d8Smrg DST_GPR(4), 1730ad43ddacSmrg DST_REL(ABSOLUTE), 1731ad43ddacSmrg DST_ELEM(ELEM_X), 1732ad43ddacSmrg CLAMP(0)); 1733921a55d8Smrg 1734921a55d8Smrg /* 27 maskY.y DOT4 - mask */ 1735921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1736ad43ddacSmrg SRC0_REL(ABSOLUTE), 1737921a55d8Smrg SRC0_ELEM(ELEM_Y), 1738ad43ddacSmrg SRC0_NEG(0), 1739921a55d8Smrg SRC1_SEL(ALU_SRC_CFILE_BASE + 3), 1740ad43ddacSmrg 
SRC1_REL(ABSOLUTE), 1741921a55d8Smrg SRC1_ELEM(ELEM_Y), 1742ad43ddacSmrg SRC1_NEG(0), 1743ad43ddacSmrg INDEX_MODE(SQ_INDEX_LOOP), 1744ad43ddacSmrg PRED_SEL(SQ_PRED_SEL_OFF), 1745921a55d8Smrg LAST(0)); 1746921a55d8Smrg shader[i++] = ALU_DWORD1_OP2(ChipSet, 1747921a55d8Smrg SRC0_ABS(0), 1748921a55d8Smrg SRC1_ABS(0), 1749921a55d8Smrg UPDATE_EXECUTE_MASK(0), 1750921a55d8Smrg UPDATE_PRED(0), 1751921a55d8Smrg WRITE_MASK(1), 1752921a55d8Smrg FOG_MERGE(0), 1753921a55d8Smrg OMOD(SQ_ALU_OMOD_OFF), 1754921a55d8Smrg ALU_INST(SQ_OP2_INST_DOT4), 1755ad43ddacSmrg BANK_SWIZZLE(SQ_ALU_VEC_012), 1756921a55d8Smrg DST_GPR(4), 1757ad43ddacSmrg DST_REL(ABSOLUTE), 1758ad43ddacSmrg DST_ELEM(ELEM_Y), 1759ad43ddacSmrg CLAMP(0)); 1760ad43ddacSmrg 1761921a55d8Smrg /* 28 maskY.z DOT4 - mask */ 1762921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1763921a55d8Smrg SRC0_REL(ABSOLUTE), 1764921a55d8Smrg SRC0_ELEM(ELEM_Z), 1765921a55d8Smrg SRC0_NEG(0), 1766921a55d8Smrg SRC1_SEL(ALU_SRC_CFILE_BASE + 3), 1767921a55d8Smrg SRC1_REL(ABSOLUTE), 1768921a55d8Smrg SRC1_ELEM(ELEM_Z), 1769921a55d8Smrg SRC1_NEG(0), 1770921a55d8Smrg INDEX_MODE(SQ_INDEX_LOOP), 1771921a55d8Smrg PRED_SEL(SQ_PRED_SEL_OFF), 1772921a55d8Smrg LAST(0)); 1773921a55d8Smrg shader[i++] = ALU_DWORD1_OP2(ChipSet, 1774921a55d8Smrg SRC0_ABS(0), 1775921a55d8Smrg SRC1_ABS(0), 1776921a55d8Smrg UPDATE_EXECUTE_MASK(0), 1777921a55d8Smrg UPDATE_PRED(0), 1778921a55d8Smrg WRITE_MASK(0), 1779921a55d8Smrg FOG_MERGE(0), 1780921a55d8Smrg OMOD(SQ_ALU_OMOD_OFF), 1781921a55d8Smrg ALU_INST(SQ_OP2_INST_DOT4), 1782921a55d8Smrg BANK_SWIZZLE(SQ_ALU_VEC_012), 1783921a55d8Smrg DST_GPR(4), 1784921a55d8Smrg DST_REL(ABSOLUTE), 1785921a55d8Smrg DST_ELEM(ELEM_Z), 1786921a55d8Smrg CLAMP(0)); 1787921a55d8Smrg 1788921a55d8Smrg /* 29 maskY.w DOT4 - mask */ 1789921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1790921a55d8Smrg SRC0_REL(ABSOLUTE), 1791921a55d8Smrg SRC0_ELEM(ELEM_W), 1792921a55d8Smrg SRC0_NEG(0), 1793921a55d8Smrg 
SRC1_SEL(ALU_SRC_CFILE_BASE + 3), 1794921a55d8Smrg SRC1_REL(ABSOLUTE), 1795921a55d8Smrg SRC1_ELEM(ELEM_W), 1796921a55d8Smrg SRC1_NEG(0), 1797921a55d8Smrg INDEX_MODE(SQ_INDEX_LOOP), 1798921a55d8Smrg PRED_SEL(SQ_PRED_SEL_OFF), 1799921a55d8Smrg LAST(1)); 1800921a55d8Smrg shader[i++] = ALU_DWORD1_OP2(ChipSet, 1801921a55d8Smrg SRC0_ABS(0), 1802921a55d8Smrg SRC1_ABS(0), 1803921a55d8Smrg UPDATE_EXECUTE_MASK(0), 1804921a55d8Smrg UPDATE_PRED(0), 1805921a55d8Smrg WRITE_MASK(0), 1806921a55d8Smrg FOG_MERGE(0), 1807921a55d8Smrg OMOD(SQ_ALU_OMOD_OFF), 1808921a55d8Smrg ALU_INST(SQ_OP2_INST_DOT4), 1809921a55d8Smrg BANK_SWIZZLE(SQ_ALU_VEC_012), 1810921a55d8Smrg DST_GPR(4), 1811921a55d8Smrg DST_REL(ABSOLUTE), 1812921a55d8Smrg DST_ELEM(ELEM_W), 1813921a55d8Smrg CLAMP(0)); 1814921a55d8Smrg 1815921a55d8Smrg /* 30 srcX / w */ 1816921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3), 1817ad43ddacSmrg SRC0_REL(ABSOLUTE), 1818ad43ddacSmrg SRC0_ELEM(ELEM_X), 1819ad43ddacSmrg SRC0_NEG(0), 1820921a55d8Smrg SRC1_SEL(ALU_SRC_CFILE_BASE + 0), 1821ad43ddacSmrg SRC1_REL(ABSOLUTE), 1822ad43ddacSmrg SRC1_ELEM(ELEM_W), 1823ad43ddacSmrg SRC1_NEG(0), 1824ad43ddacSmrg INDEX_MODE(SQ_INDEX_AR_X), 1825ad43ddacSmrg PRED_SEL(SQ_PRED_SEL_OFF), 1826ad43ddacSmrg LAST(1)); 1827ad43ddacSmrg shader[i++] = ALU_DWORD1_OP2(ChipSet, 1828ad43ddacSmrg SRC0_ABS(0), 1829ad43ddacSmrg SRC1_ABS(0), 1830ad43ddacSmrg UPDATE_EXECUTE_MASK(0), 1831ad43ddacSmrg UPDATE_PRED(0), 1832ad43ddacSmrg WRITE_MASK(1), 1833ad43ddacSmrg FOG_MERGE(0), 1834ad43ddacSmrg OMOD(SQ_ALU_OMOD_OFF), 1835ad43ddacSmrg ALU_INST(SQ_OP2_INST_MUL), 1836ad43ddacSmrg BANK_SWIZZLE(SQ_ALU_VEC_012), 1837ad43ddacSmrg DST_GPR(1), 1838ad43ddacSmrg DST_REL(ABSOLUTE), 1839ad43ddacSmrg DST_ELEM(ELEM_X), 1840ad43ddacSmrg CLAMP(0)); 1841ad43ddacSmrg 1842921a55d8Smrg /* 31 srcY / h */ 1843921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3), 1844ad43ddacSmrg SRC0_REL(ABSOLUTE), 1845ad43ddacSmrg SRC0_ELEM(ELEM_Y), 1846ad43ddacSmrg 
SRC0_NEG(0), 1847921a55d8Smrg SRC1_SEL(ALU_SRC_CFILE_BASE + 1), 1848ad43ddacSmrg SRC1_REL(ABSOLUTE), 1849ad43ddacSmrg SRC1_ELEM(ELEM_W), 1850ad43ddacSmrg SRC1_NEG(0), 1851ad43ddacSmrg INDEX_MODE(SQ_INDEX_AR_X), 1852ad43ddacSmrg PRED_SEL(SQ_PRED_SEL_OFF), 1853ad43ddacSmrg LAST(1)); 1854ad43ddacSmrg shader[i++] = ALU_DWORD1_OP2(ChipSet, 1855ad43ddacSmrg SRC0_ABS(0), 1856ad43ddacSmrg SRC1_ABS(0), 1857ad43ddacSmrg UPDATE_EXECUTE_MASK(0), 1858ad43ddacSmrg UPDATE_PRED(0), 1859ad43ddacSmrg WRITE_MASK(1), 1860ad43ddacSmrg FOG_MERGE(0), 1861ad43ddacSmrg OMOD(SQ_ALU_OMOD_OFF), 1862ad43ddacSmrg ALU_INST(SQ_OP2_INST_MUL), 1863ad43ddacSmrg BANK_SWIZZLE(SQ_ALU_VEC_012), 1864ad43ddacSmrg DST_GPR(1), 1865ad43ddacSmrg DST_REL(ABSOLUTE), 1866ad43ddacSmrg DST_ELEM(ELEM_Y), 1867ad43ddacSmrg CLAMP(0)); 1868ad43ddacSmrg 1869921a55d8Smrg /* 32 maskX / w */ 1870921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4), 1871ad43ddacSmrg SRC0_REL(ABSOLUTE), 1872ad43ddacSmrg SRC0_ELEM(ELEM_X), 1873ad43ddacSmrg SRC0_NEG(0), 1874921a55d8Smrg SRC1_SEL(ALU_SRC_CFILE_BASE + 2), 1875ad43ddacSmrg SRC1_REL(ABSOLUTE), 1876ad43ddacSmrg SRC1_ELEM(ELEM_W), 1877ad43ddacSmrg SRC1_NEG(0), 1878ad43ddacSmrg INDEX_MODE(SQ_INDEX_AR_X), 1879ad43ddacSmrg PRED_SEL(SQ_PRED_SEL_OFF), 1880ad43ddacSmrg LAST(1)); 1881ad43ddacSmrg shader[i++] = ALU_DWORD1_OP2(ChipSet, 1882ad43ddacSmrg SRC0_ABS(0), 1883ad43ddacSmrg SRC1_ABS(0), 1884ad43ddacSmrg UPDATE_EXECUTE_MASK(0), 1885ad43ddacSmrg UPDATE_PRED(0), 1886ad43ddacSmrg WRITE_MASK(1), 1887ad43ddacSmrg FOG_MERGE(0), 1888ad43ddacSmrg OMOD(SQ_ALU_OMOD_OFF), 1889ad43ddacSmrg ALU_INST(SQ_OP2_INST_MUL), 1890ad43ddacSmrg BANK_SWIZZLE(SQ_ALU_VEC_012), 1891ad43ddacSmrg DST_GPR(0), 1892ad43ddacSmrg DST_REL(ABSOLUTE), 1893ad43ddacSmrg DST_ELEM(ELEM_X), 1894ad43ddacSmrg CLAMP(0)); 1895ad43ddacSmrg 1896921a55d8Smrg /* 33 maskY / h */ 1897921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4), 1898ad43ddacSmrg SRC0_REL(ABSOLUTE), 1899ad43ddacSmrg 
SRC0_ELEM(ELEM_Y), 1900ad43ddacSmrg SRC0_NEG(0), 1901921a55d8Smrg SRC1_SEL(ALU_SRC_CFILE_BASE + 3), 1902ad43ddacSmrg SRC1_REL(ABSOLUTE), 1903ad43ddacSmrg SRC1_ELEM(ELEM_W), 1904ad43ddacSmrg SRC1_NEG(0), 1905ad43ddacSmrg INDEX_MODE(SQ_INDEX_AR_X), 1906ad43ddacSmrg PRED_SEL(SQ_PRED_SEL_OFF), 1907ad43ddacSmrg LAST(1)); 1908ad43ddacSmrg shader[i++] = ALU_DWORD1_OP2(ChipSet, 1909ad43ddacSmrg SRC0_ABS(0), 1910ad43ddacSmrg SRC1_ABS(0), 1911ad43ddacSmrg UPDATE_EXECUTE_MASK(0), 1912ad43ddacSmrg UPDATE_PRED(0), 1913ad43ddacSmrg WRITE_MASK(1), 1914ad43ddacSmrg FOG_MERGE(0), 1915ad43ddacSmrg OMOD(SQ_ALU_OMOD_OFF), 1916ad43ddacSmrg ALU_INST(SQ_OP2_INST_MUL), 1917ad43ddacSmrg BANK_SWIZZLE(SQ_ALU_VEC_012), 1918ad43ddacSmrg DST_GPR(0), 1919ad43ddacSmrg DST_REL(ABSOLUTE), 1920ad43ddacSmrg DST_ELEM(ELEM_Y), 1921ad43ddacSmrg CLAMP(0)); 1922ad43ddacSmrg 1923921a55d8Smrg /* 34 srcX.x DOT4 - non-mask */ 1924921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1925921a55d8Smrg SRC0_REL(ABSOLUTE), 1926921a55d8Smrg SRC0_ELEM(ELEM_X), 1927921a55d8Smrg SRC0_NEG(0), 1928921a55d8Smrg SRC1_SEL(ALU_SRC_CFILE_BASE + 0), 1929921a55d8Smrg SRC1_REL(ABSOLUTE), 1930921a55d8Smrg SRC1_ELEM(ELEM_X), 1931921a55d8Smrg SRC1_NEG(0), 1932921a55d8Smrg INDEX_MODE(SQ_INDEX_LOOP), 1933921a55d8Smrg PRED_SEL(SQ_PRED_SEL_OFF), 1934921a55d8Smrg LAST(0)); 1935921a55d8Smrg shader[i++] = ALU_DWORD1_OP2(ChipSet, 1936921a55d8Smrg SRC0_ABS(0), 1937921a55d8Smrg SRC1_ABS(0), 1938921a55d8Smrg UPDATE_EXECUTE_MASK(0), 1939921a55d8Smrg UPDATE_PRED(0), 1940921a55d8Smrg WRITE_MASK(1), 1941921a55d8Smrg FOG_MERGE(0), 1942921a55d8Smrg OMOD(SQ_ALU_OMOD_OFF), 1943921a55d8Smrg ALU_INST(SQ_OP2_INST_DOT4), 1944921a55d8Smrg BANK_SWIZZLE(SQ_ALU_VEC_012), 1945921a55d8Smrg DST_GPR(2), 1946921a55d8Smrg DST_REL(ABSOLUTE), 1947921a55d8Smrg DST_ELEM(ELEM_X), 1948921a55d8Smrg CLAMP(0)); 1949921a55d8Smrg 1950921a55d8Smrg /* 35 srcX.y DOT4 - non-mask */ 1951921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 
1952ad43ddacSmrg SRC0_REL(ABSOLUTE), 1953ad43ddacSmrg SRC0_ELEM(ELEM_Y), 1954ad43ddacSmrg SRC0_NEG(0), 1955921a55d8Smrg SRC1_SEL(ALU_SRC_CFILE_BASE + 0), 1956ad43ddacSmrg SRC1_REL(ABSOLUTE), 1957ad43ddacSmrg SRC1_ELEM(ELEM_Y), 1958ad43ddacSmrg SRC1_NEG(0), 1959ad43ddacSmrg INDEX_MODE(SQ_INDEX_LOOP), 1960ad43ddacSmrg PRED_SEL(SQ_PRED_SEL_OFF), 1961921a55d8Smrg LAST(0)); 1962921a55d8Smrg shader[i++] = ALU_DWORD1_OP2(ChipSet, 1963921a55d8Smrg SRC0_ABS(0), 1964921a55d8Smrg SRC1_ABS(0), 1965921a55d8Smrg UPDATE_EXECUTE_MASK(0), 1966921a55d8Smrg UPDATE_PRED(0), 1967921a55d8Smrg WRITE_MASK(0), 1968921a55d8Smrg FOG_MERGE(0), 1969921a55d8Smrg OMOD(SQ_ALU_OMOD_OFF), 1970921a55d8Smrg ALU_INST(SQ_OP2_INST_DOT4), 1971ad43ddacSmrg BANK_SWIZZLE(SQ_ALU_VEC_012), 1972921a55d8Smrg DST_GPR(2), 1973921a55d8Smrg DST_REL(ABSOLUTE), 1974921a55d8Smrg DST_ELEM(ELEM_Y), 1975921a55d8Smrg CLAMP(0)); 1976921a55d8Smrg 1977921a55d8Smrg /* 36 srcX.z DOT4 - non-mask */ 1978921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1979921a55d8Smrg SRC0_REL(ABSOLUTE), 1980921a55d8Smrg SRC0_ELEM(ELEM_Z), 1981921a55d8Smrg SRC0_NEG(0), 1982921a55d8Smrg SRC1_SEL(ALU_SRC_CFILE_BASE + 0), 1983921a55d8Smrg SRC1_REL(ABSOLUTE), 1984921a55d8Smrg SRC1_ELEM(ELEM_Z), 1985921a55d8Smrg SRC1_NEG(0), 1986921a55d8Smrg INDEX_MODE(SQ_INDEX_LOOP), 1987921a55d8Smrg PRED_SEL(SQ_PRED_SEL_OFF), 1988921a55d8Smrg LAST(0)); 1989921a55d8Smrg shader[i++] = ALU_DWORD1_OP2(ChipSet, 1990921a55d8Smrg SRC0_ABS(0), 1991921a55d8Smrg SRC1_ABS(0), 1992921a55d8Smrg UPDATE_EXECUTE_MASK(0), 1993921a55d8Smrg UPDATE_PRED(0), 1994921a55d8Smrg WRITE_MASK(0), 1995921a55d8Smrg FOG_MERGE(0), 1996921a55d8Smrg OMOD(SQ_ALU_OMOD_OFF), 1997921a55d8Smrg ALU_INST(SQ_OP2_INST_DOT4), 1998921a55d8Smrg BANK_SWIZZLE(SQ_ALU_VEC_012), 1999921a55d8Smrg DST_GPR(2), 2000ad43ddacSmrg DST_REL(ABSOLUTE), 2001ad43ddacSmrg DST_ELEM(ELEM_Z), 2002ad43ddacSmrg CLAMP(0)); 2003921a55d8Smrg 2004921a55d8Smrg /* 37 srcX.w DOT4 - non-mask */ 2005921a55d8Smrg 
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2006ad43ddacSmrg SRC0_REL(ABSOLUTE), 2007921a55d8Smrg SRC0_ELEM(ELEM_W), 2008ad43ddacSmrg SRC0_NEG(0), 2009921a55d8Smrg SRC1_SEL(ALU_SRC_CFILE_BASE + 0), 2010ad43ddacSmrg SRC1_REL(ABSOLUTE), 2011921a55d8Smrg SRC1_ELEM(ELEM_W), 2012ad43ddacSmrg SRC1_NEG(0), 2013ad43ddacSmrg INDEX_MODE(SQ_INDEX_LOOP), 2014ad43ddacSmrg PRED_SEL(SQ_PRED_SEL_OFF), 2015ad43ddacSmrg LAST(1)); 2016921a55d8Smrg shader[i++] = ALU_DWORD1_OP2(ChipSet, 2017921a55d8Smrg SRC0_ABS(0), 2018921a55d8Smrg SRC1_ABS(0), 2019921a55d8Smrg UPDATE_EXECUTE_MASK(0), 2020921a55d8Smrg UPDATE_PRED(0), 2021921a55d8Smrg WRITE_MASK(0), 2022921a55d8Smrg FOG_MERGE(0), 2023921a55d8Smrg OMOD(SQ_ALU_OMOD_OFF), 2024921a55d8Smrg ALU_INST(SQ_OP2_INST_DOT4), 2025ad43ddacSmrg BANK_SWIZZLE(SQ_ALU_VEC_012), 2026921a55d8Smrg DST_GPR(2), 2027ad43ddacSmrg DST_REL(ABSOLUTE), 2028ad43ddacSmrg DST_ELEM(ELEM_W), 2029ad43ddacSmrg CLAMP(0)); 2030ad43ddacSmrg 2031921a55d8Smrg /* 38 srcY.x DOT4 - non-mask */ 2032921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2033ad43ddacSmrg SRC0_REL(ABSOLUTE), 2034ad43ddacSmrg SRC0_ELEM(ELEM_X), 2035ad43ddacSmrg SRC0_NEG(0), 2036921a55d8Smrg SRC1_SEL(ALU_SRC_CFILE_BASE + 1), 2037ad43ddacSmrg SRC1_REL(ABSOLUTE), 2038ad43ddacSmrg SRC1_ELEM(ELEM_X), 2039ad43ddacSmrg SRC1_NEG(0), 2040ad43ddacSmrg INDEX_MODE(SQ_INDEX_LOOP), 2041ad43ddacSmrg PRED_SEL(SQ_PRED_SEL_OFF), 2042ad43ddacSmrg LAST(0)); 2043921a55d8Smrg shader[i++] = ALU_DWORD1_OP2(ChipSet, 2044921a55d8Smrg SRC0_ABS(0), 2045921a55d8Smrg SRC1_ABS(0), 2046921a55d8Smrg UPDATE_EXECUTE_MASK(0), 2047921a55d8Smrg UPDATE_PRED(0), 2048921a55d8Smrg WRITE_MASK(0), 2049921a55d8Smrg FOG_MERGE(0), 2050921a55d8Smrg OMOD(SQ_ALU_OMOD_OFF), 2051921a55d8Smrg ALU_INST(SQ_OP2_INST_DOT4), 2052ad43ddacSmrg BANK_SWIZZLE(SQ_ALU_VEC_012), 2053921a55d8Smrg DST_GPR(2), 2054ad43ddacSmrg DST_REL(ABSOLUTE), 2055ad43ddacSmrg DST_ELEM(ELEM_X), 2056ad43ddacSmrg CLAMP(0)); 2057921a55d8Smrg 2058921a55d8Smrg 
/* 39 srcY.y DOT4 - non-mask */ 2059921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2060ad43ddacSmrg SRC0_REL(ABSOLUTE), 2061921a55d8Smrg SRC0_ELEM(ELEM_Y), 2062ad43ddacSmrg SRC0_NEG(0), 2063921a55d8Smrg SRC1_SEL(ALU_SRC_CFILE_BASE + 1), 2064ad43ddacSmrg SRC1_REL(ABSOLUTE), 2065921a55d8Smrg SRC1_ELEM(ELEM_Y), 2066ad43ddacSmrg SRC1_NEG(0), 2067ad43ddacSmrg INDEX_MODE(SQ_INDEX_LOOP), 2068ad43ddacSmrg PRED_SEL(SQ_PRED_SEL_OFF), 2069921a55d8Smrg LAST(0)); 2070921a55d8Smrg shader[i++] = ALU_DWORD1_OP2(ChipSet, 2071921a55d8Smrg SRC0_ABS(0), 2072921a55d8Smrg SRC1_ABS(0), 2073921a55d8Smrg UPDATE_EXECUTE_MASK(0), 2074921a55d8Smrg UPDATE_PRED(0), 2075921a55d8Smrg WRITE_MASK(1), 2076921a55d8Smrg FOG_MERGE(0), 2077921a55d8Smrg OMOD(SQ_ALU_OMOD_OFF), 2078921a55d8Smrg ALU_INST(SQ_OP2_INST_DOT4), 2079ad43ddacSmrg BANK_SWIZZLE(SQ_ALU_VEC_012), 2080921a55d8Smrg DST_GPR(2), 2081ad43ddacSmrg DST_REL(ABSOLUTE), 2082ad43ddacSmrg DST_ELEM(ELEM_Y), 2083ad43ddacSmrg CLAMP(0)); 2084921a55d8Smrg 2085921a55d8Smrg /* 40 srcY.z DOT4 - non-mask */ 2086921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2087921a55d8Smrg SRC0_REL(ABSOLUTE), 2088921a55d8Smrg SRC0_ELEM(ELEM_Z), 2089921a55d8Smrg SRC0_NEG(0), 2090921a55d8Smrg SRC1_SEL(ALU_SRC_CFILE_BASE + 1), 2091921a55d8Smrg SRC1_REL(ABSOLUTE), 2092921a55d8Smrg SRC1_ELEM(ELEM_Z), 2093921a55d8Smrg SRC1_NEG(0), 2094921a55d8Smrg INDEX_MODE(SQ_INDEX_LOOP), 2095921a55d8Smrg PRED_SEL(SQ_PRED_SEL_OFF), 2096921a55d8Smrg LAST(0)); 2097921a55d8Smrg shader[i++] = ALU_DWORD1_OP2(ChipSet, 2098921a55d8Smrg SRC0_ABS(0), 2099921a55d8Smrg SRC1_ABS(0), 2100921a55d8Smrg UPDATE_EXECUTE_MASK(0), 2101921a55d8Smrg UPDATE_PRED(0), 2102921a55d8Smrg WRITE_MASK(0), 2103921a55d8Smrg FOG_MERGE(0), 2104921a55d8Smrg OMOD(SQ_ALU_OMOD_OFF), 2105921a55d8Smrg ALU_INST(SQ_OP2_INST_DOT4), 2106921a55d8Smrg BANK_SWIZZLE(SQ_ALU_VEC_012), 2107921a55d8Smrg DST_GPR(2), 2108921a55d8Smrg DST_REL(ABSOLUTE), 2109921a55d8Smrg DST_ELEM(ELEM_Z), 
2110921a55d8Smrg CLAMP(0)); 2111921a55d8Smrg 2112921a55d8Smrg /* 41 srcY.w DOT4 - non-mask */ 2113921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2114921a55d8Smrg SRC0_REL(ABSOLUTE), 2115921a55d8Smrg SRC0_ELEM(ELEM_W), 2116921a55d8Smrg SRC0_NEG(0), 2117921a55d8Smrg SRC1_SEL(ALU_SRC_CFILE_BASE + 1), 2118921a55d8Smrg SRC1_REL(ABSOLUTE), 2119921a55d8Smrg SRC1_ELEM(ELEM_W), 2120921a55d8Smrg SRC1_NEG(0), 2121921a55d8Smrg INDEX_MODE(SQ_INDEX_LOOP), 2122921a55d8Smrg PRED_SEL(SQ_PRED_SEL_OFF), 2123921a55d8Smrg LAST(1)); 2124921a55d8Smrg shader[i++] = ALU_DWORD1_OP2(ChipSet, 2125921a55d8Smrg SRC0_ABS(0), 2126921a55d8Smrg SRC1_ABS(0), 2127921a55d8Smrg UPDATE_EXECUTE_MASK(0), 2128921a55d8Smrg UPDATE_PRED(0), 2129921a55d8Smrg WRITE_MASK(0), 2130921a55d8Smrg FOG_MERGE(0), 2131921a55d8Smrg OMOD(SQ_ALU_OMOD_OFF), 2132921a55d8Smrg ALU_INST(SQ_OP2_INST_DOT4), 2133921a55d8Smrg BANK_SWIZZLE(SQ_ALU_VEC_012), 2134921a55d8Smrg DST_GPR(2), 2135921a55d8Smrg DST_REL(ABSOLUTE), 2136921a55d8Smrg DST_ELEM(ELEM_W), 2137921a55d8Smrg CLAMP(0)); 2138921a55d8Smrg 2139921a55d8Smrg /* 42 srcX / w */ 2140921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2), 2141ad43ddacSmrg SRC0_REL(ABSOLUTE), 2142ad43ddacSmrg SRC0_ELEM(ELEM_X), 2143ad43ddacSmrg SRC0_NEG(0), 2144921a55d8Smrg SRC1_SEL(ALU_SRC_CFILE_BASE + 0), 2145ad43ddacSmrg SRC1_REL(ABSOLUTE), 2146ad43ddacSmrg SRC1_ELEM(ELEM_W), 2147ad43ddacSmrg SRC1_NEG(0), 2148ad43ddacSmrg INDEX_MODE(SQ_INDEX_AR_X), 2149ad43ddacSmrg PRED_SEL(SQ_PRED_SEL_OFF), 2150ad43ddacSmrg LAST(1)); 2151ad43ddacSmrg shader[i++] = ALU_DWORD1_OP2(ChipSet, 2152ad43ddacSmrg SRC0_ABS(0), 2153ad43ddacSmrg SRC1_ABS(0), 2154ad43ddacSmrg UPDATE_EXECUTE_MASK(0), 2155ad43ddacSmrg UPDATE_PRED(0), 2156ad43ddacSmrg WRITE_MASK(1), 2157ad43ddacSmrg FOG_MERGE(0), 2158ad43ddacSmrg OMOD(SQ_ALU_OMOD_OFF), 2159ad43ddacSmrg ALU_INST(SQ_OP2_INST_MUL), 2160ad43ddacSmrg BANK_SWIZZLE(SQ_ALU_VEC_012), 2161ad43ddacSmrg DST_GPR(0), 2162ad43ddacSmrg DST_REL(ABSOLUTE), 
2163ad43ddacSmrg DST_ELEM(ELEM_X), 2164ad43ddacSmrg CLAMP(0)); 2165ad43ddacSmrg 2166921a55d8Smrg /* 43 srcY / h */ 2167921a55d8Smrg shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2), 2168ad43ddacSmrg SRC0_REL(ABSOLUTE), 2169ad43ddacSmrg SRC0_ELEM(ELEM_Y), 2170ad43ddacSmrg SRC0_NEG(0), 2171921a55d8Smrg SRC1_SEL(ALU_SRC_CFILE_BASE + 1), 2172ad43ddacSmrg SRC1_REL(ABSOLUTE), 2173ad43ddacSmrg SRC1_ELEM(ELEM_W), 2174ad43ddacSmrg SRC1_NEG(0), 2175ad43ddacSmrg INDEX_MODE(SQ_INDEX_AR_X), 2176ad43ddacSmrg PRED_SEL(SQ_PRED_SEL_OFF), 2177ad43ddacSmrg LAST(1)); 2178ad43ddacSmrg shader[i++] = ALU_DWORD1_OP2(ChipSet, 2179ad43ddacSmrg SRC0_ABS(0), 2180ad43ddacSmrg SRC1_ABS(0), 2181ad43ddacSmrg UPDATE_EXECUTE_MASK(0), 2182ad43ddacSmrg UPDATE_PRED(0), 2183ad43ddacSmrg WRITE_MASK(1), 2184ad43ddacSmrg FOG_MERGE(0), 2185ad43ddacSmrg OMOD(SQ_ALU_OMOD_OFF), 2186ad43ddacSmrg ALU_INST(SQ_OP2_INST_MUL), 2187ad43ddacSmrg BANK_SWIZZLE(SQ_ALU_VEC_012), 2188ad43ddacSmrg DST_GPR(0), 2189ad43ddacSmrg DST_REL(ABSOLUTE), 2190ad43ddacSmrg DST_ELEM(ELEM_Y), 2191ad43ddacSmrg CLAMP(0)); 2192ad43ddacSmrg 2193921a55d8Smrg /* 44/45 - dst - mask */ 2194b7e1c893Smrg shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 2195b7e1c893Smrg FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 2196b7e1c893Smrg FETCH_WHOLE_QUAD(0), 2197b7e1c893Smrg BUFFER_ID(0), 2198b7e1c893Smrg SRC_GPR(0), 2199b7e1c893Smrg SRC_REL(ABSOLUTE), 2200b7e1c893Smrg SRC_SEL_X(SQ_SEL_X), 22010974d292Smrg MEGA_FETCH_COUNT(24)); 22020974d292Smrg shader[i++] = VTX_DWORD1_GPR(DST_GPR(2), 2203b7e1c893Smrg DST_REL(0), 2204b7e1c893Smrg DST_SEL_X(SQ_SEL_X), 2205b7e1c893Smrg DST_SEL_Y(SQ_SEL_Y), 2206b7e1c893Smrg DST_SEL_Z(SQ_SEL_0), 2207b7e1c893Smrg DST_SEL_W(SQ_SEL_1), 2208b7e1c893Smrg USE_CONST_FIELDS(0), 2209ad43ddacSmrg DATA_FORMAT(FMT_32_32_FLOAT), 2210ad43ddacSmrg NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 2211ad43ddacSmrg FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 2212b7e1c893Smrg SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 2213b7e1c893Smrg shader[i++] = 
VTX_DWORD2(OFFSET(0), 2214b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN 2215b13dfe66Smrg ENDIAN_SWAP(SQ_ENDIAN_8IN32), 2216b13dfe66Smrg#else 2217b13dfe66Smrg ENDIAN_SWAP(SQ_ENDIAN_NONE), 2218b13dfe66Smrg#endif 2219b7e1c893Smrg CONST_BUF_NO_STRIDE(0), 2220b7e1c893Smrg MEGA_FETCH(1)); 2221b7e1c893Smrg shader[i++] = VTX_DWORD_PAD; 2222921a55d8Smrg /* 46/47 - src */ 2223b7e1c893Smrg shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 2224b7e1c893Smrg FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 2225b7e1c893Smrg FETCH_WHOLE_QUAD(0), 2226b7e1c893Smrg BUFFER_ID(0), 2227b7e1c893Smrg SRC_GPR(0), 2228b7e1c893Smrg SRC_REL(ABSOLUTE), 2229b7e1c893Smrg SRC_SEL_X(SQ_SEL_X), 2230b7e1c893Smrg MEGA_FETCH_COUNT(8)); 22310974d292Smrg shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 2232b7e1c893Smrg DST_REL(0), 2233b7e1c893Smrg DST_SEL_X(SQ_SEL_X), 2234b7e1c893Smrg DST_SEL_Y(SQ_SEL_Y), 2235ad43ddacSmrg DST_SEL_Z(SQ_SEL_1), 2236ad43ddacSmrg DST_SEL_W(SQ_SEL_0), 2237b7e1c893Smrg USE_CONST_FIELDS(0), 2238ad43ddacSmrg DATA_FORMAT(FMT_32_32_FLOAT), 2239ad43ddacSmrg NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 2240ad43ddacSmrg FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 2241b7e1c893Smrg SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 2242b7e1c893Smrg shader[i++] = VTX_DWORD2(OFFSET(8), 2243b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN 2244b13dfe66Smrg ENDIAN_SWAP(SQ_ENDIAN_8IN32), 2245b13dfe66Smrg#else 2246b13dfe66Smrg ENDIAN_SWAP(SQ_ENDIAN_NONE), 2247b13dfe66Smrg#endif 2248b7e1c893Smrg CONST_BUF_NO_STRIDE(0), 2249b7e1c893Smrg MEGA_FETCH(0)); 2250b7e1c893Smrg shader[i++] = VTX_DWORD_PAD; 2251921a55d8Smrg /* 48/49 - mask */ 22520974d292Smrg shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 22530974d292Smrg FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 22540974d292Smrg FETCH_WHOLE_QUAD(0), 22550974d292Smrg BUFFER_ID(0), 22560974d292Smrg SRC_GPR(0), 22570974d292Smrg SRC_REL(ABSOLUTE), 22580974d292Smrg SRC_SEL_X(SQ_SEL_X), 22590974d292Smrg MEGA_FETCH_COUNT(8)); 22600974d292Smrg shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), 
22610974d292Smrg DST_REL(0), 22620974d292Smrg DST_SEL_X(SQ_SEL_X), 22630974d292Smrg DST_SEL_Y(SQ_SEL_Y), 22640974d292Smrg DST_SEL_Z(SQ_SEL_1), 22650974d292Smrg DST_SEL_W(SQ_SEL_0), 22660974d292Smrg USE_CONST_FIELDS(0), 22670974d292Smrg DATA_FORMAT(FMT_32_32_FLOAT), 22680974d292Smrg NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 22690974d292Smrg FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 22700974d292Smrg SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 22710974d292Smrg shader[i++] = VTX_DWORD2(OFFSET(16), 2272b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN 2273b13dfe66Smrg ENDIAN_SWAP(SQ_ENDIAN_8IN32), 2274b13dfe66Smrg#else 2275b13dfe66Smrg ENDIAN_SWAP(SQ_ENDIAN_NONE), 2276b13dfe66Smrg#endif 22770974d292Smrg CONST_BUF_NO_STRIDE(0), 22780974d292Smrg MEGA_FETCH(0)); 22790974d292Smrg shader[i++] = VTX_DWORD_PAD; 2280b7e1c893Smrg 2281921a55d8Smrg /* 50/51 - dst - non-mask */ 22820974d292Smrg shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 22830974d292Smrg FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 22840974d292Smrg FETCH_WHOLE_QUAD(0), 22850974d292Smrg BUFFER_ID(0), 22860974d292Smrg SRC_GPR(0), 22870974d292Smrg SRC_REL(ABSOLUTE), 22880974d292Smrg SRC_SEL_X(SQ_SEL_X), 22890974d292Smrg MEGA_FETCH_COUNT(16)); 22900974d292Smrg shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 22910974d292Smrg DST_REL(0), 22920974d292Smrg DST_SEL_X(SQ_SEL_X), 22930974d292Smrg DST_SEL_Y(SQ_SEL_Y), 22940974d292Smrg DST_SEL_Z(SQ_SEL_0), 22950974d292Smrg DST_SEL_W(SQ_SEL_1), 22960974d292Smrg USE_CONST_FIELDS(0), 22970974d292Smrg DATA_FORMAT(FMT_32_32_FLOAT), 22980974d292Smrg NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 22990974d292Smrg FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 23000974d292Smrg SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 23010974d292Smrg shader[i++] = VTX_DWORD2(OFFSET(0), 2302b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN 2303b13dfe66Smrg ENDIAN_SWAP(SQ_ENDIAN_8IN32), 2304b13dfe66Smrg#else 2305b13dfe66Smrg ENDIAN_SWAP(SQ_ENDIAN_NONE), 2306b13dfe66Smrg#endif 23070974d292Smrg CONST_BUF_NO_STRIDE(0), 23080974d292Smrg 
MEGA_FETCH(1)); 23090974d292Smrg shader[i++] = VTX_DWORD_PAD; 2310921a55d8Smrg /* 52/53 - src */ 23110974d292Smrg shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 23120974d292Smrg FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 23130974d292Smrg FETCH_WHOLE_QUAD(0), 23140974d292Smrg BUFFER_ID(0), 23150974d292Smrg SRC_GPR(0), 23160974d292Smrg SRC_REL(ABSOLUTE), 23170974d292Smrg SRC_SEL_X(SQ_SEL_X), 23180974d292Smrg MEGA_FETCH_COUNT(8)); 23190974d292Smrg shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), 23200974d292Smrg DST_REL(0), 23210974d292Smrg DST_SEL_X(SQ_SEL_X), 23220974d292Smrg DST_SEL_Y(SQ_SEL_Y), 23230974d292Smrg DST_SEL_Z(SQ_SEL_1), 23240974d292Smrg DST_SEL_W(SQ_SEL_0), 23250974d292Smrg USE_CONST_FIELDS(0), 23260974d292Smrg DATA_FORMAT(FMT_32_32_FLOAT), 23270974d292Smrg NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 23280974d292Smrg FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 23290974d292Smrg SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 23300974d292Smrg shader[i++] = VTX_DWORD2(OFFSET(8), 2331b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN 2332b13dfe66Smrg ENDIAN_SWAP(SQ_ENDIAN_8IN32), 2333b13dfe66Smrg#else 2334b13dfe66Smrg ENDIAN_SWAP(SQ_ENDIAN_NONE), 2335b13dfe66Smrg#endif 23360974d292Smrg CONST_BUF_NO_STRIDE(0), 23370974d292Smrg MEGA_FETCH(0)); 23380974d292Smrg shader[i++] = VTX_DWORD_PAD; 23390974d292Smrg 23400974d292Smrg return i; 23410974d292Smrg} 23420974d292Smrg

/* comp ps --------------------------------------- */
/*
 * Emit the composite pixel shader into `shader`.
 *
 * ChipSet - chip family; only forwarded to ALU_DWORD1_OP2().
 * shader  - output buffer; this function stores 40 dwords
 *           (10 CF entries * 2, 4 ALU ops * 2, 3 TEX fetches * 4).
 *
 * Returns the number of dwords written.
 *
 * Slot numbers in the comments below count pairs of dwords, which is
 * also the unit used by the ADDR() fields: CF entries occupy slots 0-9,
 * the ALU clause slots 10-13 and the TEX fetches slots 14-19.
 *
 * Layout: entries 0 and 1 are CALLs to the "mask" (slot 3) and
 * "non-mask" (slot 7) subroutines, conditioned on COND_BOOL and
 * COND_NOT_BOOL of CF_CONST(0) respectively, so presumably exactly one
 * of them runs per invocation (NOTE(review): confirm against the SQ
 * documentation).  Entry 2 terminates the program.
 */
int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
{
    uint32_t *out = shader;   /* write cursor, advanced once per dword */

    /* ---- control flow ------------------------------------------- */

    /* 0 - call the mask subroutine when the bool condition holds */
    *out++ = CF_DWORD0(ADDR(3));
    *out++ = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_BOOL),
                       I_COUNT(0), CALL_COUNT(0), END_OF_PROGRAM(0),
                       VALID_PIXEL_MODE(0), CF_INST(SQ_CF_INST_CALL),
                       WHOLE_QUAD_MODE(0), BARRIER(0));

    /* 1 - call the non-mask subroutine when it does not */
    *out++ = CF_DWORD0(ADDR(7));
    *out++ = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_NOT_BOOL),
                       I_COUNT(0), CALL_COUNT(0), END_OF_PROGRAM(0),
                       VALID_PIXEL_MODE(0), CF_INST(SQ_CF_INST_CALL),
                       WHOLE_QUAD_MODE(0), BARRIER(0));

    /* 2 - NOP carrying END_OF_PROGRAM */
    *out++ = CF_DWORD0(ADDR(0));
    *out++ = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE),
                       I_COUNT(0), CALL_COUNT(0), END_OF_PROGRAM(1),
                       VALID_PIXEL_MODE(0), CF_INST(SQ_CF_INST_NOP),
                       WHOLE_QUAD_MODE(0), BARRIER(1));

    /* 3 - mask sub: TEX clause at slot 14, two fetches (src and mask) */
    *out++ = CF_DWORD0(ADDR(14));
    *out++ = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE),
                       I_COUNT(2), CALL_COUNT(0), END_OF_PROGRAM(0),
                       VALID_PIXEL_MODE(0), CF_INST(SQ_CF_INST_TEX),
                       WHOLE_QUAD_MODE(0), BARRIER(1));

    /* 4 - ALU clause at slot 10, four MULs */
    *out++ = CF_ALU_DWORD0(ADDR(10), KCACHE_BANK0(0), KCACHE_BANK1(0),
                           KCACHE_MODE0(SQ_CF_KCACHE_NOP));
    *out++ = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
                           KCACHE_ADDR0(0), KCACHE_ADDR1(0),
                           I_COUNT(4), USES_WATERFALL(0),
                           CF_INST(SQ_CF_INST_ALU),
                           WHOLE_QUAD_MODE(0), BARRIER(1));

    /* 5 - export gpr[2] (the MUL result) to pixel MRT0 */
    *out++ = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
                                     TYPE(SQ_EXPORT_PIXEL),
                                     RW_GPR(2), RW_REL(ABSOLUTE),
                                     INDEX_GPR(0), ELEM_SIZE(1));
    *out++ = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y),
                                          SRC_SEL_Z(SQ_SEL_Z), SRC_SEL_W(SQ_SEL_W),
                                          R6xx_ELEM_LOOP(0), BURST_COUNT(1),
                                          END_OF_PROGRAM(0), VALID_PIXEL_MODE(0),
                                          CF_INST(SQ_CF_INST_EXPORT_DONE),
                                          WHOLE_QUAD_MODE(0), BARRIER(1));

    /* 6 - return from the mask sub */
    *out++ = CF_DWORD0(ADDR(0));
    *out++ = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE),
                       I_COUNT(0), CALL_COUNT(0), END_OF_PROGRAM(0),
                       VALID_PIXEL_MODE(0), CF_INST(SQ_CF_INST_RETURN),
                       WHOLE_QUAD_MODE(0), BARRIER(1));

    /* 7 - non-mask sub: TEX clause at slot 18, one fetch (src) */
    *out++ = CF_DWORD0(ADDR(18));
    *out++ = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE),
                       I_COUNT(1), CALL_COUNT(0), END_OF_PROGRAM(0),
                       VALID_PIXEL_MODE(0), CF_INST(SQ_CF_INST_TEX),
                       WHOLE_QUAD_MODE(0), BARRIER(1));

    /* 8 - export gpr[0] (the fetched src) to pixel MRT0 */
    *out++ = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
                                     TYPE(SQ_EXPORT_PIXEL),
                                     RW_GPR(0), RW_REL(ABSOLUTE),
                                     INDEX_GPR(0), ELEM_SIZE(1));
    *out++ = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y),
                                          SRC_SEL_Z(SQ_SEL_Z), SRC_SEL_W(SQ_SEL_W),
                                          R6xx_ELEM_LOOP(0), BURST_COUNT(1),
                                          END_OF_PROGRAM(0), VALID_PIXEL_MODE(0),
                                          CF_INST(SQ_CF_INST_EXPORT_DONE),
                                          WHOLE_QUAD_MODE(0), BARRIER(1));

    /* 9 - return from the non-mask sub */
    *out++ = CF_DWORD0(ADDR(0));
    *out++ = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE),
                       I_COUNT(0), CALL_COUNT(0), END_OF_PROGRAM(0),
                       VALID_PIXEL_MODE(0), CF_INST(SQ_CF_INST_RETURN),
                       WHOLE_QUAD_MODE(0), BARRIER(1));

    /* ---- ALU clause: gpr[2] = gpr[1] * gpr[0], per channel, clamped --- */

    /* 10 - alu 0: MUL gpr[2].x gpr[1].x gpr[0].x */
    *out++ = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), SRC0_REL(ABSOLUTE),
                        SRC0_ELEM(ELEM_X), SRC0_NEG(0),
                        SRC1_SEL(ALU_SRC_GPR_BASE + 0), SRC1_REL(ABSOLUTE),
                        SRC1_ELEM(ELEM_X), SRC1_NEG(0),
                        INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF),
                        LAST(0));
    *out++ = ALU_DWORD1_OP2(ChipSet,
                            SRC0_ABS(0), SRC1_ABS(0),
                            UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0),
                            WRITE_MASK(1), FOG_MERGE(0),
                            OMOD(SQ_ALU_OMOD_OFF), ALU_INST(SQ_OP2_INST_MUL),
                            BANK_SWIZZLE(SQ_ALU_VEC_012),
                            DST_GPR(2), DST_REL(ABSOLUTE), DST_ELEM(ELEM_X),
                            CLAMP(1));

    /* 11 - alu 1: MUL gpr[2].y gpr[1].y gpr[0].y */
    *out++ = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), SRC0_REL(ABSOLUTE),
                        SRC0_ELEM(ELEM_Y), SRC0_NEG(0),
                        SRC1_SEL(ALU_SRC_GPR_BASE + 0), SRC1_REL(ABSOLUTE),
                        SRC1_ELEM(ELEM_Y), SRC1_NEG(0),
                        INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF),
                        LAST(0));
    *out++ = ALU_DWORD1_OP2(ChipSet,
                            SRC0_ABS(0), SRC1_ABS(0),
                            UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0),
                            WRITE_MASK(1), FOG_MERGE(0),
                            OMOD(SQ_ALU_OMOD_OFF), ALU_INST(SQ_OP2_INST_MUL),
                            BANK_SWIZZLE(SQ_ALU_VEC_012),
                            DST_GPR(2), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Y),
                            CLAMP(1));

    /* 12 - alu 2: MUL gpr[2].z gpr[1].z gpr[0].z */
    *out++ = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), SRC0_REL(ABSOLUTE),
                        SRC0_ELEM(ELEM_Z), SRC0_NEG(0),
                        SRC1_SEL(ALU_SRC_GPR_BASE + 0), SRC1_REL(ABSOLUTE),
                        SRC1_ELEM(ELEM_Z), SRC1_NEG(0),
                        INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF),
                        LAST(0));
    *out++ = ALU_DWORD1_OP2(ChipSet,
                            SRC0_ABS(0), SRC1_ABS(0),
                            UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0),
                            WRITE_MASK(1), FOG_MERGE(0),
                            OMOD(SQ_ALU_OMOD_OFF), ALU_INST(SQ_OP2_INST_MUL),
                            BANK_SWIZZLE(SQ_ALU_VEC_012),
                            DST_GPR(2), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Z),
                            CLAMP(1));

    /* 13 - alu 3: MUL gpr[2].w gpr[1].w gpr[0].w (LAST set on this one) */
    *out++ = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), SRC0_REL(ABSOLUTE),
                        SRC0_ELEM(ELEM_W), SRC0_NEG(0),
                        SRC1_SEL(ALU_SRC_GPR_BASE + 0), SRC1_REL(ABSOLUTE),
                        SRC1_ELEM(ELEM_W), SRC1_NEG(0),
                        INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF),
                        LAST(1));
    *out++ = ALU_DWORD1_OP2(ChipSet,
                            SRC0_ABS(0), SRC1_ABS(0),
                            UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0),
                            WRITE_MASK(1), FOG_MERGE(0),
                            OMOD(SQ_ALU_OMOD_OFF), ALU_INST(SQ_OP2_INST_MUL),
                            BANK_SWIZZLE(SQ_ALU_VEC_012),
                            DST_GPR(2), DST_REL(ABSOLUTE), DST_ELEM(ELEM_W),
                            CLAMP(1));

    /* ---- TEX fetches (four dwords each, last one is padding) ------ */

    /* 14/15 - src - mask: resource/sampler 0, coords gpr[0] -> gpr[0] */
    *out++ = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), BC_FRAC_MODE(0),
                        FETCH_WHOLE_QUAD(0), RESOURCE_ID(0),
                        SRC_GPR(0), SRC_REL(ABSOLUTE), R7xx_ALT_CONST(0));
    *out++ = TEX_DWORD1(DST_GPR(0), DST_REL(ABSOLUTE),
                        DST_SEL_X(SQ_SEL_X), DST_SEL_Y(SQ_SEL_Y),
                        DST_SEL_Z(SQ_SEL_Z), DST_SEL_W(SQ_SEL_W),
                        LOD_BIAS(0),
                        COORD_TYPE_X(TEX_NORMALIZED), COORD_TYPE_Y(TEX_NORMALIZED),
                        COORD_TYPE_Z(TEX_NORMALIZED), COORD_TYPE_W(TEX_NORMALIZED));
    *out++ = TEX_DWORD2(OFFSET_X(0), OFFSET_Y(0), OFFSET_Z(0),
                        SAMPLER_ID(0),
                        SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y),
                        SRC_SEL_Z(SQ_SEL_0), SRC_SEL_W(SQ_SEL_1));
    *out++ = TEX_DWORD_PAD;

    /* 16/17 - mask: resource/sampler 1, coords gpr[1] -> gpr[1] */
    *out++ = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), BC_FRAC_MODE(0),
                        FETCH_WHOLE_QUAD(0), RESOURCE_ID(1),
                        SRC_GPR(1), SRC_REL(ABSOLUTE), R7xx_ALT_CONST(0));
    *out++ = TEX_DWORD1(DST_GPR(1), DST_REL(ABSOLUTE),
                        DST_SEL_X(SQ_SEL_X), DST_SEL_Y(SQ_SEL_Y),
                        DST_SEL_Z(SQ_SEL_Z), DST_SEL_W(SQ_SEL_W),
                        LOD_BIAS(0),
                        COORD_TYPE_X(TEX_NORMALIZED), COORD_TYPE_Y(TEX_NORMALIZED),
                        COORD_TYPE_Z(TEX_NORMALIZED), COORD_TYPE_W(TEX_NORMALIZED));
    *out++ = TEX_DWORD2(OFFSET_X(0), OFFSET_Y(0), OFFSET_Z(0),
                        SAMPLER_ID(1),
                        SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y),
                        SRC_SEL_Z(SQ_SEL_0), SRC_SEL_W(SQ_SEL_1));
    *out++ = TEX_DWORD_PAD;

    /* 18/19 - src - non-mask: resource/sampler 0, coords gpr[0] -> gpr[0] */
    *out++ = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), BC_FRAC_MODE(0),
                        FETCH_WHOLE_QUAD(0), RESOURCE_ID(0),
                        SRC_GPR(0), SRC_REL(ABSOLUTE), R7xx_ALT_CONST(0));
    *out++ = TEX_DWORD1(DST_GPR(0), DST_REL(ABSOLUTE),
                        DST_SEL_X(SQ_SEL_X), DST_SEL_Y(SQ_SEL_Y),
                        DST_SEL_Z(SQ_SEL_Z), DST_SEL_W(SQ_SEL_W),
                        LOD_BIAS(0),
                        COORD_TYPE_X(TEX_NORMALIZED), COORD_TYPE_Y(TEX_NORMALIZED),
                        COORD_TYPE_Z(TEX_NORMALIZED), COORD_TYPE_W(TEX_NORMALIZED));
    *out++ = TEX_DWORD2(OFFSET_X(0), OFFSET_Y(0), OFFSET_Z(0),
                        SAMPLER_ID(0),
                        SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y),
                        SRC_SEL_Z(SQ_SEL_0), SRC_SEL_W(SQ_SEL_1));
    *out++ = TEX_DWORD_PAD;

    /* dwords emitted */
    return (int)(out - shader);
}