/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
22 * 23 * Author: Alex Deucher <alexander.deucher@amd.com> 24 * 25 */ 26 27#ifdef HAVE_CONFIG_H 28#include "config.h" 29#endif 30 31#include "xf86.h" 32 33#include "cayman_shader.h" 34#include "cayman_reg.h" 35 36/* solid vs --------------------------------------- */ 37int cayman_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader) 38{ 39 int i = 0; 40 41 /* 0 */ 42 shader[i++] = CF_DWORD0(ADDR(4), 43 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 44 shader[i++] = CF_DWORD1(POP_COUNT(0), 45 CF_CONST(0), 46 COND(SQ_CF_COND_ACTIVE), 47 I_COUNT(1), 48 VALID_PIXEL_MODE(0), 49 CF_INST(SQ_CF_INST_TC), 50 BARRIER(1)); 51 /* 1 */ 52 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 53 TYPE(SQ_EXPORT_POS), 54 RW_GPR(1), 55 RW_REL(ABSOLUTE), 56 INDEX_GPR(0), 57 ELEM_SIZE(0)); 58 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 59 SRC_SEL_Y(SQ_SEL_Y), 60 SRC_SEL_Z(SQ_SEL_Z), 61 SRC_SEL_W(SQ_SEL_W), 62 BURST_COUNT(1), 63 VALID_PIXEL_MODE(0), 64 CF_INST(SQ_CF_INST_EXPORT_DONE), 65 MARK(0), 66 BARRIER(1)); 67 /* 2 - always export a param whether it's used or not */ 68 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 69 TYPE(SQ_EXPORT_PARAM), 70 RW_GPR(0), 71 RW_REL(ABSOLUTE), 72 INDEX_GPR(0), 73 ELEM_SIZE(0)); 74 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 75 SRC_SEL_Y(SQ_SEL_Y), 76 SRC_SEL_Z(SQ_SEL_Z), 77 SRC_SEL_W(SQ_SEL_W), 78 BURST_COUNT(0), 79 VALID_PIXEL_MODE(0), 80 CF_INST(SQ_CF_INST_EXPORT_DONE), 81 MARK(0), 82 BARRIER(0)); 83 /* 3 - end */ 84 shader[i++] = CF_DWORD0(ADDR(0), 85 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 86 shader[i++] = CF_DWORD1(POP_COUNT(0), 87 CF_CONST(0), 88 COND(SQ_CF_COND_ACTIVE), 89 I_COUNT(0), 90 VALID_PIXEL_MODE(0), 91 CF_INST(SQ_CF_INST_END), 92 BARRIER(1)); 93 /* 4/5 */ 94 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 95 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 96 FETCH_WHOLE_QUAD(0), 97 BUFFER_ID(0), 98 SRC_GPR(0), 99 SRC_REL(ABSOLUTE), 100 SRC_SEL_X(SQ_SEL_X), 101 SRC_SEL_Y(SQ_SEL_Y), 102 
STRUCTURED_READ(SQ_VTX_STRU_READ_OFF), 103 LDS_REQ(0), 104 COALESCED_READ(0)); 105 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 106 DST_REL(0), 107 DST_SEL_X(SQ_SEL_X), 108 DST_SEL_Y(SQ_SEL_Y), 109 DST_SEL_Z(SQ_SEL_0), 110 DST_SEL_W(SQ_SEL_1), 111 USE_CONST_FIELDS(0), 112 DATA_FORMAT(FMT_32_32_FLOAT), 113 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 114 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 115 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 116 shader[i++] = VTX_DWORD2(OFFSET(0), 117#if X_BYTE_ORDER == X_BIG_ENDIAN 118 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 119#else 120 ENDIAN_SWAP(ENDIAN_NONE), 121#endif 122 CONST_BUF_NO_STRIDE(0), 123 ALT_CONST(0), 124 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 125 shader[i++] = VTX_DWORD_PAD; 126 127 return i; 128} 129 130/* solid ps --------------------------------------- */ 131int cayman_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader) 132{ 133 int i = 0; 134 135 /* 0 */ 136 shader[i++] = CF_ALU_DWORD0(ADDR(3), 137 KCACHE_BANK0(0), 138 KCACHE_BANK1(0), 139 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); 140 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 141 KCACHE_ADDR0(0), 142 KCACHE_ADDR1(0), 143 I_COUNT(4), 144 ALT_CONST(0), 145 CF_INST(SQ_CF_INST_ALU), 146 WHOLE_QUAD_MODE(0), 147 BARRIER(1)); 148 /* 1 */ 149 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 150 TYPE(SQ_EXPORT_PIXEL), 151 RW_GPR(0), 152 RW_REL(ABSOLUTE), 153 INDEX_GPR(0), 154 ELEM_SIZE(1)); 155 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 156 SRC_SEL_Y(SQ_SEL_Y), 157 SRC_SEL_Z(SQ_SEL_Z), 158 SRC_SEL_W(SQ_SEL_W), 159 BURST_COUNT(1), 160 VALID_PIXEL_MODE(0), 161 CF_INST(SQ_CF_INST_EXPORT_DONE), 162 MARK(0), 163 BARRIER(1)); 164 165 /* 2 - end */ 166 shader[i++] = CF_DWORD0(ADDR(0), 167 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 168 shader[i++] = CF_DWORD1(POP_COUNT(0), 169 CF_CONST(0), 170 COND(SQ_CF_COND_ACTIVE), 171 I_COUNT(0), 172 VALID_PIXEL_MODE(0), 173 CF_INST(SQ_CF_INST_END), 174 BARRIER(1)); 175 /* 3 */ 176 shader[i++] = 
ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 177 SRC0_REL(ABSOLUTE), 178 SRC0_ELEM(ELEM_X), 179 SRC0_NEG(0), 180 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 181 SRC1_REL(ABSOLUTE), 182 SRC1_ELEM(ELEM_X), 183 SRC1_NEG(0), 184 INDEX_MODE(SQ_INDEX_AR_X), 185 PRED_SEL(SQ_PRED_SEL_OFF), 186 LAST(0)); 187 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 188 SRC1_ABS(0), 189 UPDATE_EXECUTE_MASK(0), 190 UPDATE_PRED(0), 191 WRITE_MASK(1), 192 OMOD(SQ_ALU_OMOD_OFF), 193 ALU_INST(SQ_OP2_INST_MOV), 194 BANK_SWIZZLE(SQ_ALU_VEC_012), 195 DST_GPR(0), 196 DST_REL(ABSOLUTE), 197 DST_ELEM(ELEM_X), 198 CLAMP(1)); 199 /* 4 */ 200 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 201 SRC0_REL(ABSOLUTE), 202 SRC0_ELEM(ELEM_Y), 203 SRC0_NEG(0), 204 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 205 SRC1_REL(ABSOLUTE), 206 SRC1_ELEM(ELEM_Y), 207 SRC1_NEG(0), 208 INDEX_MODE(SQ_INDEX_AR_X), 209 PRED_SEL(SQ_PRED_SEL_OFF), 210 LAST(0)); 211 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 212 SRC1_ABS(0), 213 UPDATE_EXECUTE_MASK(0), 214 UPDATE_PRED(0), 215 WRITE_MASK(1), 216 OMOD(SQ_ALU_OMOD_OFF), 217 ALU_INST(SQ_OP2_INST_MOV), 218 BANK_SWIZZLE(SQ_ALU_VEC_012), 219 DST_GPR(0), 220 DST_REL(ABSOLUTE), 221 DST_ELEM(ELEM_Y), 222 CLAMP(1)); 223 /* 5 */ 224 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 225 SRC0_REL(ABSOLUTE), 226 SRC0_ELEM(ELEM_Z), 227 SRC0_NEG(0), 228 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 229 SRC1_REL(ABSOLUTE), 230 SRC1_ELEM(ELEM_Z), 231 SRC1_NEG(0), 232 INDEX_MODE(SQ_INDEX_AR_X), 233 PRED_SEL(SQ_PRED_SEL_OFF), 234 LAST(0)); 235 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 236 SRC1_ABS(0), 237 UPDATE_EXECUTE_MASK(0), 238 UPDATE_PRED(0), 239 WRITE_MASK(1), 240 OMOD(SQ_ALU_OMOD_OFF), 241 ALU_INST(SQ_OP2_INST_MOV), 242 BANK_SWIZZLE(SQ_ALU_VEC_012), 243 DST_GPR(0), 244 DST_REL(ABSOLUTE), 245 DST_ELEM(ELEM_Z), 246 CLAMP(1)); 247 /* 6 */ 248 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 249 SRC0_REL(ABSOLUTE), 250 SRC0_ELEM(ELEM_W), 251 SRC0_NEG(0), 252 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 253 
SRC1_REL(ABSOLUTE), 254 SRC1_ELEM(ELEM_W), 255 SRC1_NEG(0), 256 INDEX_MODE(SQ_INDEX_AR_X), 257 PRED_SEL(SQ_PRED_SEL_OFF), 258 LAST(1)); 259 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 260 SRC1_ABS(0), 261 UPDATE_EXECUTE_MASK(0), 262 UPDATE_PRED(0), 263 WRITE_MASK(1), 264 OMOD(SQ_ALU_OMOD_OFF), 265 ALU_INST(SQ_OP2_INST_MOV), 266 BANK_SWIZZLE(SQ_ALU_VEC_012), 267 DST_GPR(0), 268 DST_REL(ABSOLUTE), 269 DST_ELEM(ELEM_W), 270 CLAMP(1)); 271 272 return i; 273} 274 275/* copy vs --------------------------------------- */ 276int cayman_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader) 277{ 278 int i = 0; 279 280 /* 0 */ 281 shader[i++] = CF_DWORD0(ADDR(4), 282 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 283 shader[i++] = CF_DWORD1(POP_COUNT(0), 284 CF_CONST(0), 285 COND(SQ_CF_COND_ACTIVE), 286 I_COUNT(2), 287 VALID_PIXEL_MODE(0), 288 CF_INST(SQ_CF_INST_TC), 289 BARRIER(1)); 290 /* 1 */ 291 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 292 TYPE(SQ_EXPORT_POS), 293 RW_GPR(1), 294 RW_REL(ABSOLUTE), 295 INDEX_GPR(0), 296 ELEM_SIZE(0)); 297 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 298 SRC_SEL_Y(SQ_SEL_Y), 299 SRC_SEL_Z(SQ_SEL_Z), 300 SRC_SEL_W(SQ_SEL_W), 301 BURST_COUNT(0), 302 VALID_PIXEL_MODE(0), 303 CF_INST(SQ_CF_INST_EXPORT_DONE), 304 MARK(0), 305 BARRIER(1)); 306 /* 2 */ 307 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 308 TYPE(SQ_EXPORT_PARAM), 309 RW_GPR(0), 310 RW_REL(ABSOLUTE), 311 INDEX_GPR(0), 312 ELEM_SIZE(0)); 313 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 314 SRC_SEL_Y(SQ_SEL_Y), 315 SRC_SEL_Z(SQ_SEL_Z), 316 SRC_SEL_W(SQ_SEL_W), 317 BURST_COUNT(0), 318 VALID_PIXEL_MODE(0), 319 CF_INST(SQ_CF_INST_EXPORT_DONE), 320 MARK(0), 321 BARRIER(0)); 322 /* 3 - end */ 323 shader[i++] = CF_DWORD0(ADDR(0), 324 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 325 shader[i++] = CF_DWORD1(POP_COUNT(0), 326 CF_CONST(0), 327 COND(SQ_CF_COND_ACTIVE), 328 I_COUNT(0), 329 VALID_PIXEL_MODE(0), 330 CF_INST(SQ_CF_INST_END), 331 
BARRIER(1)); 332 /* 4/5 */ 333 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 334 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 335 FETCH_WHOLE_QUAD(0), 336 BUFFER_ID(0), 337 SRC_GPR(0), 338 SRC_REL(ABSOLUTE), 339 SRC_SEL_X(SQ_SEL_X), 340 SRC_SEL_Y(SQ_SEL_Y), 341 STRUCTURED_READ(SQ_VTX_STRU_READ_OFF), 342 LDS_REQ(0), 343 COALESCED_READ(0)); 344 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 345 DST_REL(0), 346 DST_SEL_X(SQ_SEL_X), 347 DST_SEL_Y(SQ_SEL_Y), 348 DST_SEL_Z(SQ_SEL_0), 349 DST_SEL_W(SQ_SEL_1), 350 USE_CONST_FIELDS(0), 351 DATA_FORMAT(FMT_32_32_FLOAT), 352 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 353 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 354 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 355 shader[i++] = VTX_DWORD2(OFFSET(0), 356#if X_BYTE_ORDER == X_BIG_ENDIAN 357 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 358#else 359 ENDIAN_SWAP(ENDIAN_NONE), 360#endif 361 CONST_BUF_NO_STRIDE(0), 362 ALT_CONST(0), 363 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 364 shader[i++] = VTX_DWORD_PAD; 365 /* 6/7 */ 366 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 367 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 368 FETCH_WHOLE_QUAD(0), 369 BUFFER_ID(0), 370 SRC_GPR(0), 371 SRC_REL(ABSOLUTE), 372 SRC_SEL_X(SQ_SEL_X), 373 SRC_SEL_Y(SQ_SEL_Y), 374 STRUCTURED_READ(SQ_VTX_STRU_READ_OFF), 375 LDS_REQ(0), 376 COALESCED_READ(0)); 377 shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), 378 DST_REL(0), 379 DST_SEL_X(SQ_SEL_X), 380 DST_SEL_Y(SQ_SEL_Y), 381 DST_SEL_Z(SQ_SEL_0), 382 DST_SEL_W(SQ_SEL_1), 383 USE_CONST_FIELDS(0), 384 DATA_FORMAT(FMT_32_32_FLOAT), 385 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 386 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 387 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 388 shader[i++] = VTX_DWORD2(OFFSET(8), 389#if X_BYTE_ORDER == X_BIG_ENDIAN 390 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 391#else 392 ENDIAN_SWAP(ENDIAN_NONE), 393#endif 394 CONST_BUF_NO_STRIDE(0), 395 ALT_CONST(0), 396 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 397 shader[i++] = VTX_DWORD_PAD; 398 399 return i; 400} 401 402/* copy ps 
--------------------------------------- */ 403int cayman_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader) 404{ 405 int i = 0; 406 407 /* CF INST 0 */ 408 shader[i++] = CF_ALU_DWORD0(ADDR(4), 409 KCACHE_BANK0(0), 410 KCACHE_BANK1(0), 411 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 412 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 413 KCACHE_ADDR0(0), 414 KCACHE_ADDR1(0), 415 I_COUNT(4), 416 ALT_CONST(0), 417 CF_INST(SQ_CF_INST_ALU), 418 WHOLE_QUAD_MODE(0), 419 BARRIER(1)); 420 /* CF INST 1 */ 421 shader[i++] = CF_DWORD0(ADDR(8), 422 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 423 shader[i++] = CF_DWORD1(POP_COUNT(0), 424 CF_CONST(0), 425 COND(SQ_CF_COND_ACTIVE), 426 I_COUNT(1), 427 VALID_PIXEL_MODE(0), 428 CF_INST(SQ_CF_INST_TC), 429 BARRIER(1)); 430 /* CF INST 2 */ 431 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 432 TYPE(SQ_EXPORT_PIXEL), 433 RW_GPR(0), 434 RW_REL(ABSOLUTE), 435 INDEX_GPR(0), 436 ELEM_SIZE(1)); 437 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 438 SRC_SEL_Y(SQ_SEL_Y), 439 SRC_SEL_Z(SQ_SEL_Z), 440 SRC_SEL_W(SQ_SEL_W), 441 BURST_COUNT(1), 442 VALID_PIXEL_MODE(0), 443 CF_INST(SQ_CF_INST_EXPORT_DONE), 444 MARK(0), 445 BARRIER(1)); 446 /* CF INST 3 - end */ 447 shader[i++] = CF_DWORD0(ADDR(0), 448 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 449 shader[i++] = CF_DWORD1(POP_COUNT(0), 450 CF_CONST(0), 451 COND(SQ_CF_COND_ACTIVE), 452 I_COUNT(0), 453 VALID_PIXEL_MODE(0), 454 CF_INST(SQ_CF_INST_END), 455 BARRIER(1)); 456 /* 4 interpolate tex coords */ 457 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 458 SRC0_REL(ABSOLUTE), 459 SRC0_ELEM(ELEM_Y), 460 SRC0_NEG(0), 461 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 462 SRC1_REL(ABSOLUTE), 463 SRC1_ELEM(ELEM_X), 464 SRC1_NEG(0), 465 INDEX_MODE(SQ_INDEX_AR_X), 466 PRED_SEL(SQ_PRED_SEL_OFF), 467 LAST(0)); 468 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 469 SRC1_ABS(0), 470 UPDATE_EXECUTE_MASK(0), 471 UPDATE_PRED(0), 472 WRITE_MASK(1), 473 OMOD(SQ_ALU_OMOD_OFF), 474 
ALU_INST(SQ_OP2_INST_INTERP_XY), 475 BANK_SWIZZLE(SQ_ALU_VEC_210), 476 DST_GPR(0), 477 DST_REL(ABSOLUTE), 478 DST_ELEM(ELEM_X), 479 CLAMP(0)); 480 /* 5 */ 481 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 482 SRC0_REL(ABSOLUTE), 483 SRC0_ELEM(ELEM_X), 484 SRC0_NEG(0), 485 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 486 SRC1_REL(ABSOLUTE), 487 SRC1_ELEM(ELEM_X), 488 SRC1_NEG(0), 489 INDEX_MODE(SQ_INDEX_AR_X), 490 PRED_SEL(SQ_PRED_SEL_OFF), 491 LAST(0)); 492 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 493 SRC1_ABS(0), 494 UPDATE_EXECUTE_MASK(0), 495 UPDATE_PRED(0), 496 WRITE_MASK(1), 497 OMOD(SQ_ALU_OMOD_OFF), 498 ALU_INST(SQ_OP2_INST_INTERP_XY), 499 BANK_SWIZZLE(SQ_ALU_VEC_210), 500 DST_GPR(0), 501 DST_REL(ABSOLUTE), 502 DST_ELEM(ELEM_Y), 503 CLAMP(0)); 504 /* 6 */ 505 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 506 SRC0_REL(ABSOLUTE), 507 SRC0_ELEM(ELEM_Y), 508 SRC0_NEG(0), 509 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 510 SRC1_REL(ABSOLUTE), 511 SRC1_ELEM(ELEM_X), 512 SRC1_NEG(0), 513 INDEX_MODE(SQ_INDEX_AR_X), 514 PRED_SEL(SQ_PRED_SEL_OFF), 515 LAST(0)); 516 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 517 SRC1_ABS(0), 518 UPDATE_EXECUTE_MASK(0), 519 UPDATE_PRED(0), 520 WRITE_MASK(0), 521 OMOD(SQ_ALU_OMOD_OFF), 522 ALU_INST(SQ_OP2_INST_INTERP_XY), 523 BANK_SWIZZLE(SQ_ALU_VEC_210), 524 DST_GPR(0), 525 DST_REL(ABSOLUTE), 526 DST_ELEM(ELEM_Z), 527 CLAMP(0)); 528 /* 7 */ 529 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 530 SRC0_REL(ABSOLUTE), 531 SRC0_ELEM(ELEM_X), 532 SRC0_NEG(0), 533 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 534 SRC1_REL(ABSOLUTE), 535 SRC1_ELEM(ELEM_X), 536 SRC1_NEG(0), 537 INDEX_MODE(SQ_INDEX_AR_X), 538 PRED_SEL(SQ_PRED_SEL_OFF), 539 LAST(1)); 540 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 541 SRC1_ABS(0), 542 UPDATE_EXECUTE_MASK(0), 543 UPDATE_PRED(0), 544 WRITE_MASK(0), 545 OMOD(SQ_ALU_OMOD_OFF), 546 ALU_INST(SQ_OP2_INST_INTERP_XY), 547 BANK_SWIZZLE(SQ_ALU_VEC_210), 548 DST_GPR(0), 549 DST_REL(ABSOLUTE), 550 DST_ELEM(ELEM_W), 551 CLAMP(0)); 
552 553 /* 8/9 TEX INST 0 */ 554 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 555 INST_MOD(0), 556 FETCH_WHOLE_QUAD(0), 557 RESOURCE_ID(0), 558 SRC_GPR(0), 559 SRC_REL(ABSOLUTE), 560 ALT_CONST(0), 561 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 562 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 563 shader[i++] = TEX_DWORD1(DST_GPR(0), 564 DST_REL(ABSOLUTE), 565 DST_SEL_X(SQ_SEL_X), /* R */ 566 DST_SEL_Y(SQ_SEL_Y), /* G */ 567 DST_SEL_Z(SQ_SEL_Z), /* B */ 568 DST_SEL_W(SQ_SEL_W), /* A */ 569 LOD_BIAS(0), 570 COORD_TYPE_X(TEX_UNNORMALIZED), 571 COORD_TYPE_Y(TEX_UNNORMALIZED), 572 COORD_TYPE_Z(TEX_UNNORMALIZED), 573 COORD_TYPE_W(TEX_UNNORMALIZED)); 574 shader[i++] = TEX_DWORD2(OFFSET_X(0), 575 OFFSET_Y(0), 576 OFFSET_Z(0), 577 SAMPLER_ID(0), 578 SRC_SEL_X(SQ_SEL_X), 579 SRC_SEL_Y(SQ_SEL_Y), 580 SRC_SEL_Z(SQ_SEL_0), 581 SRC_SEL_W(SQ_SEL_1)); 582 shader[i++] = TEX_DWORD_PAD; 583 584 return i; 585} 586 587int cayman_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader) 588{ 589 int i = 0; 590 591 /* 0 */ 592 shader[i++] = CF_DWORD0(ADDR(8), 593 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 594 shader[i++] = CF_DWORD1(POP_COUNT(0), 595 CF_CONST(0), 596 COND(SQ_CF_COND_ACTIVE), 597 I_COUNT(2), 598 VALID_PIXEL_MODE(0), 599 CF_INST(SQ_CF_INST_TC), 600 BARRIER(1)); 601 602 /* 1 - ALU */ 603 shader[i++] = CF_ALU_DWORD0(ADDR(5), 604 KCACHE_BANK0(0), 605 KCACHE_BANK1(0), 606 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); 607 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 608 KCACHE_ADDR0(0), 609 KCACHE_ADDR1(0), 610 I_COUNT(2), 611 ALT_CONST(0), 612 CF_INST(SQ_CF_INST_ALU), 613 WHOLE_QUAD_MODE(0), 614 BARRIER(1)); 615 616 /* 2 */ 617 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 618 TYPE(SQ_EXPORT_POS), 619 RW_GPR(1), 620 RW_REL(ABSOLUTE), 621 INDEX_GPR(0), 622 ELEM_SIZE(3)); 623 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 624 SRC_SEL_Y(SQ_SEL_Y), 625 SRC_SEL_Z(SQ_SEL_Z), 626 SRC_SEL_W(SQ_SEL_W), 627 BURST_COUNT(1), 628 VALID_PIXEL_MODE(0), 629 
CF_INST(SQ_CF_INST_EXPORT_DONE), 630 MARK(0), 631 BARRIER(1)); 632 /* 3 */ 633 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 634 TYPE(SQ_EXPORT_PARAM), 635 RW_GPR(0), 636 RW_REL(ABSOLUTE), 637 INDEX_GPR(0), 638 ELEM_SIZE(3)); 639 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 640 SRC_SEL_Y(SQ_SEL_Y), 641 SRC_SEL_Z(SQ_SEL_Z), 642 SRC_SEL_W(SQ_SEL_W), 643 BURST_COUNT(1), 644 VALID_PIXEL_MODE(0), 645 CF_INST(SQ_CF_INST_EXPORT_DONE), 646 MARK(0), 647 BARRIER(0)); 648 /* 4 - end */ 649 shader[i++] = CF_DWORD0(ADDR(0), 650 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 651 shader[i++] = CF_DWORD1(POP_COUNT(0), 652 CF_CONST(0), 653 COND(SQ_CF_COND_ACTIVE), 654 I_COUNT(0), 655 VALID_PIXEL_MODE(0), 656 CF_INST(SQ_CF_INST_END), 657 BARRIER(1)); 658 /* 5 texX / w */ 659 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 660 SRC0_REL(ABSOLUTE), 661 SRC0_ELEM(ELEM_X), 662 SRC0_NEG(0), 663 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 664 SRC1_REL(ABSOLUTE), 665 SRC1_ELEM(ELEM_X), 666 SRC1_NEG(0), 667 INDEX_MODE(SQ_INDEX_AR_X), 668 PRED_SEL(SQ_PRED_SEL_OFF), 669 LAST(0)); 670 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 671 SRC1_ABS(0), 672 UPDATE_EXECUTE_MASK(0), 673 UPDATE_PRED(0), 674 WRITE_MASK(1), 675 OMOD(SQ_ALU_OMOD_OFF), 676 ALU_INST(SQ_OP2_INST_MUL), 677 BANK_SWIZZLE(SQ_ALU_VEC_012), 678 DST_GPR(0), 679 DST_REL(ABSOLUTE), 680 DST_ELEM(ELEM_X), 681 CLAMP(0)); 682 683 /* 6 texY / h */ 684 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 685 SRC0_REL(ABSOLUTE), 686 SRC0_ELEM(ELEM_Y), 687 SRC0_NEG(0), 688 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 689 SRC1_REL(ABSOLUTE), 690 SRC1_ELEM(ELEM_Y), 691 SRC1_NEG(0), 692 INDEX_MODE(SQ_INDEX_AR_X), 693 PRED_SEL(SQ_PRED_SEL_OFF), 694 LAST(1)); 695 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 696 SRC1_ABS(0), 697 UPDATE_EXECUTE_MASK(0), 698 UPDATE_PRED(0), 699 WRITE_MASK(1), 700 OMOD(SQ_ALU_OMOD_OFF), 701 ALU_INST(SQ_OP2_INST_MUL), 702 BANK_SWIZZLE(SQ_ALU_VEC_012), 703 DST_GPR(0), 704 DST_REL(ABSOLUTE), 705 
DST_ELEM(ELEM_Y), 706 CLAMP(0)); 707 708 /* 7 - padding */ 709 shader[i++] = 0x00000000; 710 shader[i++] = 0x00000000; 711 /* 8/9 */ 712 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 713 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 714 FETCH_WHOLE_QUAD(0), 715 BUFFER_ID(0), 716 SRC_GPR(0), 717 SRC_REL(ABSOLUTE), 718 SRC_SEL_X(SQ_SEL_X), 719 SRC_SEL_Y(SQ_SEL_Y), 720 STRUCTURED_READ(SQ_VTX_STRU_READ_OFF), 721 LDS_REQ(0), 722 COALESCED_READ(0)); 723 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 724 DST_REL(ABSOLUTE), 725 DST_SEL_X(SQ_SEL_X), 726 DST_SEL_Y(SQ_SEL_Y), 727 DST_SEL_Z(SQ_SEL_0), 728 DST_SEL_W(SQ_SEL_1), 729 USE_CONST_FIELDS(0), 730 DATA_FORMAT(FMT_32_32_FLOAT), 731 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 732 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 733 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 734 shader[i++] = VTX_DWORD2(OFFSET(0), 735#if X_BYTE_ORDER == X_BIG_ENDIAN 736 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 737#else 738 ENDIAN_SWAP(ENDIAN_NONE), 739#endif 740 CONST_BUF_NO_STRIDE(0), 741 ALT_CONST(0), 742 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 743 shader[i++] = VTX_DWORD_PAD; 744 /* 10/11 */ 745 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 746 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 747 FETCH_WHOLE_QUAD(0), 748 BUFFER_ID(0), 749 SRC_GPR(0), 750 SRC_REL(ABSOLUTE), 751 SRC_SEL_X(SQ_SEL_X), 752 SRC_SEL_Y(SQ_SEL_Y), 753 STRUCTURED_READ(SQ_VTX_STRU_READ_OFF), 754 LDS_REQ(0), 755 COALESCED_READ(0)); 756 shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), 757 DST_REL(ABSOLUTE), 758 DST_SEL_X(SQ_SEL_X), 759 DST_SEL_Y(SQ_SEL_Y), 760 DST_SEL_Z(SQ_SEL_0), 761 DST_SEL_W(SQ_SEL_1), 762 USE_CONST_FIELDS(0), 763 DATA_FORMAT(FMT_32_32_FLOAT), 764 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 765 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 766 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 767 shader[i++] = VTX_DWORD2(OFFSET(8), 768#if X_BYTE_ORDER == X_BIG_ENDIAN 769 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 770#else 771 ENDIAN_SWAP(ENDIAN_NONE), 772#endif 773 CONST_BUF_NO_STRIDE(0), 774 ALT_CONST(0), 775 
BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 776 shader[i++] = VTX_DWORD_PAD; 777 778 return i; 779} 780 781int cayman_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) 782{ 783 int i = 0; 784 785 /* 0 */ 786 shader[i++] = CF_ALU_DWORD0(ADDR(6), 787 KCACHE_BANK0(0), 788 KCACHE_BANK1(0), 789 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 790 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 791 KCACHE_ADDR0(0), 792 KCACHE_ADDR1(0), 793 I_COUNT(4), 794 ALT_CONST(0), 795 CF_INST(SQ_CF_INST_ALU), 796 WHOLE_QUAD_MODE(0), 797 BARRIER(1)); 798 /* 1 */ 799 shader[i++] = CF_DWORD0(ADDR(22), 800 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 801 shader[i++] = CF_DWORD1(POP_COUNT(0), 802 CF_CONST(0), 803 COND(SQ_CF_COND_BOOL), 804 I_COUNT(0), 805 VALID_PIXEL_MODE(0), 806 CF_INST(SQ_CF_INST_CALL), 807 BARRIER(0)); 808 /* 2 */ 809 shader[i++] = CF_DWORD0(ADDR(30), 810 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 811 shader[i++] = CF_DWORD1(POP_COUNT(0), 812 CF_CONST(0), 813 COND(SQ_CF_COND_NOT_BOOL), 814 I_COUNT(0), 815 VALID_PIXEL_MODE(0), 816 CF_INST(SQ_CF_INST_CALL), 817 BARRIER(0)); 818 /* 3 */ 819 shader[i++] = CF_ALU_DWORD0(ADDR(10), 820 KCACHE_BANK0(0), 821 KCACHE_BANK1(0), 822 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); 823 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 824 KCACHE_ADDR0(0), 825 KCACHE_ADDR1(0), 826 I_COUNT(12), 827 ALT_CONST(0), 828 CF_INST(SQ_CF_INST_ALU), 829 WHOLE_QUAD_MODE(0), 830 BARRIER(1)); 831 /* 4 */ 832 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 833 TYPE(SQ_EXPORT_PIXEL), 834 RW_GPR(2), 835 RW_REL(ABSOLUTE), 836 INDEX_GPR(0), 837 ELEM_SIZE(3)); 838 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 839 SRC_SEL_Y(SQ_SEL_Y), 840 SRC_SEL_Z(SQ_SEL_Z), 841 SRC_SEL_W(SQ_SEL_W), 842 BURST_COUNT(1), 843 VALID_PIXEL_MODE(0), 844 CF_INST(SQ_CF_INST_EXPORT_DONE), 845 MARK(0), 846 BARRIER(1)); 847 /* 5 - end */ 848 shader[i++] = CF_DWORD0(ADDR(0), 849 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 850 shader[i++] = 
CF_DWORD1(POP_COUNT(0), 851 CF_CONST(0), 852 COND(SQ_CF_COND_ACTIVE), 853 I_COUNT(0), 854 VALID_PIXEL_MODE(0), 855 CF_INST(SQ_CF_INST_END), 856 BARRIER(1)); 857 /* 6 interpolate tex coords */ 858 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 859 SRC0_REL(ABSOLUTE), 860 SRC0_ELEM(ELEM_Y), 861 SRC0_NEG(0), 862 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 863 SRC1_REL(ABSOLUTE), 864 SRC1_ELEM(ELEM_X), 865 SRC1_NEG(0), 866 INDEX_MODE(SQ_INDEX_AR_X), 867 PRED_SEL(SQ_PRED_SEL_OFF), 868 LAST(0)); 869 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 870 SRC1_ABS(0), 871 UPDATE_EXECUTE_MASK(0), 872 UPDATE_PRED(0), 873 WRITE_MASK(1), 874 OMOD(SQ_ALU_OMOD_OFF), 875 ALU_INST(SQ_OP2_INST_INTERP_XY), 876 BANK_SWIZZLE(SQ_ALU_VEC_210), 877 DST_GPR(0), 878 DST_REL(ABSOLUTE), 879 DST_ELEM(ELEM_X), 880 CLAMP(0)); 881 /* 7 */ 882 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 883 SRC0_REL(ABSOLUTE), 884 SRC0_ELEM(ELEM_X), 885 SRC0_NEG(0), 886 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 887 SRC1_REL(ABSOLUTE), 888 SRC1_ELEM(ELEM_X), 889 SRC1_NEG(0), 890 INDEX_MODE(SQ_INDEX_AR_X), 891 PRED_SEL(SQ_PRED_SEL_OFF), 892 LAST(0)); 893 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 894 SRC1_ABS(0), 895 UPDATE_EXECUTE_MASK(0), 896 UPDATE_PRED(0), 897 WRITE_MASK(1), 898 OMOD(SQ_ALU_OMOD_OFF), 899 ALU_INST(SQ_OP2_INST_INTERP_XY), 900 BANK_SWIZZLE(SQ_ALU_VEC_210), 901 DST_GPR(0), 902 DST_REL(ABSOLUTE), 903 DST_ELEM(ELEM_Y), 904 CLAMP(0)); 905 /* 8 */ 906 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 907 SRC0_REL(ABSOLUTE), 908 SRC0_ELEM(ELEM_Y), 909 SRC0_NEG(0), 910 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 911 SRC1_REL(ABSOLUTE), 912 SRC1_ELEM(ELEM_X), 913 SRC1_NEG(0), 914 INDEX_MODE(SQ_INDEX_AR_X), 915 PRED_SEL(SQ_PRED_SEL_OFF), 916 LAST(0)); 917 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 918 SRC1_ABS(0), 919 UPDATE_EXECUTE_MASK(0), 920 UPDATE_PRED(0), 921 WRITE_MASK(0), 922 OMOD(SQ_ALU_OMOD_OFF), 923 ALU_INST(SQ_OP2_INST_INTERP_XY), 924 BANK_SWIZZLE(SQ_ALU_VEC_210), 925 DST_GPR(0), 926 DST_REL(ABSOLUTE), 
927 DST_ELEM(ELEM_Z), 928 CLAMP(0)); 929 /* 9 */ 930 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 931 SRC0_REL(ABSOLUTE), 932 SRC0_ELEM(ELEM_X), 933 SRC0_NEG(0), 934 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 935 SRC1_REL(ABSOLUTE), 936 SRC1_ELEM(ELEM_X), 937 SRC1_NEG(0), 938 INDEX_MODE(SQ_INDEX_AR_X), 939 PRED_SEL(SQ_PRED_SEL_OFF), 940 LAST(1)); 941 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 942 SRC1_ABS(0), 943 UPDATE_EXECUTE_MASK(0), 944 UPDATE_PRED(0), 945 WRITE_MASK(0), 946 OMOD(SQ_ALU_OMOD_OFF), 947 ALU_INST(SQ_OP2_INST_INTERP_XY), 948 BANK_SWIZZLE(SQ_ALU_VEC_210), 949 DST_GPR(0), 950 DST_REL(ABSOLUTE), 951 DST_ELEM(ELEM_W), 952 CLAMP(0)); 953 954 /* 10,11,12,13 */ 955 /* r2.x = MAD(c0.w, r1.x, c0.x) */ 956 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 957 SRC0_REL(ABSOLUTE), 958 SRC0_ELEM(ELEM_W), 959 SRC0_NEG(0), 960 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 961 SRC1_REL(ABSOLUTE), 962 SRC1_ELEM(ELEM_X), 963 SRC1_NEG(0), 964 INDEX_MODE(SQ_INDEX_LOOP), 965 PRED_SEL(SQ_PRED_SEL_OFF), 966 LAST(0)); 967 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0), 968 SRC2_REL(ABSOLUTE), 969 SRC2_ELEM(ELEM_X), 970 SRC2_NEG(0), 971 ALU_INST(SQ_OP3_INST_MULADD), 972 BANK_SWIZZLE(SQ_ALU_VEC_012), 973 DST_GPR(2), 974 DST_REL(ABSOLUTE), 975 DST_ELEM(ELEM_X), 976 CLAMP(0)); 977 /* r2.y = MAD(c0.w, r1.x, c0.y) */ 978 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 979 SRC0_REL(ABSOLUTE), 980 SRC0_ELEM(ELEM_W), 981 SRC0_NEG(0), 982 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 983 SRC1_REL(ABSOLUTE), 984 SRC1_ELEM(ELEM_X), 985 SRC1_NEG(0), 986 INDEX_MODE(SQ_INDEX_LOOP), 987 PRED_SEL(SQ_PRED_SEL_OFF), 988 LAST(0)); 989 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0), 990 SRC2_REL(ABSOLUTE), 991 SRC2_ELEM(ELEM_Y), 992 SRC2_NEG(0), 993 ALU_INST(SQ_OP3_INST_MULADD), 994 BANK_SWIZZLE(SQ_ALU_VEC_012), 995 DST_GPR(2), 996 DST_REL(ABSOLUTE), 997 DST_ELEM(ELEM_Y), 998 CLAMP(0)); 999 /* r2.z = MAD(c0.w, r1.x, c0.z) */ 1000 shader[i++] = 
ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 1001 SRC0_REL(ABSOLUTE), 1002 SRC0_ELEM(ELEM_W), 1003 SRC0_NEG(0), 1004 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 1005 SRC1_REL(ABSOLUTE), 1006 SRC1_ELEM(ELEM_X), 1007 SRC1_NEG(0), 1008 INDEX_MODE(SQ_INDEX_LOOP), 1009 PRED_SEL(SQ_PRED_SEL_OFF), 1010 LAST(0)); 1011 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0), 1012 SRC2_REL(ABSOLUTE), 1013 SRC2_ELEM(ELEM_Z), 1014 SRC2_NEG(0), 1015 ALU_INST(SQ_OP3_INST_MULADD), 1016 BANK_SWIZZLE(SQ_ALU_VEC_012), 1017 DST_GPR(2), 1018 DST_REL(ABSOLUTE), 1019 DST_ELEM(ELEM_Z), 1020 CLAMP(0)); 1021 /* r2.w = MAD(0, 0, 1) */ 1022 shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), 1023 SRC0_REL(ABSOLUTE), 1024 SRC0_ELEM(ELEM_X), 1025 SRC0_NEG(0), 1026 SRC1_SEL(SQ_ALU_SRC_0), 1027 SRC1_REL(ABSOLUTE), 1028 SRC1_ELEM(ELEM_X), 1029 SRC1_NEG(0), 1030 INDEX_MODE(SQ_INDEX_LOOP), 1031 PRED_SEL(SQ_PRED_SEL_OFF), 1032 LAST(1)); 1033 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), 1034 SRC2_REL(ABSOLUTE), 1035 SRC2_ELEM(ELEM_X), 1036 SRC2_NEG(0), 1037 ALU_INST(SQ_OP3_INST_MULADD), 1038 BANK_SWIZZLE(SQ_ALU_VEC_012), 1039 DST_GPR(2), 1040 DST_REL(ABSOLUTE), 1041 DST_ELEM(ELEM_W), 1042 CLAMP(0)); 1043 1044 /* 14,15,16,17 */ 1045 /* r2.x = MAD(c1.x, r1.y, pv.x) */ 1046 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1), 1047 SRC0_REL(ABSOLUTE), 1048 SRC0_ELEM(ELEM_X), 1049 SRC0_NEG(0), 1050 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 1051 SRC1_REL(ABSOLUTE), 1052 SRC1_ELEM(ELEM_Y), 1053 SRC1_NEG(0), 1054 INDEX_MODE(SQ_INDEX_LOOP), 1055 PRED_SEL(SQ_PRED_SEL_OFF), 1056 LAST(0)); 1057 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 1058 SRC2_REL(ABSOLUTE), 1059 SRC2_ELEM(ELEM_X), 1060 SRC2_NEG(0), 1061 ALU_INST(SQ_OP3_INST_MULADD), 1062 BANK_SWIZZLE(SQ_ALU_VEC_012), 1063 DST_GPR(2), 1064 DST_REL(ABSOLUTE), 1065 DST_ELEM(ELEM_X), 1066 CLAMP(0)); 1067 /* r2.y = MAD(c1.y, r1.y, pv.y) */ 1068 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1), 1069 SRC0_REL(ABSOLUTE), 1070 
SRC0_ELEM(ELEM_Y), 1071 SRC0_NEG(0), 1072 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 1073 SRC1_REL(ABSOLUTE), 1074 SRC1_ELEM(ELEM_Y), 1075 SRC1_NEG(0), 1076 INDEX_MODE(SQ_INDEX_LOOP), 1077 PRED_SEL(SQ_PRED_SEL_OFF), 1078 LAST(0)); 1079 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 1080 SRC2_REL(ABSOLUTE), 1081 SRC2_ELEM(ELEM_Y), 1082 SRC2_NEG(0), 1083 ALU_INST(SQ_OP3_INST_MULADD), 1084 BANK_SWIZZLE(SQ_ALU_VEC_012), 1085 DST_GPR(2), 1086 DST_REL(ABSOLUTE), 1087 DST_ELEM(ELEM_Y), 1088 CLAMP(0)); 1089 /* r2.z = MAD(c1.z, r1.y, pv.z) */ 1090 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1), 1091 SRC0_REL(ABSOLUTE), 1092 SRC0_ELEM(ELEM_Z), 1093 SRC0_NEG(0), 1094 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 1095 SRC1_REL(ABSOLUTE), 1096 SRC1_ELEM(ELEM_Y), 1097 SRC1_NEG(0), 1098 INDEX_MODE(SQ_INDEX_LOOP), 1099 PRED_SEL(SQ_PRED_SEL_OFF), 1100 LAST(0)); 1101 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 1102 SRC2_REL(ABSOLUTE), 1103 SRC2_ELEM(ELEM_Z), 1104 SRC2_NEG(0), 1105 ALU_INST(SQ_OP3_INST_MULADD), 1106 BANK_SWIZZLE(SQ_ALU_VEC_012), 1107 DST_GPR(2), 1108 DST_REL(ABSOLUTE), 1109 DST_ELEM(ELEM_Z), 1110 CLAMP(0)); 1111 /* r2.w = MAD(0, 0, 1) */ 1112 shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), 1113 SRC0_REL(ABSOLUTE), 1114 SRC0_ELEM(ELEM_X), 1115 SRC0_NEG(0), 1116 SRC1_SEL(SQ_ALU_SRC_0), 1117 SRC1_REL(ABSOLUTE), 1118 SRC1_ELEM(ELEM_X), 1119 SRC1_NEG(0), 1120 INDEX_MODE(SQ_INDEX_LOOP), 1121 PRED_SEL(SQ_PRED_SEL_OFF), 1122 LAST(1)); 1123 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), 1124 SRC2_REL(ABSOLUTE), 1125 SRC2_ELEM(ELEM_W), 1126 SRC2_NEG(0), 1127 ALU_INST(SQ_OP3_INST_MULADD), 1128 BANK_SWIZZLE(SQ_ALU_VEC_012), 1129 DST_GPR(2), 1130 DST_REL(ABSOLUTE), 1131 DST_ELEM(ELEM_W), 1132 CLAMP(0)); 1133 /* 18,19,20,21 */ 1134 /* r2.x = MAD(c2.x, r1.z, pv.x) */ 1135 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2), 1136 SRC0_REL(ABSOLUTE), 1137 SRC0_ELEM(ELEM_X), 1138 SRC0_NEG(0), 1139 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 1140 SRC1_REL(ABSOLUTE), 1141 
SRC1_ELEM(ELEM_Z), 1142 SRC1_NEG(0), 1143 INDEX_MODE(SQ_INDEX_LOOP), 1144 PRED_SEL(SQ_PRED_SEL_OFF), 1145 LAST(0)); 1146 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 1147 SRC2_REL(ABSOLUTE), 1148 SRC2_ELEM(ELEM_X), 1149 SRC2_NEG(0), 1150 ALU_INST(SQ_OP3_INST_MULADD), 1151 BANK_SWIZZLE(SQ_ALU_VEC_012), 1152 DST_GPR(2), 1153 DST_REL(ABSOLUTE), 1154 DST_ELEM(ELEM_X), 1155 CLAMP(1)); 1156 /* r2.y = MAD(c2.y, r1.z, pv.y) */ 1157 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2), 1158 SRC0_REL(ABSOLUTE), 1159 SRC0_ELEM(ELEM_Y), 1160 SRC0_NEG(0), 1161 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 1162 SRC1_REL(ABSOLUTE), 1163 SRC1_ELEM(ELEM_Z), 1164 SRC1_NEG(0), 1165 INDEX_MODE(SQ_INDEX_LOOP), 1166 PRED_SEL(SQ_PRED_SEL_OFF), 1167 LAST(0)); 1168 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 1169 SRC2_REL(ABSOLUTE), 1170 SRC2_ELEM(ELEM_Y), 1171 SRC2_NEG(0), 1172 ALU_INST(SQ_OP3_INST_MULADD), 1173 BANK_SWIZZLE(SQ_ALU_VEC_012), 1174 DST_GPR(2), 1175 DST_REL(ABSOLUTE), 1176 DST_ELEM(ELEM_Y), 1177 CLAMP(1)); 1178 /* r2.z = MAD(c2.z, r1.z, pv.z) */ 1179 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2), 1180 SRC0_REL(ABSOLUTE), 1181 SRC0_ELEM(ELEM_Z), 1182 SRC0_NEG(0), 1183 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 1184 SRC1_REL(ABSOLUTE), 1185 SRC1_ELEM(ELEM_Z), 1186 SRC1_NEG(0), 1187 INDEX_MODE(SQ_INDEX_LOOP), 1188 PRED_SEL(SQ_PRED_SEL_OFF), 1189 LAST(0)); 1190 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 1191 SRC2_REL(ABSOLUTE), 1192 SRC2_ELEM(ELEM_Z), 1193 SRC2_NEG(0), 1194 ALU_INST(SQ_OP3_INST_MULADD), 1195 BANK_SWIZZLE(SQ_ALU_VEC_012), 1196 DST_GPR(2), 1197 DST_REL(ABSOLUTE), 1198 DST_ELEM(ELEM_Z), 1199 CLAMP(1)); 1200 /* r2.w = MAD(0, 0, 1) */ 1201 shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), 1202 SRC0_REL(ABSOLUTE), 1203 SRC0_ELEM(ELEM_X), 1204 SRC0_NEG(0), 1205 SRC1_SEL(SQ_ALU_SRC_0), 1206 SRC1_REL(ABSOLUTE), 1207 SRC1_ELEM(ELEM_X), 1208 SRC1_NEG(0), 1209 INDEX_MODE(SQ_INDEX_LOOP), 1210 PRED_SEL(SQ_PRED_SEL_OFF), 1211 LAST(1)); 1212 shader[i++] 
= ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), 1213 SRC2_REL(ABSOLUTE), 1214 SRC2_ELEM(ELEM_X), 1215 SRC2_NEG(0), 1216 ALU_INST(SQ_OP3_INST_MULADD), 1217 BANK_SWIZZLE(SQ_ALU_VEC_012), 1218 DST_GPR(2), 1219 DST_REL(ABSOLUTE), 1220 DST_ELEM(ELEM_W), 1221 CLAMP(1)); 1222 1223 /* 22 */ 1224 shader[i++] = CF_DWORD0(ADDR(24), 1225 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1226 shader[i++] = CF_DWORD1(POP_COUNT(0), 1227 CF_CONST(0), 1228 COND(SQ_CF_COND_ACTIVE), 1229 I_COUNT(3), 1230 VALID_PIXEL_MODE(0), 1231 CF_INST(SQ_CF_INST_TC), 1232 BARRIER(1)); 1233 /* 23 */ 1234 shader[i++] = CF_DWORD0(ADDR(0), 1235 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1236 shader[i++] = CF_DWORD1(POP_COUNT(0), 1237 CF_CONST(0), 1238 COND(SQ_CF_COND_ACTIVE), 1239 I_COUNT(0), 1240 VALID_PIXEL_MODE(0), 1241 CF_INST(SQ_CF_INST_RETURN), 1242 BARRIER(1)); 1243 /* 24/25 */ 1244 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1245 INST_MOD(0), 1246 FETCH_WHOLE_QUAD(0), 1247 RESOURCE_ID(0), 1248 SRC_GPR(0), 1249 SRC_REL(ABSOLUTE), 1250 ALT_CONST(0), 1251 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 1252 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 1253 shader[i++] = TEX_DWORD1(DST_GPR(1), 1254 DST_REL(ABSOLUTE), 1255 DST_SEL_X(SQ_SEL_X), 1256 DST_SEL_Y(SQ_SEL_MASK), 1257 DST_SEL_Z(SQ_SEL_MASK), 1258 DST_SEL_W(SQ_SEL_1), 1259 LOD_BIAS(0), 1260 COORD_TYPE_X(TEX_NORMALIZED), 1261 COORD_TYPE_Y(TEX_NORMALIZED), 1262 COORD_TYPE_Z(TEX_NORMALIZED), 1263 COORD_TYPE_W(TEX_NORMALIZED)); 1264 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1265 OFFSET_Y(0), 1266 OFFSET_Z(0), 1267 SAMPLER_ID(0), 1268 SRC_SEL_X(SQ_SEL_X), 1269 SRC_SEL_Y(SQ_SEL_Y), 1270 SRC_SEL_Z(SQ_SEL_0), 1271 SRC_SEL_W(SQ_SEL_1)); 1272 shader[i++] = TEX_DWORD_PAD; 1273 /* 26/27 */ 1274 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1275 INST_MOD(0), 1276 FETCH_WHOLE_QUAD(0), 1277 RESOURCE_ID(1), 1278 SRC_GPR(0), 1279 SRC_REL(ABSOLUTE), 1280 ALT_CONST(0), 1281 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 1282 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 1283 
shader[i++] = TEX_DWORD1(DST_GPR(1), 1284 DST_REL(ABSOLUTE), 1285 DST_SEL_X(SQ_SEL_MASK), 1286 DST_SEL_Y(SQ_SEL_MASK), 1287 DST_SEL_Z(SQ_SEL_X), 1288 DST_SEL_W(SQ_SEL_MASK), 1289 LOD_BIAS(0), 1290 COORD_TYPE_X(TEX_NORMALIZED), 1291 COORD_TYPE_Y(TEX_NORMALIZED), 1292 COORD_TYPE_Z(TEX_NORMALIZED), 1293 COORD_TYPE_W(TEX_NORMALIZED)); 1294 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1295 OFFSET_Y(0), 1296 OFFSET_Z(0), 1297 SAMPLER_ID(1), 1298 SRC_SEL_X(SQ_SEL_X), 1299 SRC_SEL_Y(SQ_SEL_Y), 1300 SRC_SEL_Z(SQ_SEL_0), 1301 SRC_SEL_W(SQ_SEL_1)); 1302 shader[i++] = TEX_DWORD_PAD; 1303 /* 28/29 */ 1304 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1305 INST_MOD(0), 1306 FETCH_WHOLE_QUAD(0), 1307 RESOURCE_ID(2), 1308 SRC_GPR(0), 1309 SRC_REL(ABSOLUTE), 1310 ALT_CONST(0), 1311 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 1312 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 1313 shader[i++] = TEX_DWORD1(DST_GPR(1), 1314 DST_REL(ABSOLUTE), 1315 DST_SEL_X(SQ_SEL_MASK), 1316 DST_SEL_Y(SQ_SEL_X), 1317 DST_SEL_Z(SQ_SEL_MASK), 1318 DST_SEL_W(SQ_SEL_MASK), 1319 LOD_BIAS(0), 1320 COORD_TYPE_X(TEX_NORMALIZED), 1321 COORD_TYPE_Y(TEX_NORMALIZED), 1322 COORD_TYPE_Z(TEX_NORMALIZED), 1323 COORD_TYPE_W(TEX_NORMALIZED)); 1324 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1325 OFFSET_Y(0), 1326 OFFSET_Z(0), 1327 SAMPLER_ID(2), 1328 SRC_SEL_X(SQ_SEL_X), 1329 SRC_SEL_Y(SQ_SEL_Y), 1330 SRC_SEL_Z(SQ_SEL_0), 1331 SRC_SEL_W(SQ_SEL_1)); 1332 shader[i++] = TEX_DWORD_PAD; 1333 /* 30 */ 1334 shader[i++] = CF_DWORD0(ADDR(32), 1335 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1336 shader[i++] = CF_DWORD1(POP_COUNT(0), 1337 CF_CONST(0), 1338 COND(SQ_CF_COND_ACTIVE), 1339 I_COUNT(1), 1340 VALID_PIXEL_MODE(0), 1341 CF_INST(SQ_CF_INST_TC), 1342 BARRIER(1)); 1343 /* 31 */ 1344 shader[i++] = CF_DWORD0(ADDR(0), 1345 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1346 shader[i++] = CF_DWORD1(POP_COUNT(0), 1347 CF_CONST(0), 1348 COND(SQ_CF_COND_ACTIVE), 1349 I_COUNT(0), 1350 VALID_PIXEL_MODE(0), 1351 CF_INST(SQ_CF_INST_RETURN), 1352 
BARRIER(1)); 1353 /* 32/33 */ 1354 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1355 INST_MOD(0), 1356 FETCH_WHOLE_QUAD(0), 1357 RESOURCE_ID(0), 1358 SRC_GPR(0), 1359 SRC_REL(ABSOLUTE), 1360 ALT_CONST(0), 1361 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 1362 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 1363 shader[i++] = TEX_DWORD1(DST_GPR(1), 1364 DST_REL(ABSOLUTE), 1365 DST_SEL_X(SQ_SEL_X), 1366 DST_SEL_Y(SQ_SEL_Y), 1367 DST_SEL_Z(SQ_SEL_Z), 1368 DST_SEL_W(SQ_SEL_1), 1369 LOD_BIAS(0), 1370 COORD_TYPE_X(TEX_NORMALIZED), 1371 COORD_TYPE_Y(TEX_NORMALIZED), 1372 COORD_TYPE_Z(TEX_NORMALIZED), 1373 COORD_TYPE_W(TEX_NORMALIZED)); 1374 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1375 OFFSET_Y(0), 1376 OFFSET_Z(0), 1377 SAMPLER_ID(0), 1378 SRC_SEL_X(SQ_SEL_X), 1379 SRC_SEL_Y(SQ_SEL_Y), 1380 SRC_SEL_Z(SQ_SEL_0), 1381 SRC_SEL_W(SQ_SEL_1)); 1382 shader[i++] = TEX_DWORD_PAD; 1383 1384 return i; 1385} 1386 1387/* comp vs --------------------------------------- */ 1388int cayman_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) 1389{ 1390 int i = 0; 1391 1392 /* 0 */ 1393 shader[i++] = CF_DWORD0(ADDR(3), 1394 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1395 shader[i++] = CF_DWORD1(POP_COUNT(0), 1396 CF_CONST(0), 1397 COND(SQ_CF_COND_BOOL), 1398 I_COUNT(0), 1399 VALID_PIXEL_MODE(0), 1400 CF_INST(SQ_CF_INST_CALL), 1401 BARRIER(0)); 1402 /* 1 */ 1403 shader[i++] = CF_DWORD0(ADDR(9), 1404 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1405 shader[i++] = CF_DWORD1(POP_COUNT(0), 1406 CF_CONST(0), 1407 COND(SQ_CF_COND_NOT_BOOL), 1408 I_COUNT(0), 1409 VALID_PIXEL_MODE(0), 1410 CF_INST(SQ_CF_INST_CALL), 1411 BARRIER(0)); 1412 /* 2 - end */ 1413 shader[i++] = CF_DWORD0(ADDR(0), 1414 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1415 shader[i++] = CF_DWORD1(POP_COUNT(0), 1416 CF_CONST(0), 1417 COND(SQ_CF_COND_ACTIVE), 1418 I_COUNT(0), 1419 VALID_PIXEL_MODE(0), 1420 CF_INST(SQ_CF_INST_END), 1421 BARRIER(1)); 1422 /* 3 - mask sub */ 1423 shader[i++] = CF_DWORD0(ADDR(44), 1424 
JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1425 shader[i++] = CF_DWORD1(POP_COUNT(0), 1426 CF_CONST(0), 1427 COND(SQ_CF_COND_ACTIVE), 1428 I_COUNT(3), 1429 VALID_PIXEL_MODE(0), 1430 CF_INST(SQ_CF_INST_TC), 1431 BARRIER(1)); 1432 1433 /* 4 - ALU */ 1434 shader[i++] = CF_ALU_DWORD0(ADDR(14), 1435 KCACHE_BANK0(0), 1436 KCACHE_BANK1(0), 1437 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); 1438 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 1439 KCACHE_ADDR0(0), 1440 KCACHE_ADDR1(0), 1441 I_COUNT(20), 1442 ALT_CONST(0), 1443 CF_INST(SQ_CF_INST_ALU), 1444 WHOLE_QUAD_MODE(0), 1445 BARRIER(1)); 1446 1447 /* 5 - dst */ 1448 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 1449 TYPE(SQ_EXPORT_POS), 1450 RW_GPR(2), 1451 RW_REL(ABSOLUTE), 1452 INDEX_GPR(0), 1453 ELEM_SIZE(0)); 1454 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1455 SRC_SEL_Y(SQ_SEL_Y), 1456 SRC_SEL_Z(SQ_SEL_0), 1457 SRC_SEL_W(SQ_SEL_1), 1458 BURST_COUNT(1), 1459 VALID_PIXEL_MODE(0), 1460 CF_INST(SQ_CF_INST_EXPORT_DONE), 1461 MARK(0), 1462 BARRIER(1)); 1463 /* 6 - src */ 1464 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 1465 TYPE(SQ_EXPORT_PARAM), 1466 RW_GPR(1), 1467 RW_REL(ABSOLUTE), 1468 INDEX_GPR(0), 1469 ELEM_SIZE(0)); 1470 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1471 SRC_SEL_Y(SQ_SEL_Y), 1472 SRC_SEL_Z(SQ_SEL_0), 1473 SRC_SEL_W(SQ_SEL_1), 1474 BURST_COUNT(1), 1475 VALID_PIXEL_MODE(0), 1476 CF_INST(SQ_CF_INST_EXPORT), 1477 MARK(0), 1478 BARRIER(0)); 1479 /* 7 - mask */ 1480 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1), 1481 TYPE(SQ_EXPORT_PARAM), 1482 RW_GPR(0), 1483 RW_REL(ABSOLUTE), 1484 INDEX_GPR(0), 1485 ELEM_SIZE(0)); 1486 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1487 SRC_SEL_Y(SQ_SEL_Y), 1488 SRC_SEL_Z(SQ_SEL_0), 1489 SRC_SEL_W(SQ_SEL_1), 1490 BURST_COUNT(1), 1491 VALID_PIXEL_MODE(0), 1492 CF_INST(SQ_CF_INST_EXPORT_DONE), 1493 MARK(0), 1494 BARRIER(0)); 1495 /* 8 */ 1496 shader[i++] = CF_DWORD0(ADDR(0), 1497 
JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1498 shader[i++] = CF_DWORD1(POP_COUNT(0), 1499 CF_CONST(0), 1500 COND(SQ_CF_COND_ACTIVE), 1501 I_COUNT(0), 1502 VALID_PIXEL_MODE(0), 1503 CF_INST(SQ_CF_INST_RETURN), 1504 BARRIER(1)); 1505 /* 9 - non-mask sub */ 1506 shader[i++] = CF_DWORD0(ADDR(50), 1507 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1508 shader[i++] = CF_DWORD1(POP_COUNT(0), 1509 CF_CONST(0), 1510 COND(SQ_CF_COND_ACTIVE), 1511 I_COUNT(2), 1512 VALID_PIXEL_MODE(0), 1513 CF_INST(SQ_CF_INST_TC), 1514 BARRIER(1)); 1515 1516 /* 10 - ALU */ 1517 shader[i++] = CF_ALU_DWORD0(ADDR(34), 1518 KCACHE_BANK0(0), 1519 KCACHE_BANK1(0), 1520 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); 1521 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 1522 KCACHE_ADDR0(0), 1523 KCACHE_ADDR1(0), 1524 I_COUNT(10), 1525 ALT_CONST(0), 1526 CF_INST(SQ_CF_INST_ALU), 1527 WHOLE_QUAD_MODE(0), 1528 BARRIER(1)); 1529 1530 /* 11 - dst */ 1531 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 1532 TYPE(SQ_EXPORT_POS), 1533 RW_GPR(1), 1534 RW_REL(ABSOLUTE), 1535 INDEX_GPR(0), 1536 ELEM_SIZE(0)); 1537 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1538 SRC_SEL_Y(SQ_SEL_Y), 1539 SRC_SEL_Z(SQ_SEL_0), 1540 SRC_SEL_W(SQ_SEL_1), 1541 BURST_COUNT(0), 1542 VALID_PIXEL_MODE(0), 1543 CF_INST(SQ_CF_INST_EXPORT_DONE), 1544 MARK(0), 1545 BARRIER(1)); 1546 /* 12 - src */ 1547 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 1548 TYPE(SQ_EXPORT_PARAM), 1549 RW_GPR(0), 1550 RW_REL(ABSOLUTE), 1551 INDEX_GPR(0), 1552 ELEM_SIZE(0)); 1553 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1554 SRC_SEL_Y(SQ_SEL_Y), 1555 SRC_SEL_Z(SQ_SEL_0), 1556 SRC_SEL_W(SQ_SEL_1), 1557 BURST_COUNT(0), 1558 VALID_PIXEL_MODE(0), 1559 CF_INST(SQ_CF_INST_EXPORT_DONE), 1560 MARK(0), 1561 BARRIER(0)); 1562 /* 13 */ 1563 shader[i++] = CF_DWORD0(ADDR(0), 1564 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1565 shader[i++] = CF_DWORD1(POP_COUNT(0), 1566 CF_CONST(0), 1567 COND(SQ_CF_COND_ACTIVE), 
1568 I_COUNT(0), 1569 VALID_PIXEL_MODE(0), 1570 CF_INST(SQ_CF_INST_RETURN), 1571 BARRIER(1)); 1572 1573 /* 14 srcX.x DOT4 - mask */ 1574 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1575 SRC0_REL(ABSOLUTE), 1576 SRC0_ELEM(ELEM_X), 1577 SRC0_NEG(0), 1578 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 1579 SRC1_REL(ABSOLUTE), 1580 SRC1_ELEM(ELEM_X), 1581 SRC1_NEG(0), 1582 INDEX_MODE(SQ_INDEX_LOOP), 1583 PRED_SEL(SQ_PRED_SEL_OFF), 1584 LAST(0)); 1585 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1586 SRC1_ABS(0), 1587 UPDATE_EXECUTE_MASK(0), 1588 UPDATE_PRED(0), 1589 WRITE_MASK(1), 1590 OMOD(SQ_ALU_OMOD_OFF), 1591 ALU_INST(SQ_OP2_INST_DOT4), 1592 BANK_SWIZZLE(SQ_ALU_VEC_012), 1593 DST_GPR(3), 1594 DST_REL(ABSOLUTE), 1595 DST_ELEM(ELEM_X), 1596 CLAMP(0)); 1597 1598 /* 15 srcX.y DOT4 - mask */ 1599 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1600 SRC0_REL(ABSOLUTE), 1601 SRC0_ELEM(ELEM_Y), 1602 SRC0_NEG(0), 1603 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 1604 SRC1_REL(ABSOLUTE), 1605 SRC1_ELEM(ELEM_Y), 1606 SRC1_NEG(0), 1607 INDEX_MODE(SQ_INDEX_LOOP), 1608 PRED_SEL(SQ_PRED_SEL_OFF), 1609 LAST(0)); 1610 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1611 SRC1_ABS(0), 1612 UPDATE_EXECUTE_MASK(0), 1613 UPDATE_PRED(0), 1614 WRITE_MASK(0), 1615 OMOD(SQ_ALU_OMOD_OFF), 1616 ALU_INST(SQ_OP2_INST_DOT4), 1617 BANK_SWIZZLE(SQ_ALU_VEC_012), 1618 DST_GPR(3), 1619 DST_REL(ABSOLUTE), 1620 DST_ELEM(ELEM_Y), 1621 CLAMP(0)); 1622 1623 /* 16 srcX.z DOT4 - mask */ 1624 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1625 SRC0_REL(ABSOLUTE), 1626 SRC0_ELEM(ELEM_Z), 1627 SRC0_NEG(0), 1628 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 1629 SRC1_REL(ABSOLUTE), 1630 SRC1_ELEM(ELEM_Z), 1631 SRC1_NEG(0), 1632 INDEX_MODE(SQ_INDEX_LOOP), 1633 PRED_SEL(SQ_PRED_SEL_OFF), 1634 LAST(0)); 1635 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1636 SRC1_ABS(0), 1637 UPDATE_EXECUTE_MASK(0), 1638 UPDATE_PRED(0), 1639 WRITE_MASK(0), 1640 OMOD(SQ_ALU_OMOD_OFF), 1641 ALU_INST(SQ_OP2_INST_DOT4), 1642 
BANK_SWIZZLE(SQ_ALU_VEC_012), 1643 DST_GPR(3), 1644 DST_REL(ABSOLUTE), 1645 DST_ELEM(ELEM_Z), 1646 CLAMP(0)); 1647 1648 /* 17 srcX.w DOT4 - mask */ 1649 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1650 SRC0_REL(ABSOLUTE), 1651 SRC0_ELEM(ELEM_W), 1652 SRC0_NEG(0), 1653 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 1654 SRC1_REL(ABSOLUTE), 1655 SRC1_ELEM(ELEM_W), 1656 SRC1_NEG(0), 1657 INDEX_MODE(SQ_INDEX_LOOP), 1658 PRED_SEL(SQ_PRED_SEL_OFF), 1659 LAST(1)); 1660 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1661 SRC1_ABS(0), 1662 UPDATE_EXECUTE_MASK(0), 1663 UPDATE_PRED(0), 1664 WRITE_MASK(0), 1665 OMOD(SQ_ALU_OMOD_OFF), 1666 ALU_INST(SQ_OP2_INST_DOT4), 1667 BANK_SWIZZLE(SQ_ALU_VEC_012), 1668 DST_GPR(3), 1669 DST_REL(ABSOLUTE), 1670 DST_ELEM(ELEM_W), 1671 CLAMP(0)); 1672 1673 /* 18 srcY.x DOT4 - mask */ 1674 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1675 SRC0_REL(ABSOLUTE), 1676 SRC0_ELEM(ELEM_X), 1677 SRC0_NEG(0), 1678 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 1679 SRC1_REL(ABSOLUTE), 1680 SRC1_ELEM(ELEM_X), 1681 SRC1_NEG(0), 1682 INDEX_MODE(SQ_INDEX_LOOP), 1683 PRED_SEL(SQ_PRED_SEL_OFF), 1684 LAST(0)); 1685 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1686 SRC1_ABS(0), 1687 UPDATE_EXECUTE_MASK(0), 1688 UPDATE_PRED(0), 1689 WRITE_MASK(0), 1690 OMOD(SQ_ALU_OMOD_OFF), 1691 ALU_INST(SQ_OP2_INST_DOT4), 1692 BANK_SWIZZLE(SQ_ALU_VEC_012), 1693 DST_GPR(3), 1694 DST_REL(ABSOLUTE), 1695 DST_ELEM(ELEM_X), 1696 CLAMP(0)); 1697 1698 /* 19 srcY.y DOT4 - mask */ 1699 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1700 SRC0_REL(ABSOLUTE), 1701 SRC0_ELEM(ELEM_Y), 1702 SRC0_NEG(0), 1703 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 1704 SRC1_REL(ABSOLUTE), 1705 SRC1_ELEM(ELEM_Y), 1706 SRC1_NEG(0), 1707 INDEX_MODE(SQ_INDEX_LOOP), 1708 PRED_SEL(SQ_PRED_SEL_OFF), 1709 LAST(0)); 1710 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1711 SRC1_ABS(0), 1712 UPDATE_EXECUTE_MASK(0), 1713 UPDATE_PRED(0), 1714 WRITE_MASK(1), 1715 OMOD(SQ_ALU_OMOD_OFF), 1716 ALU_INST(SQ_OP2_INST_DOT4), 1717 
BANK_SWIZZLE(SQ_ALU_VEC_012), 1718 DST_GPR(3), 1719 DST_REL(ABSOLUTE), 1720 DST_ELEM(ELEM_Y), 1721 CLAMP(0)); 1722 1723 /* 20 srcY.z DOT4 - mask */ 1724 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1725 SRC0_REL(ABSOLUTE), 1726 SRC0_ELEM(ELEM_Z), 1727 SRC0_NEG(0), 1728 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 1729 SRC1_REL(ABSOLUTE), 1730 SRC1_ELEM(ELEM_Z), 1731 SRC1_NEG(0), 1732 INDEX_MODE(SQ_INDEX_LOOP), 1733 PRED_SEL(SQ_PRED_SEL_OFF), 1734 LAST(0)); 1735 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1736 SRC1_ABS(0), 1737 UPDATE_EXECUTE_MASK(0), 1738 UPDATE_PRED(0), 1739 WRITE_MASK(0), 1740 OMOD(SQ_ALU_OMOD_OFF), 1741 ALU_INST(SQ_OP2_INST_DOT4), 1742 BANK_SWIZZLE(SQ_ALU_VEC_012), 1743 DST_GPR(3), 1744 DST_REL(ABSOLUTE), 1745 DST_ELEM(ELEM_Z), 1746 CLAMP(0)); 1747 1748 /* 21 srcY.w DOT4 - mask */ 1749 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1750 SRC0_REL(ABSOLUTE), 1751 SRC0_ELEM(ELEM_W), 1752 SRC0_NEG(0), 1753 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 1754 SRC1_REL(ABSOLUTE), 1755 SRC1_ELEM(ELEM_W), 1756 SRC1_NEG(0), 1757 INDEX_MODE(SQ_INDEX_LOOP), 1758 PRED_SEL(SQ_PRED_SEL_OFF), 1759 LAST(1)); 1760 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1761 SRC1_ABS(0), 1762 UPDATE_EXECUTE_MASK(0), 1763 UPDATE_PRED(0), 1764 WRITE_MASK(0), 1765 OMOD(SQ_ALU_OMOD_OFF), 1766 ALU_INST(SQ_OP2_INST_DOT4), 1767 BANK_SWIZZLE(SQ_ALU_VEC_012), 1768 DST_GPR(3), 1769 DST_REL(ABSOLUTE), 1770 DST_ELEM(ELEM_W), 1771 CLAMP(0)); 1772 1773 /* 22 maskX.x DOT4 - mask */ 1774 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1775 SRC0_REL(ABSOLUTE), 1776 SRC0_ELEM(ELEM_X), 1777 SRC0_NEG(0), 1778 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2), 1779 SRC1_REL(ABSOLUTE), 1780 SRC1_ELEM(ELEM_X), 1781 SRC1_NEG(0), 1782 INDEX_MODE(SQ_INDEX_LOOP), 1783 PRED_SEL(SQ_PRED_SEL_OFF), 1784 LAST(0)); 1785 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1786 SRC1_ABS(0), 1787 UPDATE_EXECUTE_MASK(0), 1788 UPDATE_PRED(0), 1789 WRITE_MASK(1), 1790 OMOD(SQ_ALU_OMOD_OFF), 1791 ALU_INST(SQ_OP2_INST_DOT4), 1792 
BANK_SWIZZLE(SQ_ALU_VEC_012), 1793 DST_GPR(4), 1794 DST_REL(ABSOLUTE), 1795 DST_ELEM(ELEM_X), 1796 CLAMP(0)); 1797 1798 /* 23 maskX.y DOT4 - mask */ 1799 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1800 SRC0_REL(ABSOLUTE), 1801 SRC0_ELEM(ELEM_Y), 1802 SRC0_NEG(0), 1803 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2), 1804 SRC1_REL(ABSOLUTE), 1805 SRC1_ELEM(ELEM_Y), 1806 SRC1_NEG(0), 1807 INDEX_MODE(SQ_INDEX_LOOP), 1808 PRED_SEL(SQ_PRED_SEL_OFF), 1809 LAST(0)); 1810 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1811 SRC1_ABS(0), 1812 UPDATE_EXECUTE_MASK(0), 1813 UPDATE_PRED(0), 1814 WRITE_MASK(0), 1815 OMOD(SQ_ALU_OMOD_OFF), 1816 ALU_INST(SQ_OP2_INST_DOT4), 1817 BANK_SWIZZLE(SQ_ALU_VEC_012), 1818 DST_GPR(4), 1819 DST_REL(ABSOLUTE), 1820 DST_ELEM(ELEM_Y), 1821 CLAMP(0)); 1822 1823 /* 24 maskX.z DOT4 - mask */ 1824 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1825 SRC0_REL(ABSOLUTE), 1826 SRC0_ELEM(ELEM_Z), 1827 SRC0_NEG(0), 1828 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2), 1829 SRC1_REL(ABSOLUTE), 1830 SRC1_ELEM(ELEM_Z), 1831 SRC1_NEG(0), 1832 INDEX_MODE(SQ_INDEX_LOOP), 1833 PRED_SEL(SQ_PRED_SEL_OFF), 1834 LAST(0)); 1835 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1836 SRC1_ABS(0), 1837 UPDATE_EXECUTE_MASK(0), 1838 UPDATE_PRED(0), 1839 WRITE_MASK(0), 1840 OMOD(SQ_ALU_OMOD_OFF), 1841 ALU_INST(SQ_OP2_INST_DOT4), 1842 BANK_SWIZZLE(SQ_ALU_VEC_012), 1843 DST_GPR(4), 1844 DST_REL(ABSOLUTE), 1845 DST_ELEM(ELEM_Z), 1846 CLAMP(0)); 1847 1848 /* 25 maskX.w DOT4 - mask */ 1849 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1850 SRC0_REL(ABSOLUTE), 1851 SRC0_ELEM(ELEM_W), 1852 SRC0_NEG(0), 1853 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2), 1854 SRC1_REL(ABSOLUTE), 1855 SRC1_ELEM(ELEM_W), 1856 SRC1_NEG(0), 1857 INDEX_MODE(SQ_INDEX_LOOP), 1858 PRED_SEL(SQ_PRED_SEL_OFF), 1859 LAST(1)); 1860 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1861 SRC1_ABS(0), 1862 UPDATE_EXECUTE_MASK(0), 1863 UPDATE_PRED(0), 1864 WRITE_MASK(0), 1865 OMOD(SQ_ALU_OMOD_OFF), 1866 ALU_INST(SQ_OP2_INST_DOT4), 1867 
BANK_SWIZZLE(SQ_ALU_VEC_012), 1868 DST_GPR(4), 1869 DST_REL(ABSOLUTE), 1870 DST_ELEM(ELEM_W), 1871 CLAMP(0)); 1872 1873 /* 26 maskY.x DOT4 - mask */ 1874 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1875 SRC0_REL(ABSOLUTE), 1876 SRC0_ELEM(ELEM_X), 1877 SRC0_NEG(0), 1878 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3), 1879 SRC1_REL(ABSOLUTE), 1880 SRC1_ELEM(ELEM_X), 1881 SRC1_NEG(0), 1882 INDEX_MODE(SQ_INDEX_LOOP), 1883 PRED_SEL(SQ_PRED_SEL_OFF), 1884 LAST(0)); 1885 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1886 SRC1_ABS(0), 1887 UPDATE_EXECUTE_MASK(0), 1888 UPDATE_PRED(0), 1889 WRITE_MASK(0), 1890 OMOD(SQ_ALU_OMOD_OFF), 1891 ALU_INST(SQ_OP2_INST_DOT4), 1892 BANK_SWIZZLE(SQ_ALU_VEC_012), 1893 DST_GPR(4), 1894 DST_REL(ABSOLUTE), 1895 DST_ELEM(ELEM_X), 1896 CLAMP(0)); 1897 1898 /* 27 maskY.y DOT4 - mask */ 1899 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1900 SRC0_REL(ABSOLUTE), 1901 SRC0_ELEM(ELEM_Y), 1902 SRC0_NEG(0), 1903 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3), 1904 SRC1_REL(ABSOLUTE), 1905 SRC1_ELEM(ELEM_Y), 1906 SRC1_NEG(0), 1907 INDEX_MODE(SQ_INDEX_LOOP), 1908 PRED_SEL(SQ_PRED_SEL_OFF), 1909 LAST(0)); 1910 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1911 SRC1_ABS(0), 1912 UPDATE_EXECUTE_MASK(0), 1913 UPDATE_PRED(0), 1914 WRITE_MASK(1), 1915 OMOD(SQ_ALU_OMOD_OFF), 1916 ALU_INST(SQ_OP2_INST_DOT4), 1917 BANK_SWIZZLE(SQ_ALU_VEC_012), 1918 DST_GPR(4), 1919 DST_REL(ABSOLUTE), 1920 DST_ELEM(ELEM_Y), 1921 CLAMP(0)); 1922 1923 /* 28 maskY.z DOT4 - mask */ 1924 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1925 SRC0_REL(ABSOLUTE), 1926 SRC0_ELEM(ELEM_Z), 1927 SRC0_NEG(0), 1928 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3), 1929 SRC1_REL(ABSOLUTE), 1930 SRC1_ELEM(ELEM_Z), 1931 SRC1_NEG(0), 1932 INDEX_MODE(SQ_INDEX_LOOP), 1933 PRED_SEL(SQ_PRED_SEL_OFF), 1934 LAST(0)); 1935 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1936 SRC1_ABS(0), 1937 UPDATE_EXECUTE_MASK(0), 1938 UPDATE_PRED(0), 1939 WRITE_MASK(0), 1940 OMOD(SQ_ALU_OMOD_OFF), 1941 ALU_INST(SQ_OP2_INST_DOT4), 1942 
BANK_SWIZZLE(SQ_ALU_VEC_012), 1943 DST_GPR(4), 1944 DST_REL(ABSOLUTE), 1945 DST_ELEM(ELEM_Z), 1946 CLAMP(0)); 1947 1948 /* 29 maskY.w DOT4 - mask */ 1949 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1950 SRC0_REL(ABSOLUTE), 1951 SRC0_ELEM(ELEM_W), 1952 SRC0_NEG(0), 1953 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3), 1954 SRC1_REL(ABSOLUTE), 1955 SRC1_ELEM(ELEM_W), 1956 SRC1_NEG(0), 1957 INDEX_MODE(SQ_INDEX_LOOP), 1958 PRED_SEL(SQ_PRED_SEL_OFF), 1959 LAST(1)); 1960 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1961 SRC1_ABS(0), 1962 UPDATE_EXECUTE_MASK(0), 1963 UPDATE_PRED(0), 1964 WRITE_MASK(0), 1965 OMOD(SQ_ALU_OMOD_OFF), 1966 ALU_INST(SQ_OP2_INST_DOT4), 1967 BANK_SWIZZLE(SQ_ALU_VEC_012), 1968 DST_GPR(4), 1969 DST_REL(ABSOLUTE), 1970 DST_ELEM(ELEM_W), 1971 CLAMP(0)); 1972 1973 /* 30 srcX / w */ 1974 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3), 1975 SRC0_REL(ABSOLUTE), 1976 SRC0_ELEM(ELEM_X), 1977 SRC0_NEG(0), 1978 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 1979 SRC1_REL(ABSOLUTE), 1980 SRC1_ELEM(ELEM_W), 1981 SRC1_NEG(0), 1982 INDEX_MODE(SQ_INDEX_AR_X), 1983 PRED_SEL(SQ_PRED_SEL_OFF), 1984 LAST(1)); 1985 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1986 SRC1_ABS(0), 1987 UPDATE_EXECUTE_MASK(0), 1988 UPDATE_PRED(0), 1989 WRITE_MASK(1), 1990 OMOD(SQ_ALU_OMOD_OFF), 1991 ALU_INST(SQ_OP2_INST_MUL), 1992 BANK_SWIZZLE(SQ_ALU_VEC_012), 1993 DST_GPR(1), 1994 DST_REL(ABSOLUTE), 1995 DST_ELEM(ELEM_X), 1996 CLAMP(0)); 1997 1998 /* 31 srcY / h */ 1999 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3), 2000 SRC0_REL(ABSOLUTE), 2001 SRC0_ELEM(ELEM_Y), 2002 SRC0_NEG(0), 2003 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 2004 SRC1_REL(ABSOLUTE), 2005 SRC1_ELEM(ELEM_W), 2006 SRC1_NEG(0), 2007 INDEX_MODE(SQ_INDEX_AR_X), 2008 PRED_SEL(SQ_PRED_SEL_OFF), 2009 LAST(1)); 2010 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2011 SRC1_ABS(0), 2012 UPDATE_EXECUTE_MASK(0), 2013 UPDATE_PRED(0), 2014 WRITE_MASK(1), 2015 OMOD(SQ_ALU_OMOD_OFF), 2016 ALU_INST(SQ_OP2_INST_MUL), 2017 
BANK_SWIZZLE(SQ_ALU_VEC_012), 2018 DST_GPR(1), 2019 DST_REL(ABSOLUTE), 2020 DST_ELEM(ELEM_Y), 2021 CLAMP(0)); 2022 2023 /* 32 maskX / w */ 2024 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4), 2025 SRC0_REL(ABSOLUTE), 2026 SRC0_ELEM(ELEM_X), 2027 SRC0_NEG(0), 2028 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2), 2029 SRC1_REL(ABSOLUTE), 2030 SRC1_ELEM(ELEM_W), 2031 SRC1_NEG(0), 2032 INDEX_MODE(SQ_INDEX_AR_X), 2033 PRED_SEL(SQ_PRED_SEL_OFF), 2034 LAST(1)); 2035 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2036 SRC1_ABS(0), 2037 UPDATE_EXECUTE_MASK(0), 2038 UPDATE_PRED(0), 2039 WRITE_MASK(1), 2040 OMOD(SQ_ALU_OMOD_OFF), 2041 ALU_INST(SQ_OP2_INST_MUL), 2042 BANK_SWIZZLE(SQ_ALU_VEC_012), 2043 DST_GPR(0), 2044 DST_REL(ABSOLUTE), 2045 DST_ELEM(ELEM_X), 2046 CLAMP(0)); 2047 2048 /* 33 maskY / h */ 2049 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4), 2050 SRC0_REL(ABSOLUTE), 2051 SRC0_ELEM(ELEM_Y), 2052 SRC0_NEG(0), 2053 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3), 2054 SRC1_REL(ABSOLUTE), 2055 SRC1_ELEM(ELEM_W), 2056 SRC1_NEG(0), 2057 INDEX_MODE(SQ_INDEX_AR_X), 2058 PRED_SEL(SQ_PRED_SEL_OFF), 2059 LAST(1)); 2060 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2061 SRC1_ABS(0), 2062 UPDATE_EXECUTE_MASK(0), 2063 UPDATE_PRED(0), 2064 WRITE_MASK(1), 2065 OMOD(SQ_ALU_OMOD_OFF), 2066 ALU_INST(SQ_OP2_INST_MUL), 2067 BANK_SWIZZLE(SQ_ALU_VEC_012), 2068 DST_GPR(0), 2069 DST_REL(ABSOLUTE), 2070 DST_ELEM(ELEM_Y), 2071 CLAMP(0)); 2072 2073 /* 34 srcX.x DOT4 - non-mask */ 2074 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2075 SRC0_REL(ABSOLUTE), 2076 SRC0_ELEM(ELEM_X), 2077 SRC0_NEG(0), 2078 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 2079 SRC1_REL(ABSOLUTE), 2080 SRC1_ELEM(ELEM_X), 2081 SRC1_NEG(0), 2082 INDEX_MODE(SQ_INDEX_LOOP), 2083 PRED_SEL(SQ_PRED_SEL_OFF), 2084 LAST(0)); 2085 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2086 SRC1_ABS(0), 2087 UPDATE_EXECUTE_MASK(0), 2088 UPDATE_PRED(0), 2089 WRITE_MASK(1), 2090 OMOD(SQ_ALU_OMOD_OFF), 2091 ALU_INST(SQ_OP2_INST_DOT4), 2092 
BANK_SWIZZLE(SQ_ALU_VEC_012), 2093 DST_GPR(2), 2094 DST_REL(ABSOLUTE), 2095 DST_ELEM(ELEM_X), 2096 CLAMP(0)); 2097 2098 /* 35 srcX.y DOT4 - non-mask */ 2099 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2100 SRC0_REL(ABSOLUTE), 2101 SRC0_ELEM(ELEM_Y), 2102 SRC0_NEG(0), 2103 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 2104 SRC1_REL(ABSOLUTE), 2105 SRC1_ELEM(ELEM_Y), 2106 SRC1_NEG(0), 2107 INDEX_MODE(SQ_INDEX_LOOP), 2108 PRED_SEL(SQ_PRED_SEL_OFF), 2109 LAST(0)); 2110 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2111 SRC1_ABS(0), 2112 UPDATE_EXECUTE_MASK(0), 2113 UPDATE_PRED(0), 2114 WRITE_MASK(0), 2115 OMOD(SQ_ALU_OMOD_OFF), 2116 ALU_INST(SQ_OP2_INST_DOT4), 2117 BANK_SWIZZLE(SQ_ALU_VEC_012), 2118 DST_GPR(2), 2119 DST_REL(ABSOLUTE), 2120 DST_ELEM(ELEM_Y), 2121 CLAMP(0)); 2122 2123 /* 36 srcX.z DOT4 - non-mask */ 2124 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2125 SRC0_REL(ABSOLUTE), 2126 SRC0_ELEM(ELEM_Z), 2127 SRC0_NEG(0), 2128 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 2129 SRC1_REL(ABSOLUTE), 2130 SRC1_ELEM(ELEM_Z), 2131 SRC1_NEG(0), 2132 INDEX_MODE(SQ_INDEX_LOOP), 2133 PRED_SEL(SQ_PRED_SEL_OFF), 2134 LAST(0)); 2135 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2136 SRC1_ABS(0), 2137 UPDATE_EXECUTE_MASK(0), 2138 UPDATE_PRED(0), 2139 WRITE_MASK(0), 2140 OMOD(SQ_ALU_OMOD_OFF), 2141 ALU_INST(SQ_OP2_INST_DOT4), 2142 BANK_SWIZZLE(SQ_ALU_VEC_012), 2143 DST_GPR(2), 2144 DST_REL(ABSOLUTE), 2145 DST_ELEM(ELEM_Z), 2146 CLAMP(0)); 2147 2148 /* 37 srcX.w DOT4 - non-mask */ 2149 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2150 SRC0_REL(ABSOLUTE), 2151 SRC0_ELEM(ELEM_W), 2152 SRC0_NEG(0), 2153 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 2154 SRC1_REL(ABSOLUTE), 2155 SRC1_ELEM(ELEM_W), 2156 SRC1_NEG(0), 2157 INDEX_MODE(SQ_INDEX_LOOP), 2158 PRED_SEL(SQ_PRED_SEL_OFF), 2159 LAST(1)); 2160 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2161 SRC1_ABS(0), 2162 UPDATE_EXECUTE_MASK(0), 2163 UPDATE_PRED(0), 2164 WRITE_MASK(0), 2165 OMOD(SQ_ALU_OMOD_OFF), 2166 
ALU_INST(SQ_OP2_INST_DOT4), 2167 BANK_SWIZZLE(SQ_ALU_VEC_012), 2168 DST_GPR(2), 2169 DST_REL(ABSOLUTE), 2170 DST_ELEM(ELEM_W), 2171 CLAMP(0)); 2172 2173 /* 38 srcY.x DOT4 - non-mask */ 2174 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2175 SRC0_REL(ABSOLUTE), 2176 SRC0_ELEM(ELEM_X), 2177 SRC0_NEG(0), 2178 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 2179 SRC1_REL(ABSOLUTE), 2180 SRC1_ELEM(ELEM_X), 2181 SRC1_NEG(0), 2182 INDEX_MODE(SQ_INDEX_LOOP), 2183 PRED_SEL(SQ_PRED_SEL_OFF), 2184 LAST(0)); 2185 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2186 SRC1_ABS(0), 2187 UPDATE_EXECUTE_MASK(0), 2188 UPDATE_PRED(0), 2189 WRITE_MASK(0), 2190 OMOD(SQ_ALU_OMOD_OFF), 2191 ALU_INST(SQ_OP2_INST_DOT4), 2192 BANK_SWIZZLE(SQ_ALU_VEC_012), 2193 DST_GPR(2), 2194 DST_REL(ABSOLUTE), 2195 DST_ELEM(ELEM_X), 2196 CLAMP(0)); 2197 2198 /* 39 srcY.y DOT4 - non-mask */ 2199 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2200 SRC0_REL(ABSOLUTE), 2201 SRC0_ELEM(ELEM_Y), 2202 SRC0_NEG(0), 2203 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 2204 SRC1_REL(ABSOLUTE), 2205 SRC1_ELEM(ELEM_Y), 2206 SRC1_NEG(0), 2207 INDEX_MODE(SQ_INDEX_LOOP), 2208 PRED_SEL(SQ_PRED_SEL_OFF), 2209 LAST(0)); 2210 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2211 SRC1_ABS(0), 2212 UPDATE_EXECUTE_MASK(0), 2213 UPDATE_PRED(0), 2214 WRITE_MASK(1), 2215 OMOD(SQ_ALU_OMOD_OFF), 2216 ALU_INST(SQ_OP2_INST_DOT4), 2217 BANK_SWIZZLE(SQ_ALU_VEC_012), 2218 DST_GPR(2), 2219 DST_REL(ABSOLUTE), 2220 DST_ELEM(ELEM_Y), 2221 CLAMP(0)); 2222 2223 /* 40 srcY.z DOT4 - non-mask */ 2224 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2225 SRC0_REL(ABSOLUTE), 2226 SRC0_ELEM(ELEM_Z), 2227 SRC0_NEG(0), 2228 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 2229 SRC1_REL(ABSOLUTE), 2230 SRC1_ELEM(ELEM_Z), 2231 SRC1_NEG(0), 2232 INDEX_MODE(SQ_INDEX_LOOP), 2233 PRED_SEL(SQ_PRED_SEL_OFF), 2234 LAST(0)); 2235 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2236 SRC1_ABS(0), 2237 UPDATE_EXECUTE_MASK(0), 2238 UPDATE_PRED(0), 2239 WRITE_MASK(0), 2240 
OMOD(SQ_ALU_OMOD_OFF), 2241 ALU_INST(SQ_OP2_INST_DOT4), 2242 BANK_SWIZZLE(SQ_ALU_VEC_012), 2243 DST_GPR(2), 2244 DST_REL(ABSOLUTE), 2245 DST_ELEM(ELEM_Z), 2246 CLAMP(0)); 2247 2248 /* 41 srcY.w DOT4 - non-mask */ 2249 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2250 SRC0_REL(ABSOLUTE), 2251 SRC0_ELEM(ELEM_W), 2252 SRC0_NEG(0), 2253 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 2254 SRC1_REL(ABSOLUTE), 2255 SRC1_ELEM(ELEM_W), 2256 SRC1_NEG(0), 2257 INDEX_MODE(SQ_INDEX_LOOP), 2258 PRED_SEL(SQ_PRED_SEL_OFF), 2259 LAST(1)); 2260 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2261 SRC1_ABS(0), 2262 UPDATE_EXECUTE_MASK(0), 2263 UPDATE_PRED(0), 2264 WRITE_MASK(0), 2265 OMOD(SQ_ALU_OMOD_OFF), 2266 ALU_INST(SQ_OP2_INST_DOT4), 2267 BANK_SWIZZLE(SQ_ALU_VEC_012), 2268 DST_GPR(2), 2269 DST_REL(ABSOLUTE), 2270 DST_ELEM(ELEM_W), 2271 CLAMP(0)); 2272 2273 /* 42 srcX / w */ 2274 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2), 2275 SRC0_REL(ABSOLUTE), 2276 SRC0_ELEM(ELEM_X), 2277 SRC0_NEG(0), 2278 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 2279 SRC1_REL(ABSOLUTE), 2280 SRC1_ELEM(ELEM_W), 2281 SRC1_NEG(0), 2282 INDEX_MODE(SQ_INDEX_AR_X), 2283 PRED_SEL(SQ_PRED_SEL_OFF), 2284 LAST(1)); 2285 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2286 SRC1_ABS(0), 2287 UPDATE_EXECUTE_MASK(0), 2288 UPDATE_PRED(0), 2289 WRITE_MASK(1), 2290 OMOD(SQ_ALU_OMOD_OFF), 2291 ALU_INST(SQ_OP2_INST_MUL), 2292 BANK_SWIZZLE(SQ_ALU_VEC_012), 2293 DST_GPR(0), 2294 DST_REL(ABSOLUTE), 2295 DST_ELEM(ELEM_X), 2296 CLAMP(0)); 2297 2298 /* 43 srcY / h */ 2299 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2), 2300 SRC0_REL(ABSOLUTE), 2301 SRC0_ELEM(ELEM_Y), 2302 SRC0_NEG(0), 2303 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 2304 SRC1_REL(ABSOLUTE), 2305 SRC1_ELEM(ELEM_W), 2306 SRC1_NEG(0), 2307 INDEX_MODE(SQ_INDEX_AR_X), 2308 PRED_SEL(SQ_PRED_SEL_OFF), 2309 LAST(1)); 2310 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2311 SRC1_ABS(0), 2312 UPDATE_EXECUTE_MASK(0), 2313 UPDATE_PRED(0), 2314 WRITE_MASK(1), 2315 
OMOD(SQ_ALU_OMOD_OFF), 2316 ALU_INST(SQ_OP2_INST_MUL), 2317 BANK_SWIZZLE(SQ_ALU_VEC_012), 2318 DST_GPR(0), 2319 DST_REL(ABSOLUTE), 2320 DST_ELEM(ELEM_Y), 2321 CLAMP(0)); 2322 /* mask vfetch - 44/45 - dst */ 2323 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 2324 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 2325 FETCH_WHOLE_QUAD(0), 2326 BUFFER_ID(0), 2327 SRC_GPR(0), 2328 SRC_REL(ABSOLUTE), 2329 SRC_SEL_X(SQ_SEL_X), 2330 SRC_SEL_Y(SQ_SEL_Y), 2331 STRUCTURED_READ(SQ_VTX_STRU_READ_OFF), 2332 LDS_REQ(0), 2333 COALESCED_READ(0)); 2334 shader[i++] = VTX_DWORD1_GPR(DST_GPR(2), 2335 DST_REL(0), 2336 DST_SEL_X(SQ_SEL_X), 2337 DST_SEL_Y(SQ_SEL_Y), 2338 DST_SEL_Z(SQ_SEL_0), 2339 DST_SEL_W(SQ_SEL_1), 2340 USE_CONST_FIELDS(0), 2341 DATA_FORMAT(FMT_32_32_FLOAT), 2342 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 2343 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 2344 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 2345 shader[i++] = VTX_DWORD2(OFFSET(0), 2346#if X_BYTE_ORDER == X_BIG_ENDIAN 2347 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 2348#else 2349 ENDIAN_SWAP(ENDIAN_NONE), 2350#endif 2351 CONST_BUF_NO_STRIDE(0), 2352 ALT_CONST(0), 2353 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 2354 shader[i++] = VTX_DWORD_PAD; 2355 /* 46/47 - src */ 2356 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 2357 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 2358 FETCH_WHOLE_QUAD(0), 2359 BUFFER_ID(0), 2360 SRC_GPR(0), 2361 SRC_REL(ABSOLUTE), 2362 SRC_SEL_X(SQ_SEL_X), 2363 SRC_SEL_Y(SQ_SEL_Y), 2364 STRUCTURED_READ(SQ_VTX_STRU_READ_OFF), 2365 LDS_REQ(0), 2366 COALESCED_READ(0)); 2367 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 2368 DST_REL(0), 2369 DST_SEL_X(SQ_SEL_X), 2370 DST_SEL_Y(SQ_SEL_Y), 2371 DST_SEL_Z(SQ_SEL_1), 2372 DST_SEL_W(SQ_SEL_0), 2373 USE_CONST_FIELDS(0), 2374 DATA_FORMAT(FMT_32_32_FLOAT), 2375 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 2376 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 2377 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 2378 shader[i++] = VTX_DWORD2(OFFSET(8), 2379#if X_BYTE_ORDER == X_BIG_ENDIAN 2380 
ENDIAN_SWAP(SQ_ENDIAN_8IN32), 2381#else 2382 ENDIAN_SWAP(ENDIAN_NONE), 2383#endif 2384 CONST_BUF_NO_STRIDE(0), 2385 ALT_CONST(0), 2386 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 2387 shader[i++] = VTX_DWORD_PAD; 2388 /* 48/49 - mask */ 2389 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 2390 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 2391 FETCH_WHOLE_QUAD(0), 2392 BUFFER_ID(0), 2393 SRC_GPR(0), 2394 SRC_REL(ABSOLUTE), 2395 SRC_SEL_X(SQ_SEL_X), 2396 SRC_SEL_Y(SQ_SEL_Y), 2397 STRUCTURED_READ(SQ_VTX_STRU_READ_OFF), 2398 LDS_REQ(0), 2399 COALESCED_READ(0)); 2400 shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), 2401 DST_REL(0), 2402 DST_SEL_X(SQ_SEL_X), 2403 DST_SEL_Y(SQ_SEL_Y), 2404 DST_SEL_Z(SQ_SEL_1), 2405 DST_SEL_W(SQ_SEL_0), 2406 USE_CONST_FIELDS(0), 2407 DATA_FORMAT(FMT_32_32_FLOAT), 2408 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 2409 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 2410 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 2411 shader[i++] = VTX_DWORD2(OFFSET(16), 2412#if X_BYTE_ORDER == X_BIG_ENDIAN 2413 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 2414#else 2415 ENDIAN_SWAP(ENDIAN_NONE), 2416#endif 2417 CONST_BUF_NO_STRIDE(0), 2418 ALT_CONST(0), 2419 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 2420 shader[i++] = VTX_DWORD_PAD; 2421 2422 /* no mask vfetch - 50/51 - dst */ 2423 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 2424 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 2425 FETCH_WHOLE_QUAD(0), 2426 BUFFER_ID(0), 2427 SRC_GPR(0), 2428 SRC_REL(ABSOLUTE), 2429 SRC_SEL_X(SQ_SEL_X), 2430 SRC_SEL_Y(SQ_SEL_Y), 2431 STRUCTURED_READ(SQ_VTX_STRU_READ_OFF), 2432 LDS_REQ(0), 2433 COALESCED_READ(0)); 2434 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 2435 DST_REL(0), 2436 DST_SEL_X(SQ_SEL_X), 2437 DST_SEL_Y(SQ_SEL_Y), 2438 DST_SEL_Z(SQ_SEL_0), 2439 DST_SEL_W(SQ_SEL_1), 2440 USE_CONST_FIELDS(0), 2441 DATA_FORMAT(FMT_32_32_FLOAT), 2442 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 2443 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 2444 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 2445 shader[i++] = VTX_DWORD2(OFFSET(0), 
2446#if X_BYTE_ORDER == X_BIG_ENDIAN 2447 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 2448#else 2449 ENDIAN_SWAP(ENDIAN_NONE), 2450#endif 2451 CONST_BUF_NO_STRIDE(0), 2452 ALT_CONST(0), 2453 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 2454 shader[i++] = VTX_DWORD_PAD; 2455 /* 52/53 - src */ 2456 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 2457 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 2458 FETCH_WHOLE_QUAD(0), 2459 BUFFER_ID(0), 2460 SRC_GPR(0), 2461 SRC_REL(ABSOLUTE), 2462 SRC_SEL_X(SQ_SEL_X), 2463 SRC_SEL_Y(SQ_SEL_Y), 2464 STRUCTURED_READ(SQ_VTX_STRU_READ_OFF), 2465 LDS_REQ(0), 2466 COALESCED_READ(0)); 2467 shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), 2468 DST_REL(0), 2469 DST_SEL_X(SQ_SEL_X), 2470 DST_SEL_Y(SQ_SEL_Y), 2471 DST_SEL_Z(SQ_SEL_1), 2472 DST_SEL_W(SQ_SEL_0), 2473 USE_CONST_FIELDS(0), 2474 DATA_FORMAT(FMT_32_32_FLOAT), 2475 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 2476 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 2477 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 2478 shader[i++] = VTX_DWORD2(OFFSET(8), 2479#if X_BYTE_ORDER == X_BIG_ENDIAN 2480 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 2481#else 2482 ENDIAN_SWAP(ENDIAN_NONE), 2483#endif 2484 CONST_BUF_NO_STRIDE(0), 2485 ALT_CONST(0), 2486 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 2487 shader[i++] = VTX_DWORD_PAD; 2488 2489 return i; 2490} 2491 2492/* comp ps ---------------------------------------
 * cayman_comp_ps: writes the composite pixel shader into shader[] one
 * 32-bit word at a time and returns the number of words written.
 * ChipSet is not referenced in the body.
 *
 * Every numbered slot in the comments below is two consecutive words
 * (the fetches marked N/M take four). Layout by slot number:
 *   0-6    main program: four conditional CALLs, the "src IN mask" ALU
 *          clause, the pixel export and END
 *   7-9    subroutine interp-fetch-src
 *   10-11  subroutine read-constant-src
 *   12-14  subroutine interp-fetch-mask
 *   15-16  subroutine read-constant-mask
 *   17-41  ALU and texture-fetch instructions targeted by the ADDR()
 *          operands of the slots above
 * The ADDR() operands are hard-coded and match these slot numbers, so
 * inserting, removing or reordering words means renumbering them.
 */ 2493int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) 2494{ 2495 int i = 0; 2496 2497 /* 0 */ 2498 /* call interp-fetch-mask (slot 12) if boolean1 == true */ 2499 shader[i++] = CF_DWORD0(ADDR(12), 2500 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2501 shader[i++] = CF_DWORD1(POP_COUNT(0), 2502 CF_CONST(1), 2503 COND(SQ_CF_COND_BOOL), 2504 I_COUNT(0), 2505 VALID_PIXEL_MODE(0), 2506 CF_INST(SQ_CF_INST_CALL), 2507 BARRIER(0)); 2508 2509 /* 1 */ 2510 /* call read-constant-mask (slot 15) if boolean1 == false */ 2511 shader[i++] = CF_DWORD0(ADDR(15), 2512 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2513 shader[i++] = CF_DWORD1(POP_COUNT(0), 2514 CF_CONST(1), 
2515 COND(SQ_CF_COND_NOT_BOOL), 2516 I_COUNT(0), 2517 VALID_PIXEL_MODE(0), 2518 CF_INST(SQ_CF_INST_CALL), 2519 BARRIER(0)); 2520 2521 /* 2 */ 2522 /* call interp-fetch-src (slot 7) if boolean0 == true */ 2523 shader[i++] = CF_DWORD0(ADDR(7), 2524 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2525 shader[i++] = CF_DWORD1(POP_COUNT(0), 2526 CF_CONST(0), 2527 COND(SQ_CF_COND_BOOL), 2528 I_COUNT(0), 2529 VALID_PIXEL_MODE(0), 2530 CF_INST(SQ_CF_INST_CALL), 2531 BARRIER(0)); 2532 2533 /* 3 */ 2534 /* call read-constant-src (slot 10) if boolean0 == false */ 2535 shader[i++] = CF_DWORD0(ADDR(10), 2536 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2537 shader[i++] = CF_DWORD1(POP_COUNT(0), 2538 CF_CONST(0), 2539 COND(SQ_CF_COND_NOT_BOOL), 2540 I_COUNT(0), 2541 VALID_PIXEL_MODE(0), 2542 CF_INST(SQ_CF_INST_CALL), 2543 BARRIER(0)); 2544 /* 4 */ 2545 /* src IN mask (GPR0 := GPR0 .* GPR1) - the four MULs at slots 17-20 */ 2546 shader[i++] = CF_ALU_DWORD0(ADDR(17), 2547 KCACHE_BANK0(0), 2548 KCACHE_BANK1(0), 2549 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 2550 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 2551 KCACHE_ADDR0(0), 2552 KCACHE_ADDR1(0), 2553 I_COUNT(4), 2554 ALT_CONST(0), 2555 CF_INST(SQ_CF_INST_ALU), 2556 WHOLE_QUAD_MODE(0), 2557 BARRIER(1)); 2558 2559 /* 5 */ 2560 /* export pixel data from GPR0 (ARRAY_BASE is CF_PIXEL_MRT0) */ 2561 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 2562 TYPE(SQ_EXPORT_PIXEL), 2563 RW_GPR(0), 2564 RW_REL(ABSOLUTE), 2565 INDEX_GPR(0), 2566 ELEM_SIZE(1)); 2567 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 2568 SRC_SEL_Y(SQ_SEL_Y), 2569 SRC_SEL_Z(SQ_SEL_Z), 2570 SRC_SEL_W(SQ_SEL_W), 2571 BURST_COUNT(1), 2572 VALID_PIXEL_MODE(0), 2573 CF_INST(SQ_CF_INST_EXPORT_DONE), 2574 MARK(0), 2575 BARRIER(1)); 2576 2577 /* 6 */ 2578 /* end of program */ 2579 shader[i++] = CF_DWORD0(ADDR(0), 2580 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2581 shader[i++] = CF_DWORD1(POP_COUNT(0), 2582 CF_CONST(0), 2583 COND(SQ_CF_COND_ACTIVE), 2584 I_COUNT(0), 2585 VALID_PIXEL_MODE(0), 2586 CF_INST(SQ_CF_INST_END), 
2587 BARRIER(1)); 2588 2589 /* subroutine interp-fetch-src */ 2590 2591 /* 7 */ 2592 /* interpolate src - the four INTERP_XY instructions at slots 21-24 */ 2593 shader[i++] = CF_ALU_DWORD0(ADDR(21), 2594 KCACHE_BANK0(0), 2595 KCACHE_BANK1(0), 2596 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 2597 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 2598 KCACHE_ADDR0(0), 2599 KCACHE_ADDR1(0), 2600 I_COUNT(4), 2601 ALT_CONST(0), 2602 CF_INST(SQ_CF_INST_ALU), 2603 WHOLE_QUAD_MODE(0), 2604 BARRIER(1)); 2605 2606 /* 8 */ 2607 /* texture fetch src into GPR0 - the SAMPLE at slots 26/27 */ 2608 shader[i++] = CF_DWORD0(ADDR(26), 2609 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2610 shader[i++] = CF_DWORD1(POP_COUNT(0), 2611 CF_CONST(0), 2612 COND(SQ_CF_COND_ACTIVE), 2613 I_COUNT(1), 2614 VALID_PIXEL_MODE(0), 2615 CF_INST(SQ_CF_INST_TC), 2616 BARRIER(1)); 2617 2618 /* 9 */ 2619 /* return */ 2620 shader[i++] = CF_DWORD0(ADDR(0), 2621 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2622 shader[i++] = CF_DWORD1(POP_COUNT(0), 2623 CF_CONST(0), 2624 COND(SQ_CF_COND_ACTIVE), 2625 I_COUNT(0), 2626 VALID_PIXEL_MODE(0), 2627 CF_INST(SQ_CF_INST_RETURN), 2628 BARRIER(0)); 2629 2630 /* subroutine read-constant-src */ 2631 2632 /* 10 */ 2633 /* read constants into GPR0 - the four MOVs from KC4 at slots 28-31 */ 2634 shader[i++] = CF_ALU_DWORD0(ADDR(28), 2635 KCACHE_BANK0(0), 2636 KCACHE_BANK1(0), 2637 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); 2638 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 2639 KCACHE_ADDR0(0), 2640 KCACHE_ADDR1(0), 2641 I_COUNT(4), 2642 ALT_CONST(1), 2643 CF_INST(SQ_CF_INST_ALU), 2644 WHOLE_QUAD_MODE(0), 2645 BARRIER(1)); 2646 2647 /* 11 */ 2648 /* return */ 2649 shader[i++] = CF_DWORD0(ADDR(0), 2650 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2651 shader[i++] = CF_DWORD1(POP_COUNT(0), 2652 CF_CONST(0), 2653 COND(SQ_CF_COND_ACTIVE), 2654 I_COUNT(0), 2655 VALID_PIXEL_MODE(0), 2656 CF_INST(SQ_CF_INST_RETURN), 2657 BARRIER(0)); 2658 2659 /* subroutine interp-fetch-mask */ 2660 2661 /* 12 */ 2662 /* interpolate mask - the four INTERP_XY instructions at slots 32-35 */ 2663 shader[i++] = CF_ALU_DWORD0(ADDR(32), 2664 KCACHE_BANK0(0), 
2665 KCACHE_BANK1(0), 2666 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 2667 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 2668 KCACHE_ADDR0(0), 2669 KCACHE_ADDR1(0), 2670 I_COUNT(4), 2671 ALT_CONST(0), 2672 CF_INST(SQ_CF_INST_ALU), 2673 WHOLE_QUAD_MODE(0), 2674 BARRIER(1)); 2675 2676 /* 13 */ 2677 /* texture fetch mask into GPR1 - the SAMPLE at slots 36/37 */ 2678 shader[i++] = CF_DWORD0(ADDR(36), 2679 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2680 shader[i++] = CF_DWORD1(POP_COUNT(0), 2681 CF_CONST(0), 2682 COND(SQ_CF_COND_ACTIVE), 2683 I_COUNT(1), 2684 VALID_PIXEL_MODE(0), 2685 CF_INST(SQ_CF_INST_TC), 2686 BARRIER(1)); 2687 2688 /* 14 */ 2689 /* return */ 2690 shader[i++] = CF_DWORD0(ADDR(0), 2691 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2692 shader[i++] = CF_DWORD1(POP_COUNT(0), 2693 CF_CONST(0), 2694 COND(SQ_CF_COND_ACTIVE), 2695 I_COUNT(0), 2696 VALID_PIXEL_MODE(0), 2697 CF_INST(SQ_CF_INST_RETURN), 2698 BARRIER(0)); 2699 2700 /* subroutine read-constant-mask */ 2701 2702 /* 15 */ 2703 /* read constants into GPR1 - the four MOVs from KC5 at slots 38-41 */ 2704 shader[i++] = CF_ALU_DWORD0(ADDR(38), 2705 KCACHE_BANK0(0), 2706 KCACHE_BANK1(0), 2707 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); 2708 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 2709 KCACHE_ADDR0(0), 2710 KCACHE_ADDR1(0), 2711 I_COUNT(4), 2712 ALT_CONST(1), 2713 CF_INST(SQ_CF_INST_ALU), 2714 WHOLE_QUAD_MODE(0), 2715 BARRIER(1)); 2716 2717 /* 16 */ 2718 /* return */ 2719 shader[i++] = CF_DWORD0(ADDR(0), 2720 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2721 shader[i++] = CF_DWORD1(POP_COUNT(0), 2722 CF_CONST(0), 2723 COND(SQ_CF_COND_ACTIVE), 2724 I_COUNT(0), 2725 VALID_PIXEL_MODE(0), 2726 CF_INST(SQ_CF_INST_RETURN), 2727 BARRIER(0)); 2728 2729 /* ALU clauses and texture fetches targeted by the ADDR() operands above */ 2730 2731 /* 17 */ 2732 /* MUL gpr[0].x gpr[0].x gpr[1].x */ 2733 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2734 SRC0_REL(ABSOLUTE), 2735 SRC0_ELEM(ELEM_X), 2736 SRC0_NEG(0), 2737 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 2738 SRC1_REL(ABSOLUTE), 2739 SRC1_ELEM(ELEM_X), 2740 SRC1_NEG(0), 2741 
INDEX_MODE(SQ_INDEX_LOOP), 2742 PRED_SEL(SQ_PRED_SEL_OFF), 2743 LAST(0)); 2744 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2745 SRC1_ABS(0), 2746 UPDATE_EXECUTE_MASK(0), 2747 UPDATE_PRED(0), 2748 WRITE_MASK(1), 2749 OMOD(SQ_ALU_OMOD_OFF), 2750 ALU_INST(SQ_OP2_INST_MUL), 2751 BANK_SWIZZLE(SQ_ALU_VEC_012), 2752 DST_GPR(0), 2753 DST_REL(ABSOLUTE), 2754 DST_ELEM(ELEM_X), 2755 CLAMP(1)); 2756 2757 /* 18 */ 2758 /* MUL gpr[0].y gpr[0].y gpr[1].y */ 2759 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2760 SRC0_REL(ABSOLUTE), 2761 SRC0_ELEM(ELEM_Y), 2762 SRC0_NEG(0), 2763 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 2764 SRC1_REL(ABSOLUTE), 2765 SRC1_ELEM(ELEM_Y), 2766 SRC1_NEG(0), 2767 INDEX_MODE(SQ_INDEX_LOOP), 2768 PRED_SEL(SQ_PRED_SEL_OFF), 2769 LAST(0)); 2770 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2771 SRC1_ABS(0), 2772 UPDATE_EXECUTE_MASK(0), 2773 UPDATE_PRED(0), 2774 WRITE_MASK(1), 2775 OMOD(SQ_ALU_OMOD_OFF), 2776 ALU_INST(SQ_OP2_INST_MUL), 2777 BANK_SWIZZLE(SQ_ALU_VEC_012), 2778 DST_GPR(0), 2779 DST_REL(ABSOLUTE), 2780 DST_ELEM(ELEM_Y), 2781 CLAMP(1)); 2782 /* 19 */ 2783 /* MUL gpr[0].z gpr[0].z gpr[1].z */ 2784 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2785 SRC0_REL(ABSOLUTE), 2786 SRC0_ELEM(ELEM_Z), 2787 SRC0_NEG(0), 2788 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 2789 SRC1_REL(ABSOLUTE), 2790 SRC1_ELEM(ELEM_Z), 2791 SRC1_NEG(0), 2792 INDEX_MODE(SQ_INDEX_LOOP), 2793 PRED_SEL(SQ_PRED_SEL_OFF), 2794 LAST(0)); 2795 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2796 SRC1_ABS(0), 2797 UPDATE_EXECUTE_MASK(0), 2798 UPDATE_PRED(0), 2799 WRITE_MASK(1), 2800 OMOD(SQ_ALU_OMOD_OFF), 2801 ALU_INST(SQ_OP2_INST_MUL), 2802 BANK_SWIZZLE(SQ_ALU_VEC_012), 2803 DST_GPR(0), 2804 DST_REL(ABSOLUTE), 2805 DST_ELEM(ELEM_Z), 2806 CLAMP(1)); 2807 /* 20 */ 2808 /* MUL gpr[0].w gpr[0].w gpr[1].w - LAST(1) closes this group of four */ 2809 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2810 SRC0_REL(ABSOLUTE), 2811 SRC0_ELEM(ELEM_W), 2812 SRC0_NEG(0), 2813 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 2814 SRC1_REL(ABSOLUTE), 2815 
SRC1_ELEM(ELEM_W), 2816 SRC1_NEG(0), 2817 INDEX_MODE(SQ_INDEX_LOOP), 2818 PRED_SEL(SQ_PRED_SEL_OFF), 2819 LAST(1)); 2820 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2821 SRC1_ABS(0), 2822 UPDATE_EXECUTE_MASK(0), 2823 UPDATE_PRED(0), 2824 WRITE_MASK(1), 2825 OMOD(SQ_ALU_OMOD_OFF), 2826 ALU_INST(SQ_OP2_INST_MUL), 2827 BANK_SWIZZLE(SQ_ALU_VEC_012), 2828 DST_GPR(0), 2829 DST_REL(ABSOLUTE), 2830 DST_ELEM(ELEM_W), 2831 CLAMP(1)); 2832 2833 /* 21 */ 2834 /* INTERP_XY GPR0.x, GPR0.y PARAM0.x */ 2835 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2836 SRC0_REL(ABSOLUTE), 2837 SRC0_ELEM(ELEM_Y), 2838 SRC0_NEG(0), 2839 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 2840 SRC1_REL(ABSOLUTE), 2841 SRC1_ELEM(ELEM_X), 2842 SRC1_NEG(0), 2843 INDEX_MODE(SQ_INDEX_AR_X), 2844 PRED_SEL(SQ_PRED_SEL_OFF), 2845 LAST(0)); 2846 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2847 SRC1_ABS(0), 2848 UPDATE_EXECUTE_MASK(0), 2849 UPDATE_PRED(0), 2850 WRITE_MASK(1), 2851 OMOD(SQ_ALU_OMOD_OFF), 2852 ALU_INST(SQ_OP2_INST_INTERP_XY), 2853 BANK_SWIZZLE(SQ_ALU_VEC_210), 2854 DST_GPR(0), 2855 DST_REL(ABSOLUTE), 2856 DST_ELEM(ELEM_X), 2857 CLAMP(0)); 2858 /* 22 */ 2859 /* INTERP_XY GPR0.y, GPR0.x PARAM0.x */ 2860 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2861 SRC0_REL(ABSOLUTE), 2862 SRC0_ELEM(ELEM_X), 2863 SRC0_NEG(0), 2864 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 2865 SRC1_REL(ABSOLUTE), 2866 SRC1_ELEM(ELEM_X), 2867 SRC1_NEG(0), 2868 INDEX_MODE(SQ_INDEX_AR_X), 2869 PRED_SEL(SQ_PRED_SEL_OFF), 2870 LAST(0)); 2871 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2872 SRC1_ABS(0), 2873 UPDATE_EXECUTE_MASK(0), 2874 UPDATE_PRED(0), 2875 WRITE_MASK(1), 2876 OMOD(SQ_ALU_OMOD_OFF), 2877 ALU_INST(SQ_OP2_INST_INTERP_XY), 2878 BANK_SWIZZLE(SQ_ALU_VEC_210), 2879 DST_GPR(0), 2880 DST_REL(ABSOLUTE), 2881 DST_ELEM(ELEM_Y), 2882 CLAMP(0)); 2883 /* 23 */ 2884 /* INTERP_XY GPR0.z, GPR0.y PARAM0.x (WRITE_MASK is 0 for this slot) */ 2885 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2886 SRC0_REL(ABSOLUTE), 2887 SRC0_ELEM(ELEM_Y), 2888 SRC0_NEG(0), 
2889 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 2890 SRC1_REL(ABSOLUTE), 2891 SRC1_ELEM(ELEM_X), 2892 SRC1_NEG(0), 2893 INDEX_MODE(SQ_INDEX_AR_X), 2894 PRED_SEL(SQ_PRED_SEL_OFF), 2895 LAST(0)); 2896 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2897 SRC1_ABS(0), 2898 UPDATE_EXECUTE_MASK(0), 2899 UPDATE_PRED(0), 2900 WRITE_MASK(0), 2901 OMOD(SQ_ALU_OMOD_OFF), 2902 ALU_INST(SQ_OP2_INST_INTERP_XY), 2903 BANK_SWIZZLE(SQ_ALU_VEC_210), 2904 DST_GPR(0), 2905 DST_REL(ABSOLUTE), 2906 DST_ELEM(ELEM_Z), 2907 CLAMP(0)); 2908 2909 /* 24 */ 2910 /* INTERP_XY GPR0.w, GPR0.x PARAM0.x (WRITE_MASK is 0 for this slot) */ 2911 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2912 SRC0_REL(ABSOLUTE), 2913 SRC0_ELEM(ELEM_X), 2914 SRC0_NEG(0), 2915 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 2916 SRC1_REL(ABSOLUTE), 2917 SRC1_ELEM(ELEM_X), 2918 SRC1_NEG(0), 2919 INDEX_MODE(SQ_INDEX_AR_X), 2920 PRED_SEL(SQ_PRED_SEL_OFF), 2921 LAST(1)); 2922 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2923 SRC1_ABS(0), 2924 UPDATE_EXECUTE_MASK(0), 2925 UPDATE_PRED(0), 2926 WRITE_MASK(0), 2927 OMOD(SQ_ALU_OMOD_OFF), 2928 ALU_INST(SQ_OP2_INST_INTERP_XY), 2929 BANK_SWIZZLE(SQ_ALU_VEC_210), 2930 DST_GPR(0), 2931 DST_REL(ABSOLUTE), 2932 DST_ELEM(ELEM_W), 2933 CLAMP(0)); 2934 2935 /* 25 - two zero words; presumably padding so the fetch at 26 starts on an even slot - TODO confirm */ 2936 shader[i++] = 0; 2937 shader[i++] = 0; 2938 2939 /* 26/27 */ 2940 /* SAMPLE RID=0 GPR0, GPR0 */ 2941 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 2942 INST_MOD(0), 2943 FETCH_WHOLE_QUAD(0), 2944 RESOURCE_ID(0), 2945 SRC_GPR(0), 2946 SRC_REL(ABSOLUTE), 2947 ALT_CONST(0), 2948 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 2949 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 2950 shader[i++] = TEX_DWORD1(DST_GPR(0), 2951 DST_REL(ABSOLUTE), 2952 DST_SEL_X(SQ_SEL_X), 2953 DST_SEL_Y(SQ_SEL_Y), 2954 DST_SEL_Z(SQ_SEL_Z), 2955 DST_SEL_W(SQ_SEL_W), 2956 LOD_BIAS(0), 2957 COORD_TYPE_X(TEX_NORMALIZED), 2958 COORD_TYPE_Y(TEX_NORMALIZED), 2959 COORD_TYPE_Z(TEX_NORMALIZED), 2960 COORD_TYPE_W(TEX_NORMALIZED)); 2961 shader[i++] = TEX_DWORD2(OFFSET_X(0), 2962 OFFSET_Y(0), 2963 OFFSET_Z(0), 
2964 SAMPLER_ID(0), 2965 SRC_SEL_X(SQ_SEL_X), 2966 SRC_SEL_Y(SQ_SEL_Y), 2967 SRC_SEL_Z(SQ_SEL_0), 2968 SRC_SEL_W(SQ_SEL_1)); 2969 shader[i++] = TEX_DWORD_PAD; 2970 2971 /* 28 */ 2972 /* MOV GPR0.x, KC4.x */ 2973 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4), 2974 SRC0_REL(ABSOLUTE), 2975 SRC0_ELEM(ELEM_X), 2976 SRC0_NEG(0), 2977 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 2978 SRC1_REL(ABSOLUTE), 2979 SRC1_ELEM(ELEM_X), 2980 SRC1_NEG(0), 2981 INDEX_MODE(SQ_INDEX_AR_X), 2982 PRED_SEL(SQ_PRED_SEL_OFF), 2983 LAST(0)); 2984 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2985 SRC1_ABS(0), 2986 UPDATE_EXECUTE_MASK(0), 2987 UPDATE_PRED(0), 2988 WRITE_MASK(1), 2989 OMOD(SQ_ALU_OMOD_OFF), 2990 ALU_INST(SQ_OP2_INST_MOV), 2991 BANK_SWIZZLE(SQ_ALU_VEC_012), 2992 DST_GPR(0), 2993 DST_REL(ABSOLUTE), 2994 DST_ELEM(ELEM_X), 2995 CLAMP(1)); 2996 2997 /* 29 */ 2998 /* MOV GPR0.y, KC4.y */ 2999 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4), 3000 SRC0_REL(ABSOLUTE), 3001 SRC0_ELEM(ELEM_Y), 3002 SRC0_NEG(0), 3003 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 3004 SRC1_REL(ABSOLUTE), 3005 SRC1_ELEM(ELEM_X), 3006 SRC1_NEG(0), 3007 INDEX_MODE(SQ_INDEX_AR_X), 3008 PRED_SEL(SQ_PRED_SEL_OFF), 3009 LAST(0)); 3010 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 3011 SRC1_ABS(0), 3012 UPDATE_EXECUTE_MASK(0), 3013 UPDATE_PRED(0), 3014 WRITE_MASK(1), 3015 OMOD(SQ_ALU_OMOD_OFF), 3016 ALU_INST(SQ_OP2_INST_MOV), 3017 BANK_SWIZZLE(SQ_ALU_VEC_012), 3018 DST_GPR(0), 3019 DST_REL(ABSOLUTE), 3020 DST_ELEM(ELEM_Y), 3021 CLAMP(1)); 3022 3023 /* 30 */ 3024 /* MOV GPR0.z, KC4.z */ 3025 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4), 3026 SRC0_REL(ABSOLUTE), 3027 SRC0_ELEM(ELEM_Z), 3028 SRC0_NEG(0), 3029 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 3030 SRC1_REL(ABSOLUTE), 3031 SRC1_ELEM(ELEM_X), 3032 SRC1_NEG(0), 3033 INDEX_MODE(SQ_INDEX_AR_X), 3034 PRED_SEL(SQ_PRED_SEL_OFF), 3035 LAST(0)); 3036 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 3037 SRC1_ABS(0), 3038 UPDATE_EXECUTE_MASK(0), 3039 UPDATE_PRED(0), 3040 
WRITE_MASK(1), 3041 OMOD(SQ_ALU_OMOD_OFF), 3042 ALU_INST(SQ_OP2_INST_MOV), 3043 BANK_SWIZZLE(SQ_ALU_VEC_012), 3044 DST_GPR(0), 3045 DST_REL(ABSOLUTE), 3046 DST_ELEM(ELEM_Z), 3047 CLAMP(1)); 3048 3049 /* 31 */ 3050 /* MOV GPR0.w, KC4.w */ 3051 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4), 3052 SRC0_REL(ABSOLUTE), 3053 SRC0_ELEM(ELEM_W), 3054 SRC0_NEG(0), 3055 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 3056 SRC1_REL(ABSOLUTE), 3057 SRC1_ELEM(ELEM_X), 3058 SRC1_NEG(0), 3059 INDEX_MODE(SQ_INDEX_AR_X), 3060 PRED_SEL(SQ_PRED_SEL_OFF), 3061 LAST(1)); 3062 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 3063 SRC1_ABS(0), 3064 UPDATE_EXECUTE_MASK(0), 3065 UPDATE_PRED(0), 3066 WRITE_MASK(1), 3067 OMOD(SQ_ALU_OMOD_OFF), 3068 ALU_INST(SQ_OP2_INST_MOV), 3069 BANK_SWIZZLE(SQ_ALU_VEC_012), 3070 DST_GPR(0), 3071 DST_REL(ABSOLUTE), 3072 DST_ELEM(ELEM_W), 3073 CLAMP(1)); 3074 3075 /* 32 */ 3076 /* INTERP_XY GPR1.x, PARAM1 */ 3077 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 3078 SRC0_REL(ABSOLUTE), 3079 SRC0_ELEM(ELEM_Y), 3080 SRC0_NEG(0), 3081 SRC1_SEL(ALU_SRC_PARAM_BASE + 1), 3082 SRC1_REL(ABSOLUTE), 3083 SRC1_ELEM(ELEM_X), 3084 SRC1_NEG(0), 3085 INDEX_MODE(SQ_INDEX_AR_X), 3086 PRED_SEL(SQ_PRED_SEL_OFF), 3087 LAST(0)); 3088 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 3089 SRC1_ABS(0), 3090 UPDATE_EXECUTE_MASK(0), 3091 UPDATE_PRED(0), 3092 WRITE_MASK(1), 3093 OMOD(SQ_ALU_OMOD_OFF), 3094 ALU_INST(SQ_OP2_INST_INTERP_XY), 3095 BANK_SWIZZLE(SQ_ALU_VEC_210), 3096 DST_GPR(1), 3097 DST_REL(ABSOLUTE), 3098 DST_ELEM(ELEM_X), 3099 CLAMP(0)); 3100 /* 33 */ 3101 /* INTERP_XY GPR1.y, PARAM1 */ 3102 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 3103 SRC0_REL(ABSOLUTE), 3104 SRC0_ELEM(ELEM_X), 3105 SRC0_NEG(0), 3106 SRC1_SEL(ALU_SRC_PARAM_BASE + 1), 3107 SRC1_REL(ABSOLUTE), 3108 SRC1_ELEM(ELEM_X), 3109 SRC1_NEG(0), 3110 INDEX_MODE(SQ_INDEX_AR_X), 3111 PRED_SEL(SQ_PRED_SEL_OFF), 3112 LAST(0)); 3113 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 3114 SRC1_ABS(0), 3115 
UPDATE_EXECUTE_MASK(0), 3116 UPDATE_PRED(0), 3117 WRITE_MASK(1), 3118 OMOD(SQ_ALU_OMOD_OFF), 3119 ALU_INST(SQ_OP2_INST_INTERP_XY), 3120 BANK_SWIZZLE(SQ_ALU_VEC_210), 3121 DST_GPR(1), 3122 DST_REL(ABSOLUTE), 3123 DST_ELEM(ELEM_Y), 3124 CLAMP(0)); 3125 /* 34 */ 3126 /* INTERP_XY GPR1.z, PARAM1 (WRITE_MASK is 0 for this slot) */ 3127 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 3128 SRC0_REL(ABSOLUTE), 3129 SRC0_ELEM(ELEM_Y), 3130 SRC0_NEG(0), 3131 SRC1_SEL(ALU_SRC_PARAM_BASE + 1), 3132 SRC1_REL(ABSOLUTE), 3133 SRC1_ELEM(ELEM_X), 3134 SRC1_NEG(0), 3135 INDEX_MODE(SQ_INDEX_AR_X), 3136 PRED_SEL(SQ_PRED_SEL_OFF), 3137 LAST(0)); 3138 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 3139 SRC1_ABS(0), 3140 UPDATE_EXECUTE_MASK(0), 3141 UPDATE_PRED(0), 3142 WRITE_MASK(0), 3143 OMOD(SQ_ALU_OMOD_OFF), 3144 ALU_INST(SQ_OP2_INST_INTERP_XY), 3145 BANK_SWIZZLE(SQ_ALU_VEC_210), 3146 DST_GPR(1), 3147 DST_REL(ABSOLUTE), 3148 DST_ELEM(ELEM_Z), 3149 CLAMP(0)); 3150 /* 35 */ 3151 /* INTERP_XY GPR1.w, PARAM1 (WRITE_MASK is 0 for this slot) */ 3152 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 3153 SRC0_REL(ABSOLUTE), 3154 SRC0_ELEM(ELEM_X), 3155 SRC0_NEG(0), 3156 SRC1_SEL(ALU_SRC_PARAM_BASE + 1), 3157 SRC1_REL(ABSOLUTE), 3158 SRC1_ELEM(ELEM_X), 3159 SRC1_NEG(0), 3160 INDEX_MODE(SQ_INDEX_AR_X), 3161 PRED_SEL(SQ_PRED_SEL_OFF), 3162 LAST(1)); 3163 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 3164 SRC1_ABS(0), 3165 UPDATE_EXECUTE_MASK(0), 3166 UPDATE_PRED(0), 3167 WRITE_MASK(0), 3168 OMOD(SQ_ALU_OMOD_OFF), 3169 ALU_INST(SQ_OP2_INST_INTERP_XY), 3170 BANK_SWIZZLE(SQ_ALU_VEC_210), 3171 DST_GPR(1), 3172 DST_REL(ABSOLUTE), 3173 DST_ELEM(ELEM_W), 3174 CLAMP(0)); 3175 3176 /* 36/37 */ 3177 /* SAMPLE RID=1 GPR1, GPR1 */ 3178 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 3179 INST_MOD(0), 3180 FETCH_WHOLE_QUAD(0), 3181 RESOURCE_ID(1), 3182 SRC_GPR(1), 3183 SRC_REL(ABSOLUTE), 3184 ALT_CONST(0), 3185 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 3186 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 3187 shader[i++] = TEX_DWORD1(DST_GPR(1), 3188 
DST_REL(ABSOLUTE), 3189 DST_SEL_X(SQ_SEL_X), 3190 DST_SEL_Y(SQ_SEL_Y), 3191 DST_SEL_Z(SQ_SEL_Z), 3192 DST_SEL_W(SQ_SEL_W), 3193 LOD_BIAS(0), 3194 COORD_TYPE_X(TEX_NORMALIZED), 3195 COORD_TYPE_Y(TEX_NORMALIZED), 3196 COORD_TYPE_Z(TEX_NORMALIZED), 3197 COORD_TYPE_W(TEX_NORMALIZED)); 3198 shader[i++] = TEX_DWORD2(OFFSET_X(0), 3199 OFFSET_Y(0), 3200 OFFSET_Z(0), 3201 SAMPLER_ID(1), 3202 SRC_SEL_X(SQ_SEL_X), 3203 SRC_SEL_Y(SQ_SEL_Y), 3204 SRC_SEL_Z(SQ_SEL_0), 3205 SRC_SEL_W(SQ_SEL_1)); 3206 shader[i++] = TEX_DWORD_PAD; 3207 3208 /* 38 */ 3209 /* MOV GPR1.x, KC5.x */ 3210 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5), 3211 SRC0_REL(ABSOLUTE), 3212 SRC0_ELEM(ELEM_X), 3213 SRC0_NEG(0), 3214 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 3215 SRC1_REL(ABSOLUTE), 3216 SRC1_ELEM(ELEM_X), 3217 SRC1_NEG(0), 3218 INDEX_MODE(SQ_INDEX_AR_X), 3219 PRED_SEL(SQ_PRED_SEL_OFF), 3220 LAST(0)); 3221 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 3222 SRC1_ABS(0), 3223 UPDATE_EXECUTE_MASK(0), 3224 UPDATE_PRED(0), 3225 WRITE_MASK(1), 3226 OMOD(SQ_ALU_OMOD_OFF), 3227 ALU_INST(SQ_OP2_INST_MOV), 3228 BANK_SWIZZLE(SQ_ALU_VEC_012), 3229 DST_GPR(1), 3230 DST_REL(ABSOLUTE), 3231 DST_ELEM(ELEM_X), 3232 CLAMP(1)); 3233 3234 /* 39 */ 3235 /* MOV GPR1.y, KC5.y */ 3236 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5), 3237 SRC0_REL(ABSOLUTE), 3238 SRC0_ELEM(ELEM_Y), 3239 SRC0_NEG(0), 3240 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 3241 SRC1_REL(ABSOLUTE), 3242 SRC1_ELEM(ELEM_X), 3243 SRC1_NEG(0), 3244 INDEX_MODE(SQ_INDEX_AR_X), 3245 PRED_SEL(SQ_PRED_SEL_OFF), 3246 LAST(0)); 3247 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 3248 SRC1_ABS(0), 3249 UPDATE_EXECUTE_MASK(0), 3250 UPDATE_PRED(0), 3251 WRITE_MASK(1), 3252 OMOD(SQ_ALU_OMOD_OFF), 3253 ALU_INST(SQ_OP2_INST_MOV), 3254 BANK_SWIZZLE(SQ_ALU_VEC_012), 3255 DST_GPR(1), 3256 DST_REL(ABSOLUTE), 3257 DST_ELEM(ELEM_Y), 3258 CLAMP(1)); 3259 3260 /* 40 */ 3261 /* MOV GPR1.z, KC5.z */ 3262 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5), 3263 
SRC0_REL(ABSOLUTE), 3264 SRC0_ELEM(ELEM_Z), 3265 SRC0_NEG(0), 3266 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 3267 SRC1_REL(ABSOLUTE), 3268 SRC1_ELEM(ELEM_X), 3269 SRC1_NEG(0), 3270 INDEX_MODE(SQ_INDEX_AR_X), 3271 PRED_SEL(SQ_PRED_SEL_OFF), 3272 LAST(0)); 3273 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 3274 SRC1_ABS(0), 3275 UPDATE_EXECUTE_MASK(0), 3276 UPDATE_PRED(0), 3277 WRITE_MASK(1), 3278 OMOD(SQ_ALU_OMOD_OFF), 3279 ALU_INST(SQ_OP2_INST_MOV), 3280 BANK_SWIZZLE(SQ_ALU_VEC_012), 3281 DST_GPR(1), 3282 DST_REL(ABSOLUTE), 3283 DST_ELEM(ELEM_Z), 3284 CLAMP(1)); 3285 3286 /* 41 */ 3287 /* MOV GPR1.w, KC5.w */ 3288 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5), 3289 SRC0_REL(ABSOLUTE), 3290 SRC0_ELEM(ELEM_W), 3291 SRC0_NEG(0), 3292 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 3293 SRC1_REL(ABSOLUTE), 3294 SRC1_ELEM(ELEM_X), 3295 SRC1_NEG(0), 3296 INDEX_MODE(SQ_INDEX_AR_X), 3297 PRED_SEL(SQ_PRED_SEL_OFF), 3298 LAST(1)); 3299 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 3300 SRC1_ABS(0), 3301 UPDATE_EXECUTE_MASK(0), 3302 UPDATE_PRED(0), 3303 WRITE_MASK(1), 3304 OMOD(SQ_ALU_OMOD_OFF), 3305 ALU_INST(SQ_OP2_INST_MOV), 3306 BANK_SWIZZLE(SQ_ALU_VEC_012), 3307 DST_GPR(1), 3308 DST_REL(ABSOLUTE), 3309 DST_ELEM(ELEM_W), 3310 CLAMP(1)); 3311 3312 return i; 3313} 3314