1/* 2 * Copyright 2010 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Author: Alex Deucher <alexander.deucher@amd.com> 24 * 25 */ 26 27#ifdef HAVE_CONFIG_H 28#include "config.h" 29#endif 30 31#include "xf86.h" 32 33#include "evergreen_shader.h" 34#include "evergreen_reg.h" 35 36/* solid vs --------------------------------------- */ 37int evergreen_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader) 38{ 39 int i = 0; 40 41 /* 0 */ 42 shader[i++] = CF_DWORD0(ADDR(4), 43 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 44 shader[i++] = CF_DWORD1(POP_COUNT(0), 45 CF_CONST(0), 46 COND(SQ_CF_COND_ACTIVE), 47 I_COUNT(1), 48 VALID_PIXEL_MODE(0), 49 END_OF_PROGRAM(0), 50 CF_INST(SQ_CF_INST_VC), 51 WHOLE_QUAD_MODE(0), 52 BARRIER(1)); 53 /* 1 */ 54 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 55 TYPE(SQ_EXPORT_POS), 56 RW_GPR(1), 57 RW_REL(ABSOLUTE), 58 INDEX_GPR(0), 59 ELEM_SIZE(0)); 60 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 61 SRC_SEL_Y(SQ_SEL_Y), 62 SRC_SEL_Z(SQ_SEL_Z), 63 SRC_SEL_W(SQ_SEL_W), 64 BURST_COUNT(1), 65 VALID_PIXEL_MODE(0), 66 END_OF_PROGRAM(0), 67 CF_INST(SQ_CF_INST_EXPORT_DONE), 68 MARK(0), 69 BARRIER(1)); 70 /* 2 - always export a param whether it's used or not */ 71 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 72 TYPE(SQ_EXPORT_PARAM), 73 RW_GPR(0), 74 RW_REL(ABSOLUTE), 75 INDEX_GPR(0), 76 ELEM_SIZE(0)); 77 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 78 SRC_SEL_Y(SQ_SEL_Y), 79 SRC_SEL_Z(SQ_SEL_Z), 80 SRC_SEL_W(SQ_SEL_W), 81 BURST_COUNT(0), 82 VALID_PIXEL_MODE(0), 83 END_OF_PROGRAM(1), 84 CF_INST(SQ_CF_INST_EXPORT_DONE), 85 MARK(0), 86 BARRIER(0)); 87 /* 3 - padding */ 88 shader[i++] = 0x00000000; 89 shader[i++] = 0x00000000; 90 /* 4/5 */ 91 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 92 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 93 FETCH_WHOLE_QUAD(0), 94 BUFFER_ID(0), 95 SRC_GPR(0), 96 SRC_REL(ABSOLUTE), 97 SRC_SEL_X(SQ_SEL_X), 98 MEGA_FETCH_COUNT(8)); 99 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 100 DST_REL(0), 101 DST_SEL_X(SQ_SEL_X), 102 DST_SEL_Y(SQ_SEL_Y), 103 DST_SEL_Z(SQ_SEL_0), 104 DST_SEL_W(SQ_SEL_1), 105 USE_CONST_FIELDS(0), 106 DATA_FORMAT(FMT_32_32_FLOAT), 107 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 108 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 109 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 110 shader[i++] = VTX_DWORD2(OFFSET(0), 111#if X_BYTE_ORDER == X_BIG_ENDIAN 112 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 113#else 114 ENDIAN_SWAP(SQ_ENDIAN_NONE), 115#endif 116 CONST_BUF_NO_STRIDE(0), 117 MEGA_FETCH(1), 118 ALT_CONST(0), 119 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 120 shader[i++] = VTX_DWORD_PAD; 121 122 return i; 123} 124 125/* solid ps --------------------------------------- */ 126int evergreen_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader) 127{ 128 int i = 0; 129 130 /* 0 */ 131 shader[i++] = CF_ALU_DWORD0(ADDR(2), 132 KCACHE_BANK0(0), 133 KCACHE_BANK1(0), 134 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); 135 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 136 KCACHE_ADDR0(0), 137 KCACHE_ADDR1(0), 138 I_COUNT(4), 139 ALT_CONST(0), 140 CF_INST(SQ_CF_INST_ALU), 141 WHOLE_QUAD_MODE(0), 142 BARRIER(1)); 143 /* 1 */ 144 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 145 TYPE(SQ_EXPORT_PIXEL), 146 RW_GPR(0), 147 RW_REL(ABSOLUTE), 148 INDEX_GPR(0), 149 ELEM_SIZE(1)); 150 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 151 SRC_SEL_Y(SQ_SEL_Y), 152 SRC_SEL_Z(SQ_SEL_Z), 153 SRC_SEL_W(SQ_SEL_W), 154 BURST_COUNT(1), 155 VALID_PIXEL_MODE(0), 156 END_OF_PROGRAM(1), 157 CF_INST(SQ_CF_INST_EXPORT_DONE), 158 MARK(0), 159 BARRIER(1)); 160 161 /* 2 */ 162 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 163 SRC0_REL(ABSOLUTE), 164 SRC0_ELEM(ELEM_X), 165 SRC0_NEG(0), 166 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 167 SRC1_REL(ABSOLUTE), 168 SRC1_ELEM(ELEM_X), 169 SRC1_NEG(0), 170 INDEX_MODE(SQ_INDEX_AR_X), 171 PRED_SEL(SQ_PRED_SEL_OFF), 172 LAST(0)); 173 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 174 SRC1_ABS(0), 175 UPDATE_EXECUTE_MASK(0), 176 UPDATE_PRED(0), 177 WRITE_MASK(1), 178 OMOD(SQ_ALU_OMOD_OFF), 179 ALU_INST(SQ_OP2_INST_MOV), 180 BANK_SWIZZLE(SQ_ALU_VEC_012), 181 DST_GPR(0), 182 DST_REL(ABSOLUTE), 183 DST_ELEM(ELEM_X), 184 CLAMP(1)); 185 /* 3 */ 186 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 187 SRC0_REL(ABSOLUTE), 188 SRC0_ELEM(ELEM_Y), 189 SRC0_NEG(0), 190 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 191 SRC1_REL(ABSOLUTE), 192 SRC1_ELEM(ELEM_Y), 193 SRC1_NEG(0), 194 INDEX_MODE(SQ_INDEX_AR_X), 195 PRED_SEL(SQ_PRED_SEL_OFF), 196 LAST(0)); 197 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 198 SRC1_ABS(0), 199 UPDATE_EXECUTE_MASK(0), 200 UPDATE_PRED(0), 201 WRITE_MASK(1), 202 OMOD(SQ_ALU_OMOD_OFF), 203 ALU_INST(SQ_OP2_INST_MOV), 204 BANK_SWIZZLE(SQ_ALU_VEC_012), 205 DST_GPR(0), 206 DST_REL(ABSOLUTE), 207 DST_ELEM(ELEM_Y), 208 CLAMP(1)); 209 /* 4 */ 210 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 211 SRC0_REL(ABSOLUTE), 212 SRC0_ELEM(ELEM_Z), 213 SRC0_NEG(0), 214 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 215 SRC1_REL(ABSOLUTE), 216 SRC1_ELEM(ELEM_Z), 217 SRC1_NEG(0), 218 INDEX_MODE(SQ_INDEX_AR_X), 219 PRED_SEL(SQ_PRED_SEL_OFF), 220 LAST(0)); 221 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 222 SRC1_ABS(0), 223 UPDATE_EXECUTE_MASK(0), 224 UPDATE_PRED(0), 225 WRITE_MASK(1), 226 OMOD(SQ_ALU_OMOD_OFF), 227 ALU_INST(SQ_OP2_INST_MOV), 228 BANK_SWIZZLE(SQ_ALU_VEC_012), 229 DST_GPR(0), 230 DST_REL(ABSOLUTE), 231 DST_ELEM(ELEM_Z), 232 CLAMP(1)); 233 /* 5 */ 234 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 235 SRC0_REL(ABSOLUTE), 236 SRC0_ELEM(ELEM_W), 237 SRC0_NEG(0), 238 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 239 SRC1_REL(ABSOLUTE), 240 SRC1_ELEM(ELEM_W), 241 SRC1_NEG(0), 242 INDEX_MODE(SQ_INDEX_AR_X), 243 PRED_SEL(SQ_PRED_SEL_OFF), 244 LAST(1)); 245 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 246 SRC1_ABS(0), 247 UPDATE_EXECUTE_MASK(0), 248 UPDATE_PRED(0), 249 WRITE_MASK(1), 250 OMOD(SQ_ALU_OMOD_OFF), 251 ALU_INST(SQ_OP2_INST_MOV), 252 BANK_SWIZZLE(SQ_ALU_VEC_012), 253 DST_GPR(0), 254 DST_REL(ABSOLUTE), 255 DST_ELEM(ELEM_W), 256 CLAMP(1)); 257 258 return i; 259} 260 261/* copy vs --------------------------------------- */ 262int evergreen_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader) 263{ 264 int i = 0; 265 266 /* 0 */ 267 shader[i++] = CF_DWORD0(ADDR(4), 268 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 269 shader[i++] = CF_DWORD1(POP_COUNT(0), 270 CF_CONST(0), 271 COND(SQ_CF_COND_ACTIVE), 272 I_COUNT(2), 273 VALID_PIXEL_MODE(0), 274 END_OF_PROGRAM(0), 275 CF_INST(SQ_CF_INST_VC), 276 WHOLE_QUAD_MODE(0), 277 BARRIER(1)); 278 /* 1 */ 279 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 280 TYPE(SQ_EXPORT_POS), 281 RW_GPR(1), 282 RW_REL(ABSOLUTE), 283 INDEX_GPR(0), 284 ELEM_SIZE(0)); 285 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 286 SRC_SEL_Y(SQ_SEL_Y), 287 SRC_SEL_Z(SQ_SEL_Z), 288 SRC_SEL_W(SQ_SEL_W), 289 BURST_COUNT(0), 290 VALID_PIXEL_MODE(0), 291 END_OF_PROGRAM(0), 292 CF_INST(SQ_CF_INST_EXPORT_DONE), 293 MARK(0), 294 BARRIER(1)); 295 /* 2 */ 296 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 297 TYPE(SQ_EXPORT_PARAM), 298 RW_GPR(0), 299 RW_REL(ABSOLUTE), 300 INDEX_GPR(0), 301 ELEM_SIZE(0)); 302 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 303 SRC_SEL_Y(SQ_SEL_Y), 304 SRC_SEL_Z(SQ_SEL_Z), 305 SRC_SEL_W(SQ_SEL_W), 306 BURST_COUNT(0), 307 VALID_PIXEL_MODE(0), 308 END_OF_PROGRAM(1), 309 CF_INST(SQ_CF_INST_EXPORT_DONE), 310 MARK(0), 311 BARRIER(0)); 312 /* 3 */ 313 shader[i++] = 0x00000000; 314 shader[i++] = 0x00000000; 315 /* 4/5 */ 316 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 317 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 318 FETCH_WHOLE_QUAD(0), 319 BUFFER_ID(0), 320 SRC_GPR(0), 321 SRC_REL(ABSOLUTE), 322 SRC_SEL_X(SQ_SEL_X), 323 MEGA_FETCH_COUNT(16)); 324 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 325 DST_REL(0), 326 DST_SEL_X(SQ_SEL_X), 327 DST_SEL_Y(SQ_SEL_Y), 328 DST_SEL_Z(SQ_SEL_0), 329 DST_SEL_W(SQ_SEL_1), 330 USE_CONST_FIELDS(0), 331 DATA_FORMAT(FMT_32_32_FLOAT), 332 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 333 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 334 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 335 shader[i++] = VTX_DWORD2(OFFSET(0), 336#if X_BYTE_ORDER == X_BIG_ENDIAN 337 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 338#else 339 ENDIAN_SWAP(SQ_ENDIAN_NONE), 340#endif 341 CONST_BUF_NO_STRIDE(0), 342 MEGA_FETCH(1), 343 ALT_CONST(0), 344 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 345 shader[i++] = VTX_DWORD_PAD; 346 /* 6/7 */ 347 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 348 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 349 FETCH_WHOLE_QUAD(0), 350 BUFFER_ID(0), 351 SRC_GPR(0), 352 SRC_REL(ABSOLUTE), 353 SRC_SEL_X(SQ_SEL_X), 354 MEGA_FETCH_COUNT(8)); 355 shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), 356 DST_REL(0), 357 DST_SEL_X(SQ_SEL_X), 358 DST_SEL_Y(SQ_SEL_Y), 359 DST_SEL_Z(SQ_SEL_0), 360 DST_SEL_W(SQ_SEL_1), 361 USE_CONST_FIELDS(0), 362 DATA_FORMAT(FMT_32_32_FLOAT), 363 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 364 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 365 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 366 shader[i++] = VTX_DWORD2(OFFSET(8), 367#if X_BYTE_ORDER == X_BIG_ENDIAN 368 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 369#else 370 ENDIAN_SWAP(SQ_ENDIAN_NONE), 371#endif 372 CONST_BUF_NO_STRIDE(0), 373 MEGA_FETCH(0), 374 ALT_CONST(0), 375 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 376 shader[i++] = VTX_DWORD_PAD; 377 378 return i; 379} 380 381/* copy ps --------------------------------------- */ 382int evergreen_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader) 383{ 384 int i = 0; 385 386 /* CF INST 0 */ 387 shader[i++] = CF_ALU_DWORD0(ADDR(3), 388 KCACHE_BANK0(0), 389 KCACHE_BANK1(0), 390 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 391 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 392 KCACHE_ADDR0(0), 393 KCACHE_ADDR1(0), 394 I_COUNT(4), 395 ALT_CONST(0), 396 CF_INST(SQ_CF_INST_ALU), 397 WHOLE_QUAD_MODE(0), 398 BARRIER(1)); 399 /* CF INST 1 */ 400 shader[i++] = CF_DWORD0(ADDR(8), 401 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 402 shader[i++] = CF_DWORD1(POP_COUNT(0), 403 CF_CONST(0), 404 COND(SQ_CF_COND_ACTIVE), 405 I_COUNT(1), 406 VALID_PIXEL_MODE(0), 407 END_OF_PROGRAM(0), 408 CF_INST(SQ_CF_INST_TC), 409 WHOLE_QUAD_MODE(0), 410 BARRIER(1)); 411 /* CF INST 2 */ 412 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 413 TYPE(SQ_EXPORT_PIXEL), 414 RW_GPR(0), 415 RW_REL(ABSOLUTE), 416 INDEX_GPR(0), 417 ELEM_SIZE(1)); 418 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 419 SRC_SEL_Y(SQ_SEL_Y), 420 SRC_SEL_Z(SQ_SEL_Z), 421 SRC_SEL_W(SQ_SEL_W), 422 BURST_COUNT(1), 423 VALID_PIXEL_MODE(0), 424 END_OF_PROGRAM(1), 425 CF_INST(SQ_CF_INST_EXPORT_DONE), 426 MARK(0), 427 BARRIER(1)); 428 429 /* 3 interpolate tex coords */ 430 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 431 SRC0_REL(ABSOLUTE), 432 SRC0_ELEM(ELEM_Y), 433 SRC0_NEG(0), 434 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 435 SRC1_REL(ABSOLUTE), 436 SRC1_ELEM(ELEM_X), 437 SRC1_NEG(0), 438 INDEX_MODE(SQ_INDEX_AR_X), 439 PRED_SEL(SQ_PRED_SEL_OFF), 440 LAST(0)); 441 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 442 SRC1_ABS(0), 443 UPDATE_EXECUTE_MASK(0), 444 UPDATE_PRED(0), 445 WRITE_MASK(1), 446 OMOD(SQ_ALU_OMOD_OFF), 447 ALU_INST(SQ_OP2_INST_INTERP_XY), 448 BANK_SWIZZLE(SQ_ALU_VEC_210), 449 DST_GPR(0), 450 DST_REL(ABSOLUTE), 451 DST_ELEM(ELEM_X), 452 CLAMP(0)); 453 /* 4 */ 454 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 455 SRC0_REL(ABSOLUTE), 456 SRC0_ELEM(ELEM_X), 457 SRC0_NEG(0), 458 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 459 SRC1_REL(ABSOLUTE), 460 SRC1_ELEM(ELEM_X), 461 SRC1_NEG(0), 462 INDEX_MODE(SQ_INDEX_AR_X), 463 PRED_SEL(SQ_PRED_SEL_OFF), 464 LAST(0)); 465 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 466 SRC1_ABS(0), 467 UPDATE_EXECUTE_MASK(0), 468 UPDATE_PRED(0), 469 WRITE_MASK(1), 470 OMOD(SQ_ALU_OMOD_OFF), 471 ALU_INST(SQ_OP2_INST_INTERP_XY), 472 BANK_SWIZZLE(SQ_ALU_VEC_210), 473 DST_GPR(0), 474 DST_REL(ABSOLUTE), 475 DST_ELEM(ELEM_Y), 476 CLAMP(0)); 477 /* 5 */ 478 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 479 SRC0_REL(ABSOLUTE), 480 SRC0_ELEM(ELEM_Y), 481 SRC0_NEG(0), 482 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 483 SRC1_REL(ABSOLUTE), 484 SRC1_ELEM(ELEM_X), 485 SRC1_NEG(0), 486 INDEX_MODE(SQ_INDEX_AR_X), 487 PRED_SEL(SQ_PRED_SEL_OFF), 488 LAST(0)); 489 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 490 SRC1_ABS(0), 491 UPDATE_EXECUTE_MASK(0), 492 UPDATE_PRED(0), 493 WRITE_MASK(0), 494 OMOD(SQ_ALU_OMOD_OFF), 495 ALU_INST(SQ_OP2_INST_INTERP_XY), 496 BANK_SWIZZLE(SQ_ALU_VEC_210), 497 DST_GPR(0), 498 DST_REL(ABSOLUTE), 499 DST_ELEM(ELEM_Z), 500 CLAMP(0)); 501 /* 6 */ 502 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 503 SRC0_REL(ABSOLUTE), 504 SRC0_ELEM(ELEM_X), 505 SRC0_NEG(0), 506 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 507 SRC1_REL(ABSOLUTE), 508 SRC1_ELEM(ELEM_X), 509 SRC1_NEG(0), 510 INDEX_MODE(SQ_INDEX_AR_X), 511 PRED_SEL(SQ_PRED_SEL_OFF), 512 LAST(1)); 513 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 514 SRC1_ABS(0), 515 UPDATE_EXECUTE_MASK(0), 516 UPDATE_PRED(0), 517 WRITE_MASK(0), 518 OMOD(SQ_ALU_OMOD_OFF), 519 ALU_INST(SQ_OP2_INST_INTERP_XY), 520 BANK_SWIZZLE(SQ_ALU_VEC_210), 521 DST_GPR(0), 522 DST_REL(ABSOLUTE), 523 DST_ELEM(ELEM_W), 524 CLAMP(0)); 525 526 /* 7 */ 527 shader[i++] = 0x00000000; 528 shader[i++] = 0x00000000; 529 530 /* 8/9 TEX INST 0 */ 531 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 532 INST_MOD(0), 533 FETCH_WHOLE_QUAD(0), 534 RESOURCE_ID(0), 535 SRC_GPR(0), 536 SRC_REL(ABSOLUTE), 537 ALT_CONST(0), 538 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 539 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 540 shader[i++] = TEX_DWORD1(DST_GPR(0), 541 DST_REL(ABSOLUTE), 542 DST_SEL_X(SQ_SEL_X), /* R */ 543 DST_SEL_Y(SQ_SEL_Y), /* G */ 544 DST_SEL_Z(SQ_SEL_Z), /* B */ 545 DST_SEL_W(SQ_SEL_W), /* A */ 546 LOD_BIAS(0), 547 COORD_TYPE_X(TEX_UNNORMALIZED), 548 COORD_TYPE_Y(TEX_UNNORMALIZED), 549 COORD_TYPE_Z(TEX_UNNORMALIZED), 550 COORD_TYPE_W(TEX_UNNORMALIZED)); 551 shader[i++] = TEX_DWORD2(OFFSET_X(0), 552 OFFSET_Y(0), 553 OFFSET_Z(0), 554 SAMPLER_ID(0), 555 SRC_SEL_X(SQ_SEL_X), 556 SRC_SEL_Y(SQ_SEL_Y), 557 SRC_SEL_Z(SQ_SEL_0), 558 SRC_SEL_W(SQ_SEL_1)); 559 shader[i++] = TEX_DWORD_PAD; 560 561 return i; 562} 563 564int evergreen_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader) 565{ 566 int i = 0; 567 568 /* 0 */ 569 shader[i++] = CF_DWORD0(ADDR(6), 570 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 571 shader[i++] = CF_DWORD1(POP_COUNT(0), 572 CF_CONST(0), 573 COND(SQ_CF_COND_ACTIVE), 574 I_COUNT(2), 575 VALID_PIXEL_MODE(0), 576 END_OF_PROGRAM(0), 577 CF_INST(SQ_CF_INST_VC), 578 WHOLE_QUAD_MODE(0), 579 BARRIER(1)); 580 581 /* 1 - ALU */ 582 shader[i++] = CF_ALU_DWORD0(ADDR(4), 583 KCACHE_BANK0(0), 584 KCACHE_BANK1(0), 585 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); 586 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 587 KCACHE_ADDR0(0), 588 KCACHE_ADDR1(0), 589 I_COUNT(2), 590 ALT_CONST(0), 591 CF_INST(SQ_CF_INST_ALU), 592 WHOLE_QUAD_MODE(0), 593 BARRIER(1)); 594 595 /* 2 */ 596 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 597 TYPE(SQ_EXPORT_POS), 598 RW_GPR(1), 599 RW_REL(ABSOLUTE), 600 INDEX_GPR(0), 601 ELEM_SIZE(3)); 602 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 603 SRC_SEL_Y(SQ_SEL_Y), 604 SRC_SEL_Z(SQ_SEL_Z), 605 SRC_SEL_W(SQ_SEL_W), 606 BURST_COUNT(1), 607 VALID_PIXEL_MODE(0), 608 END_OF_PROGRAM(0), 609 CF_INST(SQ_CF_INST_EXPORT_DONE), 610 MARK(0), 611 BARRIER(1)); 612 /* 3 */ 613 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 614 TYPE(SQ_EXPORT_PARAM), 615 RW_GPR(0), 616 RW_REL(ABSOLUTE), 617 INDEX_GPR(0), 618 ELEM_SIZE(3)); 619 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 620 SRC_SEL_Y(SQ_SEL_Y), 621 SRC_SEL_Z(SQ_SEL_Z), 622 SRC_SEL_W(SQ_SEL_W), 623 BURST_COUNT(1), 624 VALID_PIXEL_MODE(0), 625 END_OF_PROGRAM(1), 626 CF_INST(SQ_CF_INST_EXPORT_DONE), 627 MARK(0), 628 BARRIER(0)); 629 630 631 /* 4 texX / w */ 632 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 633 SRC0_REL(ABSOLUTE), 634 SRC0_ELEM(ELEM_X), 635 SRC0_NEG(0), 636 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 637 SRC1_REL(ABSOLUTE), 638 SRC1_ELEM(ELEM_X), 639 SRC1_NEG(0), 640 INDEX_MODE(SQ_INDEX_AR_X), 641 PRED_SEL(SQ_PRED_SEL_OFF), 642 LAST(0)); 643 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 644 SRC1_ABS(0), 645 UPDATE_EXECUTE_MASK(0), 646 UPDATE_PRED(0), 647 WRITE_MASK(1), 648 OMOD(SQ_ALU_OMOD_OFF), 649 ALU_INST(SQ_OP2_INST_MUL), 650 BANK_SWIZZLE(SQ_ALU_VEC_012), 651 DST_GPR(0), 652 DST_REL(ABSOLUTE), 653 DST_ELEM(ELEM_X), 654 CLAMP(0)); 655 656 /* 5 texY / h */ 657 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 658 SRC0_REL(ABSOLUTE), 659 SRC0_ELEM(ELEM_Y), 660 SRC0_NEG(0), 661 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 662 SRC1_REL(ABSOLUTE), 663 SRC1_ELEM(ELEM_Y), 664 SRC1_NEG(0), 665 INDEX_MODE(SQ_INDEX_AR_X), 666 PRED_SEL(SQ_PRED_SEL_OFF), 667 LAST(1)); 668 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 669 SRC1_ABS(0), 670 UPDATE_EXECUTE_MASK(0), 671 UPDATE_PRED(0), 672 WRITE_MASK(1), 673 OMOD(SQ_ALU_OMOD_OFF), 674 ALU_INST(SQ_OP2_INST_MUL), 675 BANK_SWIZZLE(SQ_ALU_VEC_012), 676 DST_GPR(0), 677 DST_REL(ABSOLUTE), 678 DST_ELEM(ELEM_Y), 679 CLAMP(0)); 680 681 /* 6/7 */ 682 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 683 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 684 FETCH_WHOLE_QUAD(0), 685 BUFFER_ID(0), 686 SRC_GPR(0), 687 SRC_REL(ABSOLUTE), 688 SRC_SEL_X(SQ_SEL_X), 689 MEGA_FETCH_COUNT(16)); 690 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 691 DST_REL(ABSOLUTE), 692 DST_SEL_X(SQ_SEL_X), 693 DST_SEL_Y(SQ_SEL_Y), 694 DST_SEL_Z(SQ_SEL_0), 695 DST_SEL_W(SQ_SEL_1), 696 USE_CONST_FIELDS(0), 697 DATA_FORMAT(FMT_32_32_FLOAT), 698 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 699 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 700 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 701 shader[i++] = VTX_DWORD2(OFFSET(0), 702#if X_BYTE_ORDER == X_BIG_ENDIAN 703 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 704#else 705 ENDIAN_SWAP(SQ_ENDIAN_NONE), 706#endif 707 CONST_BUF_NO_STRIDE(0), 708 MEGA_FETCH(1), 709 ALT_CONST(0), 710 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 711 shader[i++] = VTX_DWORD_PAD; 712 /* 8/9 */ 713 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 714 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 715 FETCH_WHOLE_QUAD(0), 716 BUFFER_ID(0), 717 SRC_GPR(0), 718 SRC_REL(ABSOLUTE), 719 SRC_SEL_X(SQ_SEL_X), 720 MEGA_FETCH_COUNT(8)); 721 shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), 722 DST_REL(ABSOLUTE), 723 DST_SEL_X(SQ_SEL_X), 724 DST_SEL_Y(SQ_SEL_Y), 725 DST_SEL_Z(SQ_SEL_0), 726 DST_SEL_W(SQ_SEL_1), 727 USE_CONST_FIELDS(0), 728 DATA_FORMAT(FMT_32_32_FLOAT), 729 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 730 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 731 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 732 shader[i++] = VTX_DWORD2(OFFSET(8), 733#if X_BYTE_ORDER == X_BIG_ENDIAN 734 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 735#else 736 ENDIAN_SWAP(SQ_ENDIAN_NONE), 737#endif 738 CONST_BUF_NO_STRIDE(0), 739 MEGA_FETCH(0), 740 ALT_CONST(0), 741 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 742 shader[i++] = VTX_DWORD_PAD; 743 744 return i; 745} 746 747int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) 748{ 749 int i = 0; 750 751 /* 0 */ 752 shader[i++] = CF_ALU_DWORD0(ADDR(5), 753 KCACHE_BANK0(0), 754 KCACHE_BANK1(0), 755 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 756 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 757 KCACHE_ADDR0(0), 758 KCACHE_ADDR1(0), 759 I_COUNT(4), 760 ALT_CONST(0), 761 CF_INST(SQ_CF_INST_ALU), 762 WHOLE_QUAD_MODE(0), 763 BARRIER(1)); 764 /* 1 */ 765 shader[i++] = CF_DWORD0(ADDR(21), 766 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 767 shader[i++] = CF_DWORD1(POP_COUNT(0), 768 CF_CONST(0), 769 COND(SQ_CF_COND_BOOL), 770 I_COUNT(0), 771 VALID_PIXEL_MODE(0), 772 END_OF_PROGRAM(0), 773 CF_INST(SQ_CF_INST_CALL), 774 WHOLE_QUAD_MODE(0), 775 BARRIER(0)); 776 /* 2 */ 777 shader[i++] = CF_DWORD0(ADDR(30), 778 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 779 shader[i++] = CF_DWORD1(POP_COUNT(0), 780 CF_CONST(0), 781 COND(SQ_CF_COND_NOT_BOOL), 782 I_COUNT(0), 783 VALID_PIXEL_MODE(0), 784 END_OF_PROGRAM(0), 785 CF_INST(SQ_CF_INST_CALL), 786 WHOLE_QUAD_MODE(0), 787 BARRIER(0)); 788 /* 3 */ 789 shader[i++] = CF_ALU_DWORD0(ADDR(9), 790 KCACHE_BANK0(0), 791 KCACHE_BANK1(0), 792 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); 793 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 794 KCACHE_ADDR0(0), 795 KCACHE_ADDR1(0), 796 I_COUNT(12), 797 ALT_CONST(0), 798 CF_INST(SQ_CF_INST_ALU), 799 WHOLE_QUAD_MODE(0), 800 BARRIER(1)); 801 /* 4 */ 802 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 803 TYPE(SQ_EXPORT_PIXEL), 804 RW_GPR(2), 805 RW_REL(ABSOLUTE), 806 INDEX_GPR(0), 807 ELEM_SIZE(3)); 808 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 809 SRC_SEL_Y(SQ_SEL_Y), 810 SRC_SEL_Z(SQ_SEL_Z), 811 SRC_SEL_W(SQ_SEL_W), 812 BURST_COUNT(1), 813 VALID_PIXEL_MODE(0), 814 END_OF_PROGRAM(1), 815 CF_INST(SQ_CF_INST_EXPORT_DONE), 816 MARK(0), 817 BARRIER(1)); 818 /* 5 interpolate tex coords */ 819 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 820 SRC0_REL(ABSOLUTE), 821 SRC0_ELEM(ELEM_Y), 822 SRC0_NEG(0), 823 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 824 SRC1_REL(ABSOLUTE), 825 SRC1_ELEM(ELEM_X), 826 SRC1_NEG(0), 827 INDEX_MODE(SQ_INDEX_AR_X), 828 PRED_SEL(SQ_PRED_SEL_OFF), 829 LAST(0)); 830 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 831 SRC1_ABS(0), 832 UPDATE_EXECUTE_MASK(0), 833 UPDATE_PRED(0), 834 WRITE_MASK(1), 835 OMOD(SQ_ALU_OMOD_OFF), 836 ALU_INST(SQ_OP2_INST_INTERP_XY), 837 BANK_SWIZZLE(SQ_ALU_VEC_210), 838 DST_GPR(0), 839 DST_REL(ABSOLUTE), 840 DST_ELEM(ELEM_X), 841 CLAMP(0)); 842 /* 6 */ 843 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 844 SRC0_REL(ABSOLUTE), 845 SRC0_ELEM(ELEM_X), 846 SRC0_NEG(0), 847 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 848 SRC1_REL(ABSOLUTE), 849 SRC1_ELEM(ELEM_X), 850 SRC1_NEG(0), 851 INDEX_MODE(SQ_INDEX_AR_X), 852 PRED_SEL(SQ_PRED_SEL_OFF), 853 LAST(0)); 854 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 855 SRC1_ABS(0), 856 UPDATE_EXECUTE_MASK(0), 857 UPDATE_PRED(0), 858 WRITE_MASK(1), 859 OMOD(SQ_ALU_OMOD_OFF), 860 ALU_INST(SQ_OP2_INST_INTERP_XY), 861 BANK_SWIZZLE(SQ_ALU_VEC_210), 862 DST_GPR(0), 863 DST_REL(ABSOLUTE), 864 DST_ELEM(ELEM_Y), 865 CLAMP(0)); 866 /* 7 */ 867 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 868 SRC0_REL(ABSOLUTE), 869 SRC0_ELEM(ELEM_Y), 870 SRC0_NEG(0), 871 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 872 SRC1_REL(ABSOLUTE), 873 SRC1_ELEM(ELEM_X), 874 SRC1_NEG(0), 875 INDEX_MODE(SQ_INDEX_AR_X), 876 PRED_SEL(SQ_PRED_SEL_OFF), 877 LAST(0)); 878 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 879 SRC1_ABS(0), 880 UPDATE_EXECUTE_MASK(0), 881 UPDATE_PRED(0), 882 WRITE_MASK(0), 883 OMOD(SQ_ALU_OMOD_OFF), 884 ALU_INST(SQ_OP2_INST_INTERP_XY), 885 BANK_SWIZZLE(SQ_ALU_VEC_210), 886 DST_GPR(0), 887 DST_REL(ABSOLUTE), 888 DST_ELEM(ELEM_Z), 889 CLAMP(0)); 890 /* 8 */ 891 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 892 SRC0_REL(ABSOLUTE), 893 SRC0_ELEM(ELEM_X), 894 SRC0_NEG(0), 895 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 896 SRC1_REL(ABSOLUTE), 897 SRC1_ELEM(ELEM_X), 898 SRC1_NEG(0), 899 INDEX_MODE(SQ_INDEX_AR_X), 900 PRED_SEL(SQ_PRED_SEL_OFF), 901 LAST(1)); 902 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 903 SRC1_ABS(0), 904 UPDATE_EXECUTE_MASK(0), 905 UPDATE_PRED(0), 906 WRITE_MASK(0), 907 OMOD(SQ_ALU_OMOD_OFF), 908 ALU_INST(SQ_OP2_INST_INTERP_XY), 909 BANK_SWIZZLE(SQ_ALU_VEC_210), 910 DST_GPR(0), 911 DST_REL(ABSOLUTE), 912 DST_ELEM(ELEM_W), 913 CLAMP(0)); 914 915 /* 9,10,11,12 */ 916 /* r2.x = MAD(c0.w, r1.x, c0.x) */ 917 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 918 SRC0_REL(ABSOLUTE), 919 SRC0_ELEM(ELEM_W), 920 SRC0_NEG(0), 921 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 922 SRC1_REL(ABSOLUTE), 923 SRC1_ELEM(ELEM_X), 924 SRC1_NEG(0), 925 INDEX_MODE(SQ_INDEX_LOOP), 926 PRED_SEL(SQ_PRED_SEL_OFF), 927 LAST(0)); 928 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0), 929 SRC2_REL(ABSOLUTE), 930 SRC2_ELEM(ELEM_X), 931 SRC2_NEG(0), 932 ALU_INST(SQ_OP3_INST_MULADD), 933 BANK_SWIZZLE(SQ_ALU_VEC_012), 934 DST_GPR(2), 935 DST_REL(ABSOLUTE), 936 DST_ELEM(ELEM_X), 937 CLAMP(0)); 938 /* r2.y = MAD(c0.w, r1.x, c0.y) */ 939 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 940 SRC0_REL(ABSOLUTE), 941 SRC0_ELEM(ELEM_W), 942 SRC0_NEG(0), 943 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 944 SRC1_REL(ABSOLUTE), 945 SRC1_ELEM(ELEM_X), 946 SRC1_NEG(0), 947 INDEX_MODE(SQ_INDEX_LOOP), 948 PRED_SEL(SQ_PRED_SEL_OFF), 949 LAST(0)); 950 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0), 951 SRC2_REL(ABSOLUTE), 952 SRC2_ELEM(ELEM_Y), 953 SRC2_NEG(0), 954 ALU_INST(SQ_OP3_INST_MULADD), 955 BANK_SWIZZLE(SQ_ALU_VEC_012), 956 DST_GPR(2), 957 DST_REL(ABSOLUTE), 958 DST_ELEM(ELEM_Y), 959 CLAMP(0)); 960 /* r2.z = MAD(c0.w, r1.x, c0.z) */ 961 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 962 SRC0_REL(ABSOLUTE), 963 SRC0_ELEM(ELEM_W), 964 SRC0_NEG(0), 965 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 966 SRC1_REL(ABSOLUTE), 967 SRC1_ELEM(ELEM_X), 968 SRC1_NEG(0), 969 INDEX_MODE(SQ_INDEX_LOOP), 970 PRED_SEL(SQ_PRED_SEL_OFF), 971 LAST(0)); 972 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0), 973 SRC2_REL(ABSOLUTE), 974 SRC2_ELEM(ELEM_Z), 975 SRC2_NEG(0), 976 ALU_INST(SQ_OP3_INST_MULADD), 977 BANK_SWIZZLE(SQ_ALU_VEC_012), 978 DST_GPR(2), 979 DST_REL(ABSOLUTE), 980 DST_ELEM(ELEM_Z), 981 CLAMP(0)); 982 /* r2.w = MAD(0, 0, 1) */ 983 shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), 984 SRC0_REL(ABSOLUTE), 985 SRC0_ELEM(ELEM_X), 986 SRC0_NEG(0), 987 SRC1_SEL(SQ_ALU_SRC_0), 988 SRC1_REL(ABSOLUTE), 989 SRC1_ELEM(ELEM_X), 990 SRC1_NEG(0), 991 INDEX_MODE(SQ_INDEX_LOOP), 992 PRED_SEL(SQ_PRED_SEL_OFF), 993 LAST(1)); 994 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), 995 SRC2_REL(ABSOLUTE), 996 SRC2_ELEM(ELEM_X), 997 SRC2_NEG(0), 998 ALU_INST(SQ_OP3_INST_MULADD), 999 BANK_SWIZZLE(SQ_ALU_VEC_012), 1000 DST_GPR(2), 1001 DST_REL(ABSOLUTE), 1002 DST_ELEM(ELEM_W), 1003 CLAMP(0)); 1004 1005 /* 13,14,15,16 */ 1006 /* r2.x = MAD(c1.x, r1.y, pv.x) */ 1007 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1), 1008 SRC0_REL(ABSOLUTE), 1009 SRC0_ELEM(ELEM_X), 1010 SRC0_NEG(0), 1011 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 1012 SRC1_REL(ABSOLUTE), 1013 SRC1_ELEM(ELEM_Y), 1014 SRC1_NEG(0), 1015 INDEX_MODE(SQ_INDEX_LOOP), 1016 PRED_SEL(SQ_PRED_SEL_OFF), 1017 LAST(0)); 1018 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 1019 SRC2_REL(ABSOLUTE), 1020 SRC2_ELEM(ELEM_X), 1021 SRC2_NEG(0), 1022 ALU_INST(SQ_OP3_INST_MULADD), 1023 BANK_SWIZZLE(SQ_ALU_VEC_012), 1024 DST_GPR(2), 1025 DST_REL(ABSOLUTE), 1026 DST_ELEM(ELEM_X), 1027 CLAMP(0)); 1028 /* r2.y = MAD(c1.y, r1.y, pv.y) */ 1029 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1), 1030 SRC0_REL(ABSOLUTE), 1031 SRC0_ELEM(ELEM_Y), 1032 SRC0_NEG(0), 1033 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 1034 SRC1_REL(ABSOLUTE), 1035 SRC1_ELEM(ELEM_Y), 1036 SRC1_NEG(0), 1037 INDEX_MODE(SQ_INDEX_LOOP), 1038 PRED_SEL(SQ_PRED_SEL_OFF), 1039 LAST(0)); 1040 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 1041 SRC2_REL(ABSOLUTE), 1042 SRC2_ELEM(ELEM_Y), 1043 SRC2_NEG(0), 1044 ALU_INST(SQ_OP3_INST_MULADD), 1045 BANK_SWIZZLE(SQ_ALU_VEC_012), 1046 DST_GPR(2), 1047 DST_REL(ABSOLUTE), 1048 DST_ELEM(ELEM_Y), 1049 CLAMP(0)); 1050 /* r2.z = MAD(c1.z, r1.y, pv.z) */ 1051 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1), 1052 SRC0_REL(ABSOLUTE), 1053 SRC0_ELEM(ELEM_Z), 1054 SRC0_NEG(0), 1055 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 1056 SRC1_REL(ABSOLUTE), 1057 SRC1_ELEM(ELEM_Y), 1058 SRC1_NEG(0), 1059 INDEX_MODE(SQ_INDEX_LOOP), 1060 PRED_SEL(SQ_PRED_SEL_OFF), 1061 LAST(0)); 1062 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 1063 SRC2_REL(ABSOLUTE), 1064 SRC2_ELEM(ELEM_Z), 1065 SRC2_NEG(0), 1066 ALU_INST(SQ_OP3_INST_MULADD), 1067 BANK_SWIZZLE(SQ_ALU_VEC_012), 1068 DST_GPR(2), 1069 DST_REL(ABSOLUTE), 1070 DST_ELEM(ELEM_Z), 1071 CLAMP(0)); 1072 /* r2.w = MAD(0, 0, 1) */ 1073 shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), 1074 SRC0_REL(ABSOLUTE), 1075 SRC0_ELEM(ELEM_X), 1076 SRC0_NEG(0), 1077 SRC1_SEL(SQ_ALU_SRC_0), 1078 SRC1_REL(ABSOLUTE), 1079 SRC1_ELEM(ELEM_X), 1080 SRC1_NEG(0), 1081 INDEX_MODE(SQ_INDEX_LOOP), 1082 PRED_SEL(SQ_PRED_SEL_OFF), 1083 LAST(1)); 1084 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), 1085 SRC2_REL(ABSOLUTE), 1086 SRC2_ELEM(ELEM_W), 1087 SRC2_NEG(0), 1088 ALU_INST(SQ_OP3_INST_MULADD), 1089 BANK_SWIZZLE(SQ_ALU_VEC_012), 1090 DST_GPR(2), 1091 DST_REL(ABSOLUTE), 1092 DST_ELEM(ELEM_W), 1093 CLAMP(0)); 1094 /* 17,18,19,20 */ 1095 /* r2.x = MAD(c2.x, r1.z, pv.x) */ 1096 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2), 1097 SRC0_REL(ABSOLUTE), 1098 SRC0_ELEM(ELEM_X), 1099 SRC0_NEG(0), 1100 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 1101 SRC1_REL(ABSOLUTE), 1102 SRC1_ELEM(ELEM_Z), 1103 SRC1_NEG(0), 1104 INDEX_MODE(SQ_INDEX_LOOP), 1105 PRED_SEL(SQ_PRED_SEL_OFF), 1106 LAST(0)); 1107 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 1108 SRC2_REL(ABSOLUTE), 1109 SRC2_ELEM(ELEM_X), 1110 SRC2_NEG(0), 1111 ALU_INST(SQ_OP3_INST_MULADD), 1112 BANK_SWIZZLE(SQ_ALU_VEC_012), 1113 DST_GPR(2), 1114 DST_REL(ABSOLUTE), 1115 DST_ELEM(ELEM_X), 1116 CLAMP(1)); 1117 /* r2.y = MAD(c2.y, r1.z, pv.y) */ 1118 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2), 1119 SRC0_REL(ABSOLUTE), 1120 SRC0_ELEM(ELEM_Y), 1121 SRC0_NEG(0), 1122 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 1123 SRC1_REL(ABSOLUTE), 1124 SRC1_ELEM(ELEM_Z), 1125 SRC1_NEG(0), 1126 INDEX_MODE(SQ_INDEX_LOOP), 1127 PRED_SEL(SQ_PRED_SEL_OFF), 1128 LAST(0)); 1129 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 1130 SRC2_REL(ABSOLUTE), 1131 SRC2_ELEM(ELEM_Y), 1132 SRC2_NEG(0), 1133 ALU_INST(SQ_OP3_INST_MULADD), 1134 BANK_SWIZZLE(SQ_ALU_VEC_012), 1135 DST_GPR(2), 1136 DST_REL(ABSOLUTE), 1137 DST_ELEM(ELEM_Y), 1138 CLAMP(1)); 1139 /* r2.z = MAD(c2.z, r1.z, pv.z) */ 1140 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2), 1141 SRC0_REL(ABSOLUTE), 1142 SRC0_ELEM(ELEM_Z), 1143 SRC0_NEG(0), 1144 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 1145 SRC1_REL(ABSOLUTE), 1146 SRC1_ELEM(ELEM_Z), 1147 SRC1_NEG(0), 1148 INDEX_MODE(SQ_INDEX_LOOP), 1149 PRED_SEL(SQ_PRED_SEL_OFF), 1150 LAST(0)); 1151 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 1152 SRC2_REL(ABSOLUTE), 1153 SRC2_ELEM(ELEM_Z), 1154 SRC2_NEG(0), 1155 ALU_INST(SQ_OP3_INST_MULADD), 1156 BANK_SWIZZLE(SQ_ALU_VEC_012), 1157 DST_GPR(2), 1158 DST_REL(ABSOLUTE), 1159 DST_ELEM(ELEM_Z), 1160 CLAMP(1)); 1161 /* r2.w = MAD(0, 0, 1) */ 1162 shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), 1163 SRC0_REL(ABSOLUTE), 1164 SRC0_ELEM(ELEM_X), 1165 SRC0_NEG(0), 1166 SRC1_SEL(SQ_ALU_SRC_0), 1167 SRC1_REL(ABSOLUTE), 1168 SRC1_ELEM(ELEM_X), 1169 SRC1_NEG(0), 1170 INDEX_MODE(SQ_INDEX_LOOP), 1171 PRED_SEL(SQ_PRED_SEL_OFF), 1172 LAST(1)); 1173 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), 1174 SRC2_REL(ABSOLUTE), 1175 SRC2_ELEM(ELEM_X), 1176 SRC2_NEG(0), 1177 ALU_INST(SQ_OP3_INST_MULADD), 1178 BANK_SWIZZLE(SQ_ALU_VEC_012), 1179 DST_GPR(2), 1180 DST_REL(ABSOLUTE), 1181 DST_ELEM(ELEM_W), 1182 CLAMP(1)); 1183 1184 /* 21 */ 1185 shader[i++] = CF_DWORD0(ADDR(24), 1186 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1187 shader[i++] = CF_DWORD1(POP_COUNT(0), 1188 CF_CONST(0), 1189 COND(SQ_CF_COND_ACTIVE), 1190 I_COUNT(3), 1191 VALID_PIXEL_MODE(0), 1192 END_OF_PROGRAM(0), 1193 CF_INST(SQ_CF_INST_TC), 1194 WHOLE_QUAD_MODE(0), 1195 BARRIER(1)); 1196 /* 22 */ 1197 shader[i++] = CF_DWORD0(ADDR(0), 1198 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1199 shader[i++] = CF_DWORD1(POP_COUNT(0), 1200 CF_CONST(0), 1201 COND(SQ_CF_COND_ACTIVE), 1202 I_COUNT(0), 1203 VALID_PIXEL_MODE(0), 1204 END_OF_PROGRAM(0), 1205 CF_INST(SQ_CF_INST_RETURN), 1206 WHOLE_QUAD_MODE(0), 1207 BARRIER(1)); 1208 /* 23 */ 1209 shader[i++] = 0x00000000; 1210 shader[i++] = 0x00000000; 1211 /* 24/25 */ 1212 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1213 INST_MOD(0), 1214 FETCH_WHOLE_QUAD(0), 1215 RESOURCE_ID(0), 1216 SRC_GPR(0), 1217 SRC_REL(ABSOLUTE), 1218 ALT_CONST(0), 1219 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 1220 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 1221 shader[i++] = TEX_DWORD1(DST_GPR(1), 1222 DST_REL(ABSOLUTE), 1223 DST_SEL_X(SQ_SEL_X), 1224 DST_SEL_Y(SQ_SEL_MASK), 1225 DST_SEL_Z(SQ_SEL_MASK), 1226 DST_SEL_W(SQ_SEL_1), 1227 LOD_BIAS(0), 1228 COORD_TYPE_X(TEX_NORMALIZED), 1229 COORD_TYPE_Y(TEX_NORMALIZED), 1230 COORD_TYPE_Z(TEX_NORMALIZED), 1231 COORD_TYPE_W(TEX_NORMALIZED)); 1232 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1233 OFFSET_Y(0), 1234 OFFSET_Z(0), 1235 SAMPLER_ID(0), 1236 SRC_SEL_X(SQ_SEL_X), 1237 SRC_SEL_Y(SQ_SEL_Y), 1238 SRC_SEL_Z(SQ_SEL_0), 1239 SRC_SEL_W(SQ_SEL_1)); 1240 shader[i++] = TEX_DWORD_PAD; 1241 /* 26/27 */ 1242 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1243 INST_MOD(0), 1244 FETCH_WHOLE_QUAD(0), 1245 RESOURCE_ID(1), 1246 SRC_GPR(0), 1247 SRC_REL(ABSOLUTE), 1248 ALT_CONST(0), 1249 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 1250 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 1251 shader[i++] = TEX_DWORD1(DST_GPR(1), 1252 DST_REL(ABSOLUTE), 1253 DST_SEL_X(SQ_SEL_MASK), 1254 DST_SEL_Y(SQ_SEL_MASK), 1255 DST_SEL_Z(SQ_SEL_X), 1256 DST_SEL_W(SQ_SEL_MASK), 1257 LOD_BIAS(0), 1258 COORD_TYPE_X(TEX_NORMALIZED), 1259 COORD_TYPE_Y(TEX_NORMALIZED), 1260 COORD_TYPE_Z(TEX_NORMALIZED), 1261 COORD_TYPE_W(TEX_NORMALIZED)); 1262 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1263 OFFSET_Y(0), 1264 OFFSET_Z(0), 1265 SAMPLER_ID(1), 1266 SRC_SEL_X(SQ_SEL_X), 1267 SRC_SEL_Y(SQ_SEL_Y), 1268 SRC_SEL_Z(SQ_SEL_0), 1269 SRC_SEL_W(SQ_SEL_1)); 1270 shader[i++] = TEX_DWORD_PAD; 1271 /* 28/29 */ 1272 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1273 INST_MOD(0), 1274 FETCH_WHOLE_QUAD(0), 1275 RESOURCE_ID(2), 1276 SRC_GPR(0), 1277 SRC_REL(ABSOLUTE), 1278 ALT_CONST(0), 1279 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 1280 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 1281 shader[i++] = TEX_DWORD1(DST_GPR(1), 1282 DST_REL(ABSOLUTE), 1283 DST_SEL_X(SQ_SEL_MASK), 1284 DST_SEL_Y(SQ_SEL_X), 1285 DST_SEL_Z(SQ_SEL_MASK), 1286 DST_SEL_W(SQ_SEL_MASK), 1287 LOD_BIAS(0), 1288 COORD_TYPE_X(TEX_NORMALIZED), 1289 COORD_TYPE_Y(TEX_NORMALIZED), 1290 COORD_TYPE_Z(TEX_NORMALIZED), 1291 COORD_TYPE_W(TEX_NORMALIZED)); 1292 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1293 OFFSET_Y(0), 1294 OFFSET_Z(0), 1295 SAMPLER_ID(2), 1296 SRC_SEL_X(SQ_SEL_X), 1297 SRC_SEL_Y(SQ_SEL_Y), 1298 SRC_SEL_Z(SQ_SEL_0), 1299 SRC_SEL_W(SQ_SEL_1)); 1300 shader[i++] = TEX_DWORD_PAD; 1301 /* 30 */ 1302 shader[i++] = CF_DWORD0(ADDR(32), 1303 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1304 shader[i++] = CF_DWORD1(POP_COUNT(0), 1305 CF_CONST(0), 1306 COND(SQ_CF_COND_ACTIVE), 1307 I_COUNT(1), 1308 VALID_PIXEL_MODE(0), 1309 END_OF_PROGRAM(0), 1310 CF_INST(SQ_CF_INST_TC), 1311 WHOLE_QUAD_MODE(0), 1312 BARRIER(1)); 1313 /* 31 */ 1314 shader[i++] = CF_DWORD0(ADDR(0), 1315 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1316 shader[i++] = CF_DWORD1(POP_COUNT(0), 1317 CF_CONST(0), 1318 COND(SQ_CF_COND_ACTIVE), 1319 I_COUNT(0), 1320 VALID_PIXEL_MODE(0), 1321 END_OF_PROGRAM(0), 1322 CF_INST(SQ_CF_INST_RETURN), 1323 WHOLE_QUAD_MODE(0), 1324 BARRIER(1)); 1325 /* 32/33 */ 1326 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1327 INST_MOD(0), 1328 FETCH_WHOLE_QUAD(0), 1329 RESOURCE_ID(0), 1330 SRC_GPR(0), 1331 SRC_REL(ABSOLUTE), 1332 ALT_CONST(0), 1333 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 1334 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 1335 shader[i++] = TEX_DWORD1(DST_GPR(1), 1336 DST_REL(ABSOLUTE), 1337 DST_SEL_X(SQ_SEL_X), 1338 DST_SEL_Y(SQ_SEL_Y), 1339 DST_SEL_Z(SQ_SEL_Z), 1340 DST_SEL_W(SQ_SEL_1), 1341 LOD_BIAS(0), 1342 COORD_TYPE_X(TEX_NORMALIZED), 1343 COORD_TYPE_Y(TEX_NORMALIZED), 1344 COORD_TYPE_Z(TEX_NORMALIZED), 1345 COORD_TYPE_W(TEX_NORMALIZED)); 1346 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1347 OFFSET_Y(0), 1348 OFFSET_Z(0), 1349 SAMPLER_ID(0), 1350 SRC_SEL_X(SQ_SEL_X), 1351 SRC_SEL_Y(SQ_SEL_Y), 1352 SRC_SEL_Z(SQ_SEL_0), 1353 SRC_SEL_W(SQ_SEL_1)); 1354 shader[i++] = TEX_DWORD_PAD; 1355 1356 return i; 1357} 1358 1359/* comp vs --------------------------------------- */ 1360int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) 1361{ 1362 int i = 0; 1363 1364 /* 0 */ 1365 shader[i++] = CF_DWORD0(ADDR(3), 1366 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1367 shader[i++] = CF_DWORD1(POP_COUNT(0), 1368 CF_CONST(0), 1369 COND(SQ_CF_COND_BOOL), 1370 I_COUNT(0), 1371 VALID_PIXEL_MODE(0), 1372 END_OF_PROGRAM(0), 1373 CF_INST(SQ_CF_INST_CALL), 1374 WHOLE_QUAD_MODE(0), 1375 BARRIER(0)); 1376 /* 1 */ 1377 shader[i++] = CF_DWORD0(ADDR(9), 1378 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1379 shader[i++] = CF_DWORD1(POP_COUNT(0), 1380 CF_CONST(0), 1381 COND(SQ_CF_COND_NOT_BOOL), 1382 I_COUNT(0), 1383 VALID_PIXEL_MODE(0), 1384 END_OF_PROGRAM(0), 1385 CF_INST(SQ_CF_INST_CALL), 1386 WHOLE_QUAD_MODE(0), 1387 BARRIER(0)); 1388 /* 2 */ 1389 shader[i++] = CF_DWORD0(ADDR(0), 1390 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1391 shader[i++] = CF_DWORD1(POP_COUNT(0), 1392 CF_CONST(0), 1393 COND(SQ_CF_COND_ACTIVE), 1394 I_COUNT(0), 1395 VALID_PIXEL_MODE(0), 1396 END_OF_PROGRAM(1), 1397 CF_INST(SQ_CF_INST_NOP), 1398 WHOLE_QUAD_MODE(0), 1399 BARRIER(1)); 1400 /* 3 - mask sub */ 1401 shader[i++] = CF_DWORD0(ADDR(44), 1402 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1403 shader[i++] = CF_DWORD1(POP_COUNT(0), 1404 CF_CONST(0), 1405 COND(SQ_CF_COND_ACTIVE), 1406 I_COUNT(3), 1407 VALID_PIXEL_MODE(0), 1408 END_OF_PROGRAM(0), 1409 CF_INST(SQ_CF_INST_VC), 1410 WHOLE_QUAD_MODE(0), 1411 BARRIER(1)); 1412 1413 /* 4 - ALU */ 1414 shader[i++] = CF_ALU_DWORD0(ADDR(14), 1415 KCACHE_BANK0(0), 1416 KCACHE_BANK1(0), 1417 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); 1418 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 1419 KCACHE_ADDR0(0), 1420 KCACHE_ADDR1(0), 1421 I_COUNT(20), 1422 ALT_CONST(0), 1423 CF_INST(SQ_CF_INST_ALU), 1424 WHOLE_QUAD_MODE(0), 1425 BARRIER(1)); 1426 1427 /* 5 - dst */ 1428 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 1429 TYPE(SQ_EXPORT_POS), 1430 RW_GPR(2), 1431 RW_REL(ABSOLUTE), 1432 INDEX_GPR(0), 1433 ELEM_SIZE(0)); 1434 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1435 SRC_SEL_Y(SQ_SEL_Y), 1436 SRC_SEL_Z(SQ_SEL_0), 1437 SRC_SEL_W(SQ_SEL_1), 1438 BURST_COUNT(1), 1439 VALID_PIXEL_MODE(0), 1440 END_OF_PROGRAM(0), 1441 CF_INST(SQ_CF_INST_EXPORT_DONE), 1442 MARK(0), 1443 BARRIER(1)); 1444 /* 6 - src */ 1445 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 1446 TYPE(SQ_EXPORT_PARAM), 1447 RW_GPR(1), 1448 RW_REL(ABSOLUTE), 1449 INDEX_GPR(0), 1450 ELEM_SIZE(0)); 1451 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1452 SRC_SEL_Y(SQ_SEL_Y), 1453 SRC_SEL_Z(SQ_SEL_0), 1454 SRC_SEL_W(SQ_SEL_1), 1455 BURST_COUNT(1), 1456 VALID_PIXEL_MODE(0), 1457 END_OF_PROGRAM(0), 1458 CF_INST(SQ_CF_INST_EXPORT), 1459 MARK(0), 1460 BARRIER(0)); 1461 /* 7 - mask */ 1462 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1), 1463 TYPE(SQ_EXPORT_PARAM), 1464 RW_GPR(0), 1465 RW_REL(ABSOLUTE), 1466 INDEX_GPR(0), 1467 ELEM_SIZE(0)); 1468 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1469 SRC_SEL_Y(SQ_SEL_Y), 1470 SRC_SEL_Z(SQ_SEL_0), 1471 SRC_SEL_W(SQ_SEL_1), 1472 BURST_COUNT(1), 1473 VALID_PIXEL_MODE(0), 1474 END_OF_PROGRAM(0), 1475 CF_INST(SQ_CF_INST_EXPORT_DONE), 1476 WHOLE_QUAD_MODE(0), 1477 BARRIER(0)); 1478 /* 8 */ 1479 shader[i++] = CF_DWORD0(ADDR(0), 1480 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1481 shader[i++] = CF_DWORD1(POP_COUNT(0), 1482 CF_CONST(0), 1483 COND(SQ_CF_COND_ACTIVE), 1484 I_COUNT(0), 1485 VALID_PIXEL_MODE(0), 1486 END_OF_PROGRAM(0), 1487 CF_INST(SQ_CF_INST_RETURN), 1488 WHOLE_QUAD_MODE(0), 1489 BARRIER(1)); 1490 /* 9 - non-mask sub */ 1491 shader[i++] = CF_DWORD0(ADDR(50), 1492 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1493 shader[i++] = CF_DWORD1(POP_COUNT(0), 1494 CF_CONST(0), 1495 COND(SQ_CF_COND_ACTIVE), 1496 I_COUNT(2), 1497 VALID_PIXEL_MODE(0), 1498 END_OF_PROGRAM(0), 1499 CF_INST(SQ_CF_INST_VC), 1500 WHOLE_QUAD_MODE(0), 1501 BARRIER(1)); 1502 1503 /* 10 - ALU */ 1504 shader[i++] = CF_ALU_DWORD0(ADDR(34), 1505 KCACHE_BANK0(0), 1506 KCACHE_BANK1(0), 1507 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); 1508 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 1509 KCACHE_ADDR0(0), 1510 KCACHE_ADDR1(0), 1511 I_COUNT(10), 1512 ALT_CONST(0), 1513 CF_INST(SQ_CF_INST_ALU), 1514 WHOLE_QUAD_MODE(0), 1515 BARRIER(1)); 1516 1517 /* 11 - dst */ 1518 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 1519 TYPE(SQ_EXPORT_POS), 1520 RW_GPR(1), 1521 RW_REL(ABSOLUTE), 1522 INDEX_GPR(0), 1523 ELEM_SIZE(0)); 1524 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1525 SRC_SEL_Y(SQ_SEL_Y), 1526 SRC_SEL_Z(SQ_SEL_0), 1527 SRC_SEL_W(SQ_SEL_1), 1528 BURST_COUNT(0), 1529 VALID_PIXEL_MODE(0), 1530 END_OF_PROGRAM(0), 1531 CF_INST(SQ_CF_INST_EXPORT_DONE), 1532 MARK(0), 1533 BARRIER(1)); 1534 /* 12 - src */ 1535 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 1536 TYPE(SQ_EXPORT_PARAM), 1537 RW_GPR(0), 1538 RW_REL(ABSOLUTE), 1539 INDEX_GPR(0), 1540 ELEM_SIZE(0)); 1541 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1542 SRC_SEL_Y(SQ_SEL_Y), 1543 SRC_SEL_Z(SQ_SEL_0), 1544 SRC_SEL_W(SQ_SEL_1), 1545 BURST_COUNT(0), 1546 VALID_PIXEL_MODE(0), 1547 END_OF_PROGRAM(0), 1548 CF_INST(SQ_CF_INST_EXPORT_DONE), 1549 MARK(0), 1550 BARRIER(0)); 1551 /* 13 */ 1552 shader[i++] = CF_DWORD0(ADDR(0), 1553 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1554 shader[i++] = CF_DWORD1(POP_COUNT(0), 1555 CF_CONST(0), 1556 COND(SQ_CF_COND_ACTIVE), 1557 I_COUNT(0), 1558 VALID_PIXEL_MODE(0), 1559 END_OF_PROGRAM(0), 1560 CF_INST(SQ_CF_INST_RETURN), 1561 WHOLE_QUAD_MODE(0), 1562 BARRIER(1)); 1563 1564 /* 14 srcX.x DOT4 - mask */ 1565 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1566 SRC0_REL(ABSOLUTE), 1567 SRC0_ELEM(ELEM_X), 1568 SRC0_NEG(0), 1569 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 1570 SRC1_REL(ABSOLUTE), 1571 SRC1_ELEM(ELEM_X), 1572 SRC1_NEG(0), 1573 INDEX_MODE(SQ_INDEX_LOOP), 1574 PRED_SEL(SQ_PRED_SEL_OFF), 1575 LAST(0)); 1576 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1577 SRC1_ABS(0), 1578 UPDATE_EXECUTE_MASK(0), 1579 UPDATE_PRED(0), 1580 WRITE_MASK(1), 1581 OMOD(SQ_ALU_OMOD_OFF), 1582 ALU_INST(SQ_OP2_INST_DOT4), 1583 BANK_SWIZZLE(SQ_ALU_VEC_012), 1584 DST_GPR(3), 1585 DST_REL(ABSOLUTE), 1586 DST_ELEM(ELEM_X), 1587 CLAMP(0)); 1588 1589 /* 15 srcX.y DOT4 - mask */ 1590 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1591 SRC0_REL(ABSOLUTE), 1592 SRC0_ELEM(ELEM_Y), 1593 SRC0_NEG(0), 1594 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 1595 SRC1_REL(ABSOLUTE), 1596 SRC1_ELEM(ELEM_Y), 1597 SRC1_NEG(0), 1598 INDEX_MODE(SQ_INDEX_LOOP), 1599 PRED_SEL(SQ_PRED_SEL_OFF), 1600 LAST(0)); 1601 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1602 SRC1_ABS(0), 1603 UPDATE_EXECUTE_MASK(0), 1604 UPDATE_PRED(0), 1605 WRITE_MASK(0), 1606 OMOD(SQ_ALU_OMOD_OFF), 1607 ALU_INST(SQ_OP2_INST_DOT4), 1608 BANK_SWIZZLE(SQ_ALU_VEC_012), 1609 DST_GPR(3), 1610 DST_REL(ABSOLUTE), 1611 DST_ELEM(ELEM_Y), 1612 CLAMP(0)); 1613 1614 /* 16 srcX.z DOT4 - mask */ 1615 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1616 SRC0_REL(ABSOLUTE), 1617 SRC0_ELEM(ELEM_Z), 1618 SRC0_NEG(0), 1619 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 1620 SRC1_REL(ABSOLUTE), 1621 SRC1_ELEM(ELEM_Z), 1622 SRC1_NEG(0), 1623 INDEX_MODE(SQ_INDEX_LOOP), 1624 PRED_SEL(SQ_PRED_SEL_OFF), 1625 LAST(0)); 1626 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1627 SRC1_ABS(0), 1628 UPDATE_EXECUTE_MASK(0), 1629 UPDATE_PRED(0), 1630 WRITE_MASK(0), 1631 OMOD(SQ_ALU_OMOD_OFF), 1632 ALU_INST(SQ_OP2_INST_DOT4), 1633 BANK_SWIZZLE(SQ_ALU_VEC_012), 1634 DST_GPR(3), 1635 DST_REL(ABSOLUTE), 1636 DST_ELEM(ELEM_Z), 1637 CLAMP(0)); 1638 1639 /* 17 srcX.w DOT4 - mask */ 1640 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1641 SRC0_REL(ABSOLUTE), 1642 SRC0_ELEM(ELEM_W), 1643 SRC0_NEG(0), 1644 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 1645 SRC1_REL(ABSOLUTE), 1646 SRC1_ELEM(ELEM_W), 1647 SRC1_NEG(0), 1648 INDEX_MODE(SQ_INDEX_LOOP), 1649 PRED_SEL(SQ_PRED_SEL_OFF), 1650 LAST(1)); 1651 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1652 SRC1_ABS(0), 1653 UPDATE_EXECUTE_MASK(0), 1654 UPDATE_PRED(0), 1655 WRITE_MASK(0), 1656 OMOD(SQ_ALU_OMOD_OFF), 1657 ALU_INST(SQ_OP2_INST_DOT4), 1658 BANK_SWIZZLE(SQ_ALU_VEC_012), 1659 DST_GPR(3), 1660 DST_REL(ABSOLUTE), 1661 DST_ELEM(ELEM_W), 1662 CLAMP(0)); 1663 1664 /* 18 srcY.x DOT4 - mask */ 1665 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1666 SRC0_REL(ABSOLUTE), 1667 SRC0_ELEM(ELEM_X), 1668 SRC0_NEG(0), 1669 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 1670 SRC1_REL(ABSOLUTE), 1671 SRC1_ELEM(ELEM_X), 1672 SRC1_NEG(0), 1673 INDEX_MODE(SQ_INDEX_LOOP), 1674 PRED_SEL(SQ_PRED_SEL_OFF), 1675 LAST(0)); 1676 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1677 SRC1_ABS(0), 1678 UPDATE_EXECUTE_MASK(0), 1679 UPDATE_PRED(0), 1680 WRITE_MASK(0), 1681 OMOD(SQ_ALU_OMOD_OFF), 1682 ALU_INST(SQ_OP2_INST_DOT4), 1683 BANK_SWIZZLE(SQ_ALU_VEC_012), 1684 DST_GPR(3), 1685 DST_REL(ABSOLUTE), 1686 DST_ELEM(ELEM_X), 1687 CLAMP(0)); 1688 1689 /* 19 srcY.y DOT4 - mask */ 1690 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1691 SRC0_REL(ABSOLUTE), 1692 SRC0_ELEM(ELEM_Y), 1693 SRC0_NEG(0), 1694 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 1695 SRC1_REL(ABSOLUTE), 1696 SRC1_ELEM(ELEM_Y), 1697 SRC1_NEG(0), 1698 INDEX_MODE(SQ_INDEX_LOOP), 1699 PRED_SEL(SQ_PRED_SEL_OFF), 1700 LAST(0)); 1701 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1702 SRC1_ABS(0), 1703 UPDATE_EXECUTE_MASK(0), 1704 UPDATE_PRED(0), 1705 WRITE_MASK(1), 1706 OMOD(SQ_ALU_OMOD_OFF), 1707 ALU_INST(SQ_OP2_INST_DOT4), 1708 BANK_SWIZZLE(SQ_ALU_VEC_012), 1709 DST_GPR(3), 1710 DST_REL(ABSOLUTE), 1711 DST_ELEM(ELEM_Y), 1712 CLAMP(0)); 1713 1714 /* 20 srcY.z DOT4 - mask */ 1715 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1716 SRC0_REL(ABSOLUTE), 1717 SRC0_ELEM(ELEM_Z), 1718 SRC0_NEG(0), 1719 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 1720 SRC1_REL(ABSOLUTE), 1721 SRC1_ELEM(ELEM_Z), 1722 SRC1_NEG(0), 1723 INDEX_MODE(SQ_INDEX_LOOP), 1724 PRED_SEL(SQ_PRED_SEL_OFF), 1725 LAST(0)); 1726 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1727 SRC1_ABS(0), 1728 UPDATE_EXECUTE_MASK(0), 1729 UPDATE_PRED(0), 1730 WRITE_MASK(0), 1731 OMOD(SQ_ALU_OMOD_OFF), 1732 ALU_INST(SQ_OP2_INST_DOT4), 1733 BANK_SWIZZLE(SQ_ALU_VEC_012), 1734 DST_GPR(3), 1735 DST_REL(ABSOLUTE), 1736 DST_ELEM(ELEM_Z), 1737 CLAMP(0)); 1738 1739 /* 21 srcY.w DOT4 - mask */ 1740 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1741 SRC0_REL(ABSOLUTE), 1742 SRC0_ELEM(ELEM_W), 1743 SRC0_NEG(0), 1744 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 1745 SRC1_REL(ABSOLUTE), 1746 SRC1_ELEM(ELEM_W), 1747 SRC1_NEG(0), 1748 INDEX_MODE(SQ_INDEX_LOOP), 1749 PRED_SEL(SQ_PRED_SEL_OFF), 1750 LAST(1)); 1751 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1752 SRC1_ABS(0), 1753 UPDATE_EXECUTE_MASK(0), 1754 UPDATE_PRED(0), 1755 WRITE_MASK(0), 1756 OMOD(SQ_ALU_OMOD_OFF), 1757 ALU_INST(SQ_OP2_INST_DOT4), 1758 BANK_SWIZZLE(SQ_ALU_VEC_012), 1759 DST_GPR(3), 1760 DST_REL(ABSOLUTE), 1761 DST_ELEM(ELEM_W), 1762 CLAMP(0)); 1763 1764 /* 22 maskX.x DOT4 - mask */ 1765 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1766 SRC0_REL(ABSOLUTE), 1767 SRC0_ELEM(ELEM_X), 1768 SRC0_NEG(0), 1769 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2), 1770 SRC1_REL(ABSOLUTE), 1771 SRC1_ELEM(ELEM_X), 1772 SRC1_NEG(0), 1773 INDEX_MODE(SQ_INDEX_LOOP), 1774 PRED_SEL(SQ_PRED_SEL_OFF), 1775 LAST(0)); 1776 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1777 SRC1_ABS(0), 1778 UPDATE_EXECUTE_MASK(0), 1779 UPDATE_PRED(0), 1780 WRITE_MASK(1), 1781 OMOD(SQ_ALU_OMOD_OFF), 1782 ALU_INST(SQ_OP2_INST_DOT4), 1783 BANK_SWIZZLE(SQ_ALU_VEC_012), 1784 DST_GPR(4), 1785 DST_REL(ABSOLUTE), 1786 DST_ELEM(ELEM_X), 1787 CLAMP(0)); 1788 1789 /* 23 maskX.y DOT4 - mask */ 1790 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1791 SRC0_REL(ABSOLUTE), 1792 SRC0_ELEM(ELEM_Y), 1793 SRC0_NEG(0), 1794 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2), 1795 SRC1_REL(ABSOLUTE), 1796 SRC1_ELEM(ELEM_Y), 1797 SRC1_NEG(0), 1798 INDEX_MODE(SQ_INDEX_LOOP), 1799 PRED_SEL(SQ_PRED_SEL_OFF), 1800 LAST(0)); 1801 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1802 SRC1_ABS(0), 1803 UPDATE_EXECUTE_MASK(0), 1804 UPDATE_PRED(0), 1805 WRITE_MASK(0), 1806 OMOD(SQ_ALU_OMOD_OFF), 1807 ALU_INST(SQ_OP2_INST_DOT4), 1808 BANK_SWIZZLE(SQ_ALU_VEC_012), 1809 DST_GPR(4), 1810 DST_REL(ABSOLUTE), 1811 DST_ELEM(ELEM_Y), 1812 CLAMP(0)); 1813 1814 /* 24 maskX.z DOT4 - mask */ 1815 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1816 SRC0_REL(ABSOLUTE), 1817 SRC0_ELEM(ELEM_Z), 1818 SRC0_NEG(0), 1819 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2), 1820 SRC1_REL(ABSOLUTE), 1821 SRC1_ELEM(ELEM_Z), 1822 SRC1_NEG(0), 1823 INDEX_MODE(SQ_INDEX_LOOP), 1824 PRED_SEL(SQ_PRED_SEL_OFF), 1825 LAST(0)); 1826 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1827 SRC1_ABS(0), 1828 UPDATE_EXECUTE_MASK(0), 1829 UPDATE_PRED(0), 1830 WRITE_MASK(0), 1831 OMOD(SQ_ALU_OMOD_OFF), 1832 ALU_INST(SQ_OP2_INST_DOT4), 1833 BANK_SWIZZLE(SQ_ALU_VEC_012), 1834 DST_GPR(4), 1835 DST_REL(ABSOLUTE), 1836 DST_ELEM(ELEM_Z), 1837 CLAMP(0)); 1838 1839 /* 25 maskX.w DOT4 - mask */ 1840 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1841 SRC0_REL(ABSOLUTE), 1842 SRC0_ELEM(ELEM_W), 1843 SRC0_NEG(0), 1844 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2), 1845 SRC1_REL(ABSOLUTE), 1846 SRC1_ELEM(ELEM_W), 1847 SRC1_NEG(0), 1848 INDEX_MODE(SQ_INDEX_LOOP), 1849 PRED_SEL(SQ_PRED_SEL_OFF), 1850 LAST(1)); 1851 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1852 SRC1_ABS(0), 1853 UPDATE_EXECUTE_MASK(0), 1854 UPDATE_PRED(0), 1855 WRITE_MASK(0), 1856 OMOD(SQ_ALU_OMOD_OFF), 1857 ALU_INST(SQ_OP2_INST_DOT4), 1858 BANK_SWIZZLE(SQ_ALU_VEC_012), 1859 DST_GPR(4), 1860 DST_REL(ABSOLUTE), 1861 DST_ELEM(ELEM_W), 1862 CLAMP(0)); 1863 1864 /* 26 maskY.x DOT4 - mask */ 1865 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1866 SRC0_REL(ABSOLUTE), 1867 SRC0_ELEM(ELEM_X), 1868 SRC0_NEG(0), 1869 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3), 1870 SRC1_REL(ABSOLUTE), 1871 SRC1_ELEM(ELEM_X), 1872 SRC1_NEG(0), 1873 INDEX_MODE(SQ_INDEX_LOOP), 1874 PRED_SEL(SQ_PRED_SEL_OFF), 1875 LAST(0)); 1876 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1877 SRC1_ABS(0), 1878 UPDATE_EXECUTE_MASK(0), 1879 UPDATE_PRED(0), 1880 WRITE_MASK(0), 1881 OMOD(SQ_ALU_OMOD_OFF), 1882 ALU_INST(SQ_OP2_INST_DOT4), 1883 BANK_SWIZZLE(SQ_ALU_VEC_012), 1884 DST_GPR(4), 1885 DST_REL(ABSOLUTE), 1886 DST_ELEM(ELEM_X), 1887 CLAMP(0)); 1888 1889 /* 27 maskY.y DOT4 - mask */ 1890 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1891 SRC0_REL(ABSOLUTE), 1892 SRC0_ELEM(ELEM_Y), 1893 SRC0_NEG(0), 1894 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3), 1895 SRC1_REL(ABSOLUTE), 1896 SRC1_ELEM(ELEM_Y), 1897 SRC1_NEG(0), 1898 INDEX_MODE(SQ_INDEX_LOOP), 1899 PRED_SEL(SQ_PRED_SEL_OFF), 1900 LAST(0)); 1901 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1902 SRC1_ABS(0), 1903 UPDATE_EXECUTE_MASK(0), 1904 UPDATE_PRED(0), 1905 WRITE_MASK(1), 1906 OMOD(SQ_ALU_OMOD_OFF), 1907 ALU_INST(SQ_OP2_INST_DOT4), 1908 BANK_SWIZZLE(SQ_ALU_VEC_012), 1909 DST_GPR(4), 1910 DST_REL(ABSOLUTE), 1911 DST_ELEM(ELEM_Y), 1912 CLAMP(0)); 1913 1914 /* 28 maskY.z DOT4 - mask */ 1915 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1916 SRC0_REL(ABSOLUTE), 1917 SRC0_ELEM(ELEM_Z), 1918 SRC0_NEG(0), 1919 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3), 1920 SRC1_REL(ABSOLUTE), 1921 SRC1_ELEM(ELEM_Z), 1922 SRC1_NEG(0), 1923 INDEX_MODE(SQ_INDEX_LOOP), 1924 PRED_SEL(SQ_PRED_SEL_OFF), 1925 LAST(0)); 1926 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1927 SRC1_ABS(0), 1928 UPDATE_EXECUTE_MASK(0), 1929 UPDATE_PRED(0), 1930 WRITE_MASK(0), 1931 OMOD(SQ_ALU_OMOD_OFF), 1932 ALU_INST(SQ_OP2_INST_DOT4), 1933 BANK_SWIZZLE(SQ_ALU_VEC_012), 1934 DST_GPR(4), 1935 DST_REL(ABSOLUTE), 1936 DST_ELEM(ELEM_Z), 1937 CLAMP(0)); 1938 1939 /* 29 maskY.w DOT4 - mask */ 1940 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1941 SRC0_REL(ABSOLUTE), 1942 SRC0_ELEM(ELEM_W), 1943 SRC0_NEG(0), 1944 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3), 1945 SRC1_REL(ABSOLUTE), 1946 SRC1_ELEM(ELEM_W), 1947 SRC1_NEG(0), 1948 INDEX_MODE(SQ_INDEX_LOOP), 1949 PRED_SEL(SQ_PRED_SEL_OFF), 1950 LAST(1)); 1951 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1952 SRC1_ABS(0), 1953 UPDATE_EXECUTE_MASK(0), 1954 UPDATE_PRED(0), 1955 WRITE_MASK(0), 1956 OMOD(SQ_ALU_OMOD_OFF), 1957 ALU_INST(SQ_OP2_INST_DOT4), 1958 BANK_SWIZZLE(SQ_ALU_VEC_012), 1959 DST_GPR(4), 1960 DST_REL(ABSOLUTE), 1961 DST_ELEM(ELEM_W), 1962 CLAMP(0)); 1963 1964 /* 30 srcX / w */ 1965 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3), 1966 SRC0_REL(ABSOLUTE), 1967 SRC0_ELEM(ELEM_X), 1968 SRC0_NEG(0), 1969 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 1970 SRC1_REL(ABSOLUTE), 1971 SRC1_ELEM(ELEM_W), 1972 SRC1_NEG(0), 1973 INDEX_MODE(SQ_INDEX_AR_X), 1974 PRED_SEL(SQ_PRED_SEL_OFF), 1975 LAST(1)); 1976 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1977 SRC1_ABS(0), 1978 UPDATE_EXECUTE_MASK(0), 1979 UPDATE_PRED(0), 1980 WRITE_MASK(1), 1981 OMOD(SQ_ALU_OMOD_OFF), 1982 ALU_INST(SQ_OP2_INST_MUL), 1983 BANK_SWIZZLE(SQ_ALU_VEC_012), 1984 DST_GPR(1), 1985 DST_REL(ABSOLUTE), 1986 DST_ELEM(ELEM_X), 1987 CLAMP(0)); 1988 1989 /* 31 srcY / h */ 1990 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3), 1991 SRC0_REL(ABSOLUTE), 1992 SRC0_ELEM(ELEM_Y), 1993 SRC0_NEG(0), 1994 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 1995 SRC1_REL(ABSOLUTE), 1996 SRC1_ELEM(ELEM_W), 1997 SRC1_NEG(0), 1998 INDEX_MODE(SQ_INDEX_AR_X), 1999 PRED_SEL(SQ_PRED_SEL_OFF), 2000 LAST(1)); 2001 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2002 SRC1_ABS(0), 2003 UPDATE_EXECUTE_MASK(0), 2004 UPDATE_PRED(0), 2005 WRITE_MASK(1), 2006 OMOD(SQ_ALU_OMOD_OFF), 2007 ALU_INST(SQ_OP2_INST_MUL), 2008 BANK_SWIZZLE(SQ_ALU_VEC_012), 2009 DST_GPR(1), 2010 DST_REL(ABSOLUTE), 2011 DST_ELEM(ELEM_Y), 2012 CLAMP(0)); 2013 2014 /* 32 maskX / w */ 2015 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4), 2016 SRC0_REL(ABSOLUTE), 2017 SRC0_ELEM(ELEM_X), 2018 SRC0_NEG(0), 2019 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2), 2020 SRC1_REL(ABSOLUTE), 2021 SRC1_ELEM(ELEM_W), 2022 SRC1_NEG(0), 2023 INDEX_MODE(SQ_INDEX_AR_X), 2024 PRED_SEL(SQ_PRED_SEL_OFF), 2025 LAST(1)); 2026 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2027 SRC1_ABS(0), 2028 UPDATE_EXECUTE_MASK(0), 2029 UPDATE_PRED(0), 2030 WRITE_MASK(1), 2031 OMOD(SQ_ALU_OMOD_OFF), 2032 ALU_INST(SQ_OP2_INST_MUL), 2033 BANK_SWIZZLE(SQ_ALU_VEC_012), 2034 DST_GPR(0), 2035 DST_REL(ABSOLUTE), 2036 DST_ELEM(ELEM_X), 2037 CLAMP(0)); 2038 2039 /* 33 maskY / h */ 2040 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4), 2041 SRC0_REL(ABSOLUTE), 2042 SRC0_ELEM(ELEM_Y), 2043 SRC0_NEG(0), 2044 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3), 2045 SRC1_REL(ABSOLUTE), 2046 SRC1_ELEM(ELEM_W), 2047 SRC1_NEG(0), 2048 INDEX_MODE(SQ_INDEX_AR_X), 2049 PRED_SEL(SQ_PRED_SEL_OFF), 2050 LAST(1)); 2051 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2052 SRC1_ABS(0), 2053 UPDATE_EXECUTE_MASK(0), 2054 UPDATE_PRED(0), 2055 WRITE_MASK(1), 2056 OMOD(SQ_ALU_OMOD_OFF), 2057 ALU_INST(SQ_OP2_INST_MUL), 2058 BANK_SWIZZLE(SQ_ALU_VEC_012), 2059 DST_GPR(0), 2060 DST_REL(ABSOLUTE), 2061 DST_ELEM(ELEM_Y), 2062 CLAMP(0)); 2063 2064 /* 34 srcX.x DOT4 - non-mask */ 2065 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2066 SRC0_REL(ABSOLUTE), 2067 SRC0_ELEM(ELEM_X), 2068 SRC0_NEG(0), 2069 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 2070 SRC1_REL(ABSOLUTE), 2071 SRC1_ELEM(ELEM_X), 2072 SRC1_NEG(0), 2073 INDEX_MODE(SQ_INDEX_LOOP), 2074 PRED_SEL(SQ_PRED_SEL_OFF), 2075 LAST(0)); 2076 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2077 SRC1_ABS(0), 2078 UPDATE_EXECUTE_MASK(0), 2079 UPDATE_PRED(0), 2080 WRITE_MASK(1), 2081 OMOD(SQ_ALU_OMOD_OFF), 2082 ALU_INST(SQ_OP2_INST_DOT4), 2083 BANK_SWIZZLE(SQ_ALU_VEC_012), 2084 DST_GPR(2), 2085 DST_REL(ABSOLUTE), 2086 DST_ELEM(ELEM_X), 2087 CLAMP(0)); 2088 2089 /* 35 srcX.y DOT4 - non-mask */ 2090 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2091 SRC0_REL(ABSOLUTE), 2092 SRC0_ELEM(ELEM_Y), 2093 SRC0_NEG(0), 2094 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 2095 SRC1_REL(ABSOLUTE), 2096 SRC1_ELEM(ELEM_Y), 2097 SRC1_NEG(0), 2098 INDEX_MODE(SQ_INDEX_LOOP), 2099 PRED_SEL(SQ_PRED_SEL_OFF), 2100 LAST(0)); 2101 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2102 SRC1_ABS(0), 2103 UPDATE_EXECUTE_MASK(0), 2104 UPDATE_PRED(0), 2105 WRITE_MASK(0), 2106 OMOD(SQ_ALU_OMOD_OFF), 2107 ALU_INST(SQ_OP2_INST_DOT4), 2108 BANK_SWIZZLE(SQ_ALU_VEC_012), 2109 DST_GPR(2), 2110 DST_REL(ABSOLUTE), 2111 DST_ELEM(ELEM_Y), 2112 CLAMP(0)); 2113 2114 /* 36 srcX.z DOT4 - non-mask */ 2115 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2116 SRC0_REL(ABSOLUTE), 2117 SRC0_ELEM(ELEM_Z), 2118 SRC0_NEG(0), 2119 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 2120 SRC1_REL(ABSOLUTE), 2121 SRC1_ELEM(ELEM_Z), 2122 SRC1_NEG(0), 2123 INDEX_MODE(SQ_INDEX_LOOP), 2124 PRED_SEL(SQ_PRED_SEL_OFF), 2125 LAST(0)); 2126 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2127 SRC1_ABS(0), 2128 UPDATE_EXECUTE_MASK(0), 2129 UPDATE_PRED(0), 2130 WRITE_MASK(0), 2131 OMOD(SQ_ALU_OMOD_OFF), 2132 ALU_INST(SQ_OP2_INST_DOT4), 2133 BANK_SWIZZLE(SQ_ALU_VEC_012), 2134 DST_GPR(2), 2135 DST_REL(ABSOLUTE), 2136 DST_ELEM(ELEM_Z), 2137 CLAMP(0)); 2138 2139 /* 37 srcX.w DOT4 - non-mask */ 2140 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2141 SRC0_REL(ABSOLUTE), 2142 SRC0_ELEM(ELEM_W), 2143 SRC0_NEG(0), 2144 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 2145 SRC1_REL(ABSOLUTE), 2146 SRC1_ELEM(ELEM_W), 2147 SRC1_NEG(0), 2148 INDEX_MODE(SQ_INDEX_LOOP), 2149 PRED_SEL(SQ_PRED_SEL_OFF), 2150 LAST(1)); 2151 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2152 SRC1_ABS(0), 2153 UPDATE_EXECUTE_MASK(0), 2154 UPDATE_PRED(0), 2155 WRITE_MASK(0), 2156 OMOD(SQ_ALU_OMOD_OFF), 2157 ALU_INST(SQ_OP2_INST_DOT4), 2158 BANK_SWIZZLE(SQ_ALU_VEC_012), 2159 DST_GPR(2), 2160 DST_REL(ABSOLUTE), 2161 DST_ELEM(ELEM_W), 2162 CLAMP(0)); 2163 2164 /* 38 srcY.x DOT4 - non-mask */ 2165 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2166 SRC0_REL(ABSOLUTE), 2167 SRC0_ELEM(ELEM_X), 2168 SRC0_NEG(0), 2169 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 2170 SRC1_REL(ABSOLUTE), 2171 SRC1_ELEM(ELEM_X), 2172 SRC1_NEG(0), 2173 INDEX_MODE(SQ_INDEX_LOOP), 2174 PRED_SEL(SQ_PRED_SEL_OFF), 2175 LAST(0)); 2176 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2177 SRC1_ABS(0), 2178 UPDATE_EXECUTE_MASK(0), 2179 UPDATE_PRED(0), 2180 WRITE_MASK(0), 2181 OMOD(SQ_ALU_OMOD_OFF), 2182 ALU_INST(SQ_OP2_INST_DOT4), 2183 BANK_SWIZZLE(SQ_ALU_VEC_012), 2184 DST_GPR(2), 2185 DST_REL(ABSOLUTE), 2186 DST_ELEM(ELEM_X), 2187 CLAMP(0)); 2188 2189 /* 39 srcY.y DOT4 - non-mask */ 2190 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2191 SRC0_REL(ABSOLUTE), 2192 SRC0_ELEM(ELEM_Y), 2193 SRC0_NEG(0), 2194 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 2195 SRC1_REL(ABSOLUTE), 2196 SRC1_ELEM(ELEM_Y), 2197 SRC1_NEG(0), 2198 INDEX_MODE(SQ_INDEX_LOOP), 2199 PRED_SEL(SQ_PRED_SEL_OFF), 2200 LAST(0)); 2201 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2202 SRC1_ABS(0), 2203 UPDATE_EXECUTE_MASK(0), 2204 UPDATE_PRED(0), 2205 WRITE_MASK(1), 2206 OMOD(SQ_ALU_OMOD_OFF), 2207 ALU_INST(SQ_OP2_INST_DOT4), 2208 BANK_SWIZZLE(SQ_ALU_VEC_012), 2209 DST_GPR(2), 2210 DST_REL(ABSOLUTE), 2211 DST_ELEM(ELEM_Y), 2212 CLAMP(0)); 2213 2214 /* 40 srcY.z DOT4 - non-mask */ 2215 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2216 SRC0_REL(ABSOLUTE), 2217 SRC0_ELEM(ELEM_Z), 2218 SRC0_NEG(0), 2219 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 2220 SRC1_REL(ABSOLUTE), 2221 SRC1_ELEM(ELEM_Z), 2222 SRC1_NEG(0), 2223 INDEX_MODE(SQ_INDEX_LOOP), 2224 PRED_SEL(SQ_PRED_SEL_OFF), 2225 LAST(0)); 2226 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2227 SRC1_ABS(0), 2228 UPDATE_EXECUTE_MASK(0), 2229 UPDATE_PRED(0), 2230 WRITE_MASK(0), 2231 OMOD(SQ_ALU_OMOD_OFF), 2232 ALU_INST(SQ_OP2_INST_DOT4), 2233 BANK_SWIZZLE(SQ_ALU_VEC_012), 2234 DST_GPR(2), 2235 DST_REL(ABSOLUTE), 2236 DST_ELEM(ELEM_Z), 2237 CLAMP(0)); 2238 2239 /* 41 srcY.w DOT4 - non-mask */ 2240 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2241 SRC0_REL(ABSOLUTE), 2242 SRC0_ELEM(ELEM_W), 2243 SRC0_NEG(0), 2244 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 2245 SRC1_REL(ABSOLUTE), 2246 SRC1_ELEM(ELEM_W), 2247 SRC1_NEG(0), 2248 INDEX_MODE(SQ_INDEX_LOOP), 2249 PRED_SEL(SQ_PRED_SEL_OFF), 2250 LAST(1)); 2251 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2252 SRC1_ABS(0), 2253 UPDATE_EXECUTE_MASK(0), 2254 UPDATE_PRED(0), 2255 WRITE_MASK(0), 2256 OMOD(SQ_ALU_OMOD_OFF), 2257 ALU_INST(SQ_OP2_INST_DOT4), 2258 BANK_SWIZZLE(SQ_ALU_VEC_012), 2259 DST_GPR(2), 2260 DST_REL(ABSOLUTE), 2261 DST_ELEM(ELEM_W), 2262 CLAMP(0)); 2263 2264 /* 42 srcX / w */ 2265 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2), 2266 SRC0_REL(ABSOLUTE), 2267 SRC0_ELEM(ELEM_X), 2268 SRC0_NEG(0), 2269 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 2270 SRC1_REL(ABSOLUTE), 2271 SRC1_ELEM(ELEM_W), 2272 SRC1_NEG(0), 2273 INDEX_MODE(SQ_INDEX_AR_X), 2274 PRED_SEL(SQ_PRED_SEL_OFF), 2275 LAST(1)); 2276 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2277 SRC1_ABS(0), 2278 UPDATE_EXECUTE_MASK(0), 2279 UPDATE_PRED(0), 2280 WRITE_MASK(1), 2281 OMOD(SQ_ALU_OMOD_OFF), 2282 ALU_INST(SQ_OP2_INST_MUL), 2283 BANK_SWIZZLE(SQ_ALU_VEC_012), 2284 DST_GPR(0), 2285 DST_REL(ABSOLUTE), 2286 DST_ELEM(ELEM_X), 2287 CLAMP(0)); 2288 2289 /* 43 srcY / h */ 2290 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2), 2291 SRC0_REL(ABSOLUTE), 2292 SRC0_ELEM(ELEM_Y), 2293 SRC0_NEG(0), 2294 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 2295 SRC1_REL(ABSOLUTE), 2296 SRC1_ELEM(ELEM_W), 2297 SRC1_NEG(0), 2298 INDEX_MODE(SQ_INDEX_AR_X), 2299 PRED_SEL(SQ_PRED_SEL_OFF), 2300 LAST(1)); 2301 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2302 SRC1_ABS(0), 2303 UPDATE_EXECUTE_MASK(0), 2304 UPDATE_PRED(0), 2305 WRITE_MASK(1), 2306 OMOD(SQ_ALU_OMOD_OFF), 2307 ALU_INST(SQ_OP2_INST_MUL), 2308 BANK_SWIZZLE(SQ_ALU_VEC_012), 2309 DST_GPR(0), 2310 DST_REL(ABSOLUTE), 2311 DST_ELEM(ELEM_Y), 2312 CLAMP(0)); 2313 2314 /* mask vfetch - 44/45 - dst */ 2315 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 2316 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 2317 FETCH_WHOLE_QUAD(0), 2318 BUFFER_ID(0), 2319 SRC_GPR(0), 2320 SRC_REL(ABSOLUTE), 2321 SRC_SEL_X(SQ_SEL_X), 2322 MEGA_FETCH_COUNT(24)); 2323 shader[i++] = VTX_DWORD1_GPR(DST_GPR(2), 2324 DST_REL(0), 2325 DST_SEL_X(SQ_SEL_X), 2326 DST_SEL_Y(SQ_SEL_Y), 2327 DST_SEL_Z(SQ_SEL_0), 2328 DST_SEL_W(SQ_SEL_1), 2329 USE_CONST_FIELDS(0), 2330 DATA_FORMAT(FMT_32_32_FLOAT), 2331 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 2332 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 2333 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 2334 shader[i++] = VTX_DWORD2(OFFSET(0), 2335#if X_BYTE_ORDER == X_BIG_ENDIAN 2336 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 2337#else 2338 ENDIAN_SWAP(SQ_ENDIAN_NONE), 2339#endif 2340 CONST_BUF_NO_STRIDE(0), 2341 MEGA_FETCH(1), 2342 ALT_CONST(0), 2343 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 2344 shader[i++] = VTX_DWORD_PAD; 2345 /* 46/47 - src */ 2346 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 2347 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 2348 FETCH_WHOLE_QUAD(0), 2349 BUFFER_ID(0), 2350 SRC_GPR(0), 2351 SRC_REL(ABSOLUTE), 2352 SRC_SEL_X(SQ_SEL_X), 2353 MEGA_FETCH_COUNT(8)); 2354 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 2355 DST_REL(0), 2356 DST_SEL_X(SQ_SEL_X), 2357 DST_SEL_Y(SQ_SEL_Y), 2358 DST_SEL_Z(SQ_SEL_1), 2359 DST_SEL_W(SQ_SEL_0), 2360 USE_CONST_FIELDS(0), 2361 DATA_FORMAT(FMT_32_32_FLOAT), 2362 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 2363 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 2364 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 2365 shader[i++] = VTX_DWORD2(OFFSET(8), 2366#if X_BYTE_ORDER == X_BIG_ENDIAN 2367 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 2368#else 2369 ENDIAN_SWAP(SQ_ENDIAN_NONE), 2370#endif 2371 CONST_BUF_NO_STRIDE(0), 2372 MEGA_FETCH(0), 2373 ALT_CONST(0), 2374 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 2375 shader[i++] = VTX_DWORD_PAD; 2376 /* 48/49 - mask */ 2377 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 2378 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 2379 FETCH_WHOLE_QUAD(0), 2380 BUFFER_ID(0), 2381 SRC_GPR(0), 2382 SRC_REL(ABSOLUTE), 2383 SRC_SEL_X(SQ_SEL_X), 2384 MEGA_FETCH_COUNT(8)); 2385 shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), 2386 DST_REL(0), 2387 DST_SEL_X(SQ_SEL_X), 2388 DST_SEL_Y(SQ_SEL_Y), 2389 DST_SEL_Z(SQ_SEL_1), 2390 DST_SEL_W(SQ_SEL_0), 2391 USE_CONST_FIELDS(0), 2392 DATA_FORMAT(FMT_32_32_FLOAT), 2393 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 2394 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 2395 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 2396 shader[i++] = VTX_DWORD2(OFFSET(16), 2397#if X_BYTE_ORDER == X_BIG_ENDIAN 2398 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 2399#else 2400 ENDIAN_SWAP(SQ_ENDIAN_NONE), 2401#endif 2402 CONST_BUF_NO_STRIDE(0), 2403 MEGA_FETCH(0), 2404 ALT_CONST(0), 2405 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 2406 shader[i++] = VTX_DWORD_PAD; 2407 2408 /* no mask vfetch - 50/51 - dst */ 2409 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 2410 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 2411 FETCH_WHOLE_QUAD(0), 2412 BUFFER_ID(0), 2413 SRC_GPR(0), 2414 SRC_REL(ABSOLUTE), 2415 SRC_SEL_X(SQ_SEL_X), 2416 MEGA_FETCH_COUNT(16)); 2417 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 2418 DST_REL(0), 2419 DST_SEL_X(SQ_SEL_X), 2420 DST_SEL_Y(SQ_SEL_Y), 2421 DST_SEL_Z(SQ_SEL_0), 2422 DST_SEL_W(SQ_SEL_1), 2423 USE_CONST_FIELDS(0), 2424 DATA_FORMAT(FMT_32_32_FLOAT), 2425 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 2426 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 2427 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 2428 shader[i++] = VTX_DWORD2(OFFSET(0), 2429#if X_BYTE_ORDER == X_BIG_ENDIAN 2430 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 2431#else 2432 ENDIAN_SWAP(SQ_ENDIAN_NONE), 2433#endif 2434 CONST_BUF_NO_STRIDE(0), 2435 MEGA_FETCH(1), 2436 ALT_CONST(0), 2437 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 2438 shader[i++] = VTX_DWORD_PAD; 2439 /* 52/53 - src */ 2440 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 2441 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 2442 FETCH_WHOLE_QUAD(0), 2443 BUFFER_ID(0), 2444 SRC_GPR(0), 2445 SRC_REL(ABSOLUTE), 2446 SRC_SEL_X(SQ_SEL_X), 2447 MEGA_FETCH_COUNT(8)); 2448 shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), 2449 DST_REL(0), 2450 DST_SEL_X(SQ_SEL_X), 2451 DST_SEL_Y(SQ_SEL_Y), 2452 DST_SEL_Z(SQ_SEL_1), 2453 DST_SEL_W(SQ_SEL_0), 2454 USE_CONST_FIELDS(0), 2455 DATA_FORMAT(FMT_32_32_FLOAT), 2456 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 2457 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 2458 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 2459 shader[i++] = VTX_DWORD2(OFFSET(8), 2460#if X_BYTE_ORDER == X_BIG_ENDIAN 2461 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 2462#else 2463 ENDIAN_SWAP(SQ_ENDIAN_NONE), 2464#endif 2465 CONST_BUF_NO_STRIDE(0), 2466 MEGA_FETCH(0), 2467 ALT_CONST(0), 2468 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 2469 shader[i++] = VTX_DWORD_PAD; 2470 2471 return i; 2472} 2473 2474/* comp ps --------------------------------------- */ 2475int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t *shader) 2476{ 2477 int i = 0; 2478 2479 /* 0 */ 2480 /* call interp-fetch-mask if boolean1 == true */ 2481 shader[i++] = CF_DWORD0(ADDR(11), 2482 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2483 shader[i++] = CF_DWORD1(POP_COUNT(0), 2484 CF_CONST(1), 2485 COND(SQ_CF_COND_BOOL), 2486 I_COUNT(0), 2487 VALID_PIXEL_MODE(0), 2488 END_OF_PROGRAM(0), 2489 CF_INST(SQ_CF_INST_CALL), 2490 WHOLE_QUAD_MODE(0), 2491 BARRIER(0)); 2492 2493 /* 1 */ 2494 /* call read-constant-mask if boolean1 == false */ 2495 shader[i++] = CF_DWORD0(ADDR(14), 2496 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2497 shader[i++] = CF_DWORD1(POP_COUNT(0), 2498 CF_CONST(1), 2499 COND(SQ_CF_COND_NOT_BOOL), 2500 I_COUNT(0), 2501 VALID_PIXEL_MODE(0), 2502 END_OF_PROGRAM(0), 2503 CF_INST(SQ_CF_INST_CALL), 2504 WHOLE_QUAD_MODE(0), 2505 BARRIER(0)); 2506 2507 /* 2 */ 2508 /* call interp-fetch-src if boolean0 == true */ 2509 shader[i++] = CF_DWORD0(ADDR(6), 2510 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2511 shader[i++] = CF_DWORD1(POP_COUNT(0), 2512 CF_CONST(0), 2513 COND(SQ_CF_COND_BOOL), 2514 I_COUNT(0), 2515 VALID_PIXEL_MODE(0), 2516 END_OF_PROGRAM(0), 2517 CF_INST(SQ_CF_INST_CALL), 2518 WHOLE_QUAD_MODE(0), 2519 BARRIER(0)); 2520 2521 /* 3 */ 2522 /* call read-constant-src if boolean0 == false */ 2523 shader[i++] = CF_DWORD0(ADDR(9), 2524 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2525 shader[i++] = CF_DWORD1(POP_COUNT(0), 2526 CF_CONST(0), 2527 COND(SQ_CF_COND_NOT_BOOL), 2528 I_COUNT(0), 2529 VALID_PIXEL_MODE(0), 2530 END_OF_PROGRAM(0), 2531 CF_INST(SQ_CF_INST_CALL), 2532 WHOLE_QUAD_MODE(0), 2533 BARRIER(0)); 2534 /* 4 */ 2535 /* src IN mask (GPR2 := GPR1 .* GPR0) */ 2536 shader[i++] = CF_ALU_DWORD0(ADDR(16), 2537 KCACHE_BANK0(0), 2538 KCACHE_BANK1(0), 2539 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 2540 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 2541 KCACHE_ADDR0(0), 2542 KCACHE_ADDR1(0), 2543 I_COUNT(4), 2544 ALT_CONST(0), 2545 CF_INST(SQ_CF_INST_ALU), 2546 WHOLE_QUAD_MODE(0), 2547 BARRIER(1)); 2548 2549 /* 5 */ 2550 /* export pixel data */ 2551 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 2552 TYPE(SQ_EXPORT_PIXEL), 2553 RW_GPR(0), 2554 RW_REL(ABSOLUTE), 2555 INDEX_GPR(0), 2556 ELEM_SIZE(1)); 2557 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 2558 SRC_SEL_Y(SQ_SEL_Y), 2559 SRC_SEL_Z(SQ_SEL_Z), 2560 SRC_SEL_W(SQ_SEL_W), 2561 BURST_COUNT(1), 2562 VALID_PIXEL_MODE(0), 2563 END_OF_PROGRAM(1), 2564 CF_INST(SQ_CF_INST_EXPORT_DONE), 2565 MARK(0), 2566 BARRIER(1)); 2567 2568 /* subroutine interp-fetch-src */ 2569 2570 /* 6 */ 2571 /* interpolate src */ 2572 shader[i++] = CF_ALU_DWORD0(ADDR(20), 2573 KCACHE_BANK0(0), 2574 KCACHE_BANK1(0), 2575 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 2576 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 2577 KCACHE_ADDR0(0), 2578 KCACHE_ADDR1(0), 2579 I_COUNT(4), 2580 ALT_CONST(0), 2581 CF_INST(SQ_CF_INST_ALU), 2582 WHOLE_QUAD_MODE(0), 2583 BARRIER(1)); 2584 2585 /* 7 */ 2586 /* texture fetch src into GPR0 */ 2587 shader[i++] = CF_DWORD0(ADDR(24), 2588 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2589 shader[i++] = CF_DWORD1(POP_COUNT(0), 2590 CF_CONST(0), 2591 COND(SQ_CF_COND_ACTIVE), 2592 I_COUNT(1), 2593 VALID_PIXEL_MODE(0), 2594 END_OF_PROGRAM(0), 2595 CF_INST(SQ_CF_INST_TC), 2596 WHOLE_QUAD_MODE(0), 2597 BARRIER(1)); 2598 2599 /* 8 */ 2600 /* return */ 2601 shader[i++] = CF_DWORD0(ADDR(0), 2602 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2603 shader[i++] = CF_DWORD1(POP_COUNT(0), 2604 CF_CONST(0), 2605 COND(SQ_CF_COND_ACTIVE), 2606 I_COUNT(0), 2607 VALID_PIXEL_MODE(0), 2608 END_OF_PROGRAM(0), 2609 CF_INST(SQ_CF_INST_RETURN), 2610 WHOLE_QUAD_MODE(0), 2611 BARRIER(0)); 2612 2613 /* subroutine read-constant-src */ 2614 2615 /* 9 */ 2616 /* read constants into GPR0 */ 2617 shader[i++] = CF_ALU_DWORD0(ADDR(26), 2618 KCACHE_BANK0(0), 2619 KCACHE_BANK1(0), 2620 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); 2621 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 2622 KCACHE_ADDR0(0), 2623 KCACHE_ADDR1(0), 2624 I_COUNT(4), 2625 ALT_CONST(1), 2626 CF_INST(SQ_CF_INST_ALU), 2627 WHOLE_QUAD_MODE(0), 2628 BARRIER(1)); 2629 2630 /* 10 */ 2631 /* return */ 2632 shader[i++] = CF_DWORD0(ADDR(0), 2633 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2634 shader[i++] = CF_DWORD1(POP_COUNT(0), 2635 CF_CONST(0), 2636 COND(SQ_CF_COND_ACTIVE), 2637 I_COUNT(0), 2638 VALID_PIXEL_MODE(0), 2639 END_OF_PROGRAM(0), 2640 CF_INST(SQ_CF_INST_RETURN), 2641 WHOLE_QUAD_MODE(0), 2642 BARRIER(0)); 2643 2644 /* subroutine interp-fetch-mask */ 2645 2646 /* 11 */ 2647 /* interpolate mask */ 2648 shader[i++] = CF_ALU_DWORD0(ADDR(30), 2649 KCACHE_BANK0(0), 2650 KCACHE_BANK1(0), 2651 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 2652 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 2653 KCACHE_ADDR0(0), 2654 KCACHE_ADDR1(0), 2655 I_COUNT(4), 2656 ALT_CONST(0), 2657 CF_INST(SQ_CF_INST_ALU), 2658 WHOLE_QUAD_MODE(0), 2659 BARRIER(1)); 2660 2661 /* 12 */ 2662 /* texture fetch mask into GPR1 */ 2663 shader[i++] = CF_DWORD0(ADDR(34), 2664 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2665 shader[i++] = CF_DWORD1(POP_COUNT(0), 2666 CF_CONST(0), 2667 COND(SQ_CF_COND_ACTIVE), 2668 I_COUNT(1), 2669 VALID_PIXEL_MODE(0), 2670 END_OF_PROGRAM(0), 2671 CF_INST(SQ_CF_INST_TC), 2672 WHOLE_QUAD_MODE(0), 2673 BARRIER(1)); 2674 2675 /* 13 */ 2676 /* return */ 2677 shader[i++] = CF_DWORD0(ADDR(0), 2678 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2679 shader[i++] = CF_DWORD1(POP_COUNT(0), 2680 CF_CONST(0), 2681 COND(SQ_CF_COND_ACTIVE), 2682 I_COUNT(0), 2683 VALID_PIXEL_MODE(0), 2684 END_OF_PROGRAM(0), 2685 CF_INST(SQ_CF_INST_RETURN), 2686 WHOLE_QUAD_MODE(0), 2687 BARRIER(0)); 2688 2689 /* subroutine read-constant-src */ 2690 2691 /* 14 */ 2692 /* read constants into GPR1 */ 2693 shader[i++] = CF_ALU_DWORD0(ADDR(36), 2694 KCACHE_BANK0(0), 2695 KCACHE_BANK1(0), 2696 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); 2697 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 2698 KCACHE_ADDR0(0), 2699 KCACHE_ADDR1(0), 2700 I_COUNT(4), 2701 ALT_CONST(1), 2702 CF_INST(SQ_CF_INST_ALU), 2703 WHOLE_QUAD_MODE(0), 2704 BARRIER(1)); 2705 2706 /* 15 */ 2707 /* return */ 2708 shader[i++] = CF_DWORD0(ADDR(0), 2709 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2710 shader[i++] = CF_DWORD1(POP_COUNT(0), 2711 CF_CONST(0), 2712 COND(SQ_CF_COND_ACTIVE), 2713 I_COUNT(0), 2714 VALID_PIXEL_MODE(0), 2715 END_OF_PROGRAM(0), 2716 CF_INST(SQ_CF_INST_RETURN), 2717 WHOLE_QUAD_MODE(0), 2718 BARRIER(0)); 2719 2720 /* ALU clauses */ 2721 2722 /* 16 */ 2723 /* MUL gpr[0].x gpr[0].x gpr[1].x */ 2724 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2725 SRC0_REL(ABSOLUTE), 2726 SRC0_ELEM(ELEM_X), 2727 SRC0_NEG(0), 2728 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 2729 SRC1_REL(ABSOLUTE), 2730 SRC1_ELEM(ELEM_X), 2731 SRC1_NEG(0), 2732 INDEX_MODE(SQ_INDEX_LOOP), 2733 PRED_SEL(SQ_PRED_SEL_OFF), 2734 LAST(0)); 2735 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2736 SRC1_ABS(0), 2737 UPDATE_EXECUTE_MASK(0), 2738 UPDATE_PRED(0), 2739 WRITE_MASK(1), 2740 OMOD(SQ_ALU_OMOD_OFF), 2741 ALU_INST(SQ_OP2_INST_MUL), 2742 BANK_SWIZZLE(SQ_ALU_VEC_012), 2743 DST_GPR(0), 2744 DST_REL(ABSOLUTE), 2745 DST_ELEM(ELEM_X), 2746 CLAMP(1)); 2747 2748 /* 17 */ 2749 /* MUL gpr[0].y gpr[0].y gpr[1].y */ 2750 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2751 SRC0_REL(ABSOLUTE), 2752 SRC0_ELEM(ELEM_Y), 2753 SRC0_NEG(0), 2754 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 2755 SRC1_REL(ABSOLUTE), 2756 SRC1_ELEM(ELEM_Y), 2757 SRC1_NEG(0), 2758 INDEX_MODE(SQ_INDEX_LOOP), 2759 PRED_SEL(SQ_PRED_SEL_OFF), 2760 LAST(0)); 2761 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2762 SRC1_ABS(0), 2763 UPDATE_EXECUTE_MASK(0), 2764 UPDATE_PRED(0), 2765 WRITE_MASK(1), 2766 OMOD(SQ_ALU_OMOD_OFF), 2767 ALU_INST(SQ_OP2_INST_MUL), 2768 BANK_SWIZZLE(SQ_ALU_VEC_012), 2769 DST_GPR(0), 2770 DST_REL(ABSOLUTE), 2771 DST_ELEM(ELEM_Y), 2772 CLAMP(1)); 2773 /* 18 */ 2774 /* MUL gpr[0].z gpr[0].z gpr[1].z */ 2775 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2776 SRC0_REL(ABSOLUTE), 2777 SRC0_ELEM(ELEM_Z), 2778 SRC0_NEG(0), 2779 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 2780 SRC1_REL(ABSOLUTE), 2781 SRC1_ELEM(ELEM_Z), 2782 SRC1_NEG(0), 2783 INDEX_MODE(SQ_INDEX_LOOP), 2784 PRED_SEL(SQ_PRED_SEL_OFF), 2785 LAST(0)); 2786 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2787 SRC1_ABS(0), 2788 UPDATE_EXECUTE_MASK(0), 2789 UPDATE_PRED(0), 2790 WRITE_MASK(1), 2791 OMOD(SQ_ALU_OMOD_OFF), 2792 ALU_INST(SQ_OP2_INST_MUL), 2793 BANK_SWIZZLE(SQ_ALU_VEC_012), 2794 DST_GPR(0), 2795 DST_REL(ABSOLUTE), 2796 DST_ELEM(ELEM_Z), 2797 CLAMP(1)); 2798 /* 19 */ 2799 /* MUL gpr[0].w gpr[0].w gpr[1].w */ 2800 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2801 SRC0_REL(ABSOLUTE), 2802 SRC0_ELEM(ELEM_W), 2803 SRC0_NEG(0), 2804 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 2805 SRC1_REL(ABSOLUTE), 2806 SRC1_ELEM(ELEM_W), 2807 SRC1_NEG(0), 2808 INDEX_MODE(SQ_INDEX_LOOP), 2809 PRED_SEL(SQ_PRED_SEL_OFF), 2810 LAST(1)); 2811 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2812 SRC1_ABS(0), 2813 UPDATE_EXECUTE_MASK(0), 2814 UPDATE_PRED(0), 2815 WRITE_MASK(1), 2816 OMOD(SQ_ALU_OMOD_OFF), 2817 ALU_INST(SQ_OP2_INST_MUL), 2818 BANK_SWIZZLE(SQ_ALU_VEC_012), 2819 DST_GPR(0), 2820 DST_REL(ABSOLUTE), 2821 DST_ELEM(ELEM_W), 2822 CLAMP(1)); 2823 2824 /* 20 */ 2825 /* INTERP_XY GPR0.x, GPR0.y PARAM0.x */ 2826 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2827 SRC0_REL(ABSOLUTE), 2828 SRC0_ELEM(ELEM_Y), 2829 SRC0_NEG(0), 2830 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 2831 SRC1_REL(ABSOLUTE), 2832 SRC1_ELEM(ELEM_X), 2833 SRC1_NEG(0), 2834 INDEX_MODE(SQ_INDEX_AR_X), 2835 PRED_SEL(SQ_PRED_SEL_OFF), 2836 LAST(0)); 2837 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2838 SRC1_ABS(0), 2839 UPDATE_EXECUTE_MASK(0), 2840 UPDATE_PRED(0), 2841 WRITE_MASK(1), 2842 OMOD(SQ_ALU_OMOD_OFF), 2843 ALU_INST(SQ_OP2_INST_INTERP_XY), 2844 BANK_SWIZZLE(SQ_ALU_VEC_210), 2845 DST_GPR(0), 2846 DST_REL(ABSOLUTE), 2847 DST_ELEM(ELEM_X), 2848 CLAMP(0)); 2849 /* 21 */ 2850 /* INTERP_XY GPR0.y, GPR0.x PARAM0.x */ 2851 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2852 SRC0_REL(ABSOLUTE), 2853 SRC0_ELEM(ELEM_X), 2854 SRC0_NEG(0), 2855 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 2856 SRC1_REL(ABSOLUTE), 2857 SRC1_ELEM(ELEM_X), 2858 SRC1_NEG(0), 2859 INDEX_MODE(SQ_INDEX_AR_X), 2860 PRED_SEL(SQ_PRED_SEL_OFF), 2861 LAST(0)); 2862 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2863 SRC1_ABS(0), 2864 UPDATE_EXECUTE_MASK(0), 2865 UPDATE_PRED(0), 2866 WRITE_MASK(1), 2867 OMOD(SQ_ALU_OMOD_OFF), 2868 ALU_INST(SQ_OP2_INST_INTERP_XY), 2869 BANK_SWIZZLE(SQ_ALU_VEC_210), 2870 DST_GPR(0), 2871 DST_REL(ABSOLUTE), 2872 DST_ELEM(ELEM_Y), 2873 CLAMP(0)); 2874 /* 22 */ 2875 /* INTERP_XY GPR0.z, GPR0.y PARAM0.x */ 2876 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2877 SRC0_REL(ABSOLUTE), 2878 SRC0_ELEM(ELEM_Y), 2879 SRC0_NEG(0), 2880 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 2881 SRC1_REL(ABSOLUTE), 2882 SRC1_ELEM(ELEM_X), 2883 SRC1_NEG(0), 2884 INDEX_MODE(SQ_INDEX_AR_X), 2885 PRED_SEL(SQ_PRED_SEL_OFF), 2886 LAST(0)); 2887 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2888 SRC1_ABS(0), 2889 UPDATE_EXECUTE_MASK(0), 2890 UPDATE_PRED(0), 2891 WRITE_MASK(0), 2892 OMOD(SQ_ALU_OMOD_OFF), 2893 ALU_INST(SQ_OP2_INST_INTERP_XY), 2894 BANK_SWIZZLE(SQ_ALU_VEC_210), 2895 DST_GPR(0), 2896 DST_REL(ABSOLUTE), 2897 DST_ELEM(ELEM_Z), 2898 CLAMP(0)); 2899 2900 /* 23 */ 2901 /* INTERP_XY GPR0.w, GPR0.x PARAM0.x */ 2902 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2903 SRC0_REL(ABSOLUTE), 2904 SRC0_ELEM(ELEM_X), 2905 SRC0_NEG(0), 2906 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 2907 SRC1_REL(ABSOLUTE), 2908 SRC1_ELEM(ELEM_X), 2909 SRC1_NEG(0), 2910 INDEX_MODE(SQ_INDEX_AR_X), 2911 PRED_SEL(SQ_PRED_SEL_OFF), 2912 LAST(1)); 2913 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2914 SRC1_ABS(0), 2915 UPDATE_EXECUTE_MASK(0), 2916 UPDATE_PRED(0), 2917 WRITE_MASK(0), 2918 OMOD(SQ_ALU_OMOD_OFF), 2919 ALU_INST(SQ_OP2_INST_INTERP_XY), 2920 BANK_SWIZZLE(SQ_ALU_VEC_210), 2921 DST_GPR(0), 2922 DST_REL(ABSOLUTE), 2923 DST_ELEM(ELEM_W), 2924 CLAMP(0)); 2925 2926 /* 24/25 */ 2927 /* SAMPLE RID=0 GPR0, GPR0 */ 2928 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 2929 INST_MOD(0), 2930 FETCH_WHOLE_QUAD(0), 2931 RESOURCE_ID(0), 2932 SRC_GPR(0), 2933 SRC_REL(ABSOLUTE), 2934 ALT_CONST(0), 2935 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 2936 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 2937 shader[i++] = TEX_DWORD1(DST_GPR(0), 2938 DST_REL(ABSOLUTE), 2939 DST_SEL_X(SQ_SEL_X), 2940 DST_SEL_Y(SQ_SEL_Y), 2941 DST_SEL_Z(SQ_SEL_Z), 2942 DST_SEL_W(SQ_SEL_W), 2943 LOD_BIAS(0), 2944 COORD_TYPE_X(TEX_NORMALIZED), 2945 COORD_TYPE_Y(TEX_NORMALIZED), 2946 COORD_TYPE_Z(TEX_NORMALIZED), 2947 COORD_TYPE_W(TEX_NORMALIZED)); 2948 shader[i++] = TEX_DWORD2(OFFSET_X(0), 2949 OFFSET_Y(0), 2950 OFFSET_Z(0), 2951 SAMPLER_ID(0), 2952 SRC_SEL_X(SQ_SEL_X), 2953 SRC_SEL_Y(SQ_SEL_Y), 2954 SRC_SEL_Z(SQ_SEL_0), 2955 SRC_SEL_W(SQ_SEL_1)); 2956 shader[i++] = TEX_DWORD_PAD; 2957 2958 /* 26 */ 2959 /* MOV GPR0.x, KC4.x */ 2960 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4), 2961 SRC0_REL(ABSOLUTE), 2962 SRC0_ELEM(ELEM_X), 2963 SRC0_NEG(0), 2964 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 2965 SRC1_REL(ABSOLUTE), 2966 SRC1_ELEM(ELEM_X), 2967 SRC1_NEG(0), 2968 INDEX_MODE(SQ_INDEX_AR_X), 2969 PRED_SEL(SQ_PRED_SEL_OFF), 2970 LAST(0)); 2971 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2972 SRC1_ABS(0), 2973 UPDATE_EXECUTE_MASK(0), 2974 UPDATE_PRED(0), 2975 WRITE_MASK(1), 2976 OMOD(SQ_ALU_OMOD_OFF), 2977 ALU_INST(SQ_OP2_INST_MOV), 2978 BANK_SWIZZLE(SQ_ALU_VEC_012), 2979 DST_GPR(0), 2980 DST_REL(ABSOLUTE), 2981 DST_ELEM(ELEM_X), 2982 CLAMP(1)); 2983 2984 /* 27 */ 2985 /* MOV GPR0.y, KC4.y */ 2986 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4), 2987 SRC0_REL(ABSOLUTE), 2988 SRC0_ELEM(ELEM_Y), 2989 SRC0_NEG(0), 2990 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 2991 SRC1_REL(ABSOLUTE), 2992 SRC1_ELEM(ELEM_X), 2993 SRC1_NEG(0), 2994 INDEX_MODE(SQ_INDEX_AR_X), 2995 PRED_SEL(SQ_PRED_SEL_OFF), 2996 LAST(0)); 2997 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2998 SRC1_ABS(0), 2999 UPDATE_EXECUTE_MASK(0), 3000 UPDATE_PRED(0), 3001 WRITE_MASK(1), 3002 OMOD(SQ_ALU_OMOD_OFF), 3003 ALU_INST(SQ_OP2_INST_MOV), 3004 BANK_SWIZZLE(SQ_ALU_VEC_012), 3005 DST_GPR(0), 3006 DST_REL(ABSOLUTE), 3007 DST_ELEM(ELEM_Y), 3008 CLAMP(1)); 3009 3010 /* 28 */ 3011 /* MOV GPR0.z, KC4.z */ 3012 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4), 3013 SRC0_REL(ABSOLUTE), 3014 SRC0_ELEM(ELEM_Z), 3015 SRC0_NEG(0), 3016 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 3017 SRC1_REL(ABSOLUTE), 3018 SRC1_ELEM(ELEM_X), 3019 SRC1_NEG(0), 3020 INDEX_MODE(SQ_INDEX_AR_X), 3021 PRED_SEL(SQ_PRED_SEL_OFF), 3022 LAST(0)); 3023 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 3024 SRC1_ABS(0), 3025 UPDATE_EXECUTE_MASK(0), 3026 UPDATE_PRED(0), 3027 WRITE_MASK(1), 3028 OMOD(SQ_ALU_OMOD_OFF), 3029 ALU_INST(SQ_OP2_INST_MOV), 3030 BANK_SWIZZLE(SQ_ALU_VEC_012), 3031 DST_GPR(0), 3032 DST_REL(ABSOLUTE), 3033 DST_ELEM(ELEM_Z), 3034 CLAMP(1)); 3035 3036 /* 29 */ 3037 /* MOV GPR0.w, KC4.w */ 3038 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4), 3039 SRC0_REL(ABSOLUTE), 3040 SRC0_ELEM(ELEM_W), 3041 SRC0_NEG(0), 3042 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 3043 SRC1_REL(ABSOLUTE), 3044 SRC1_ELEM(ELEM_X), 3045 SRC1_NEG(0), 3046 INDEX_MODE(SQ_INDEX_AR_X), 3047 PRED_SEL(SQ_PRED_SEL_OFF), 3048 LAST(1)); 3049 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 3050 SRC1_ABS(0), 3051 UPDATE_EXECUTE_MASK(0), 3052 UPDATE_PRED(0), 3053 WRITE_MASK(1), 3054 OMOD(SQ_ALU_OMOD_OFF), 3055 ALU_INST(SQ_OP2_INST_MOV), 3056 BANK_SWIZZLE(SQ_ALU_VEC_012), 3057 DST_GPR(0), 3058 DST_REL(ABSOLUTE), 3059 DST_ELEM(ELEM_W), 3060 CLAMP(1)); 3061 3062 /* 30 */ 3063 /* INTERP_XY GPR1.x, PARAM1 */ 3064 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 3065 SRC0_REL(ABSOLUTE), 3066 SRC0_ELEM(ELEM_Y), 3067 SRC0_NEG(0), 3068 SRC1_SEL(ALU_SRC_PARAM_BASE + 1), 3069 SRC1_REL(ABSOLUTE), 3070 SRC1_ELEM(ELEM_X), 3071 SRC1_NEG(0), 3072 INDEX_MODE(SQ_INDEX_AR_X), 3073 PRED_SEL(SQ_PRED_SEL_OFF), 3074 LAST(0)); 3075 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 3076 SRC1_ABS(0), 3077 UPDATE_EXECUTE_MASK(0), 3078 UPDATE_PRED(0), 3079 WRITE_MASK(1), 3080 OMOD(SQ_ALU_OMOD_OFF), 3081 ALU_INST(SQ_OP2_INST_INTERP_XY), 3082 BANK_SWIZZLE(SQ_ALU_VEC_210), 3083 DST_GPR(1), 3084 DST_REL(ABSOLUTE), 3085 DST_ELEM(ELEM_X), 3086 CLAMP(0)); 3087 /* 31 */ 3088 /* INTERP_XY GPR1.y, PARAM1 */ 3089 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 3090 SRC0_REL(ABSOLUTE), 3091 SRC0_ELEM(ELEM_X), 3092 SRC0_NEG(0), 3093 SRC1_SEL(ALU_SRC_PARAM_BASE + 1), 3094 SRC1_REL(ABSOLUTE), 3095 SRC1_ELEM(ELEM_X), 3096 SRC1_NEG(0), 3097 INDEX_MODE(SQ_INDEX_AR_X), 3098 PRED_SEL(SQ_PRED_SEL_OFF), 3099 LAST(0)); 3100 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 3101 SRC1_ABS(0), 3102 UPDATE_EXECUTE_MASK(0), 3103 UPDATE_PRED(0), 3104 WRITE_MASK(1), 3105 OMOD(SQ_ALU_OMOD_OFF), 3106 ALU_INST(SQ_OP2_INST_INTERP_XY), 3107 BANK_SWIZZLE(SQ_ALU_VEC_210), 3108 DST_GPR(1), 3109 DST_REL(ABSOLUTE), 3110 DST_ELEM(ELEM_Y), 3111 CLAMP(0)); 3112 /* 32 */ 3113 /* INTERP_XY GPR1.z, PARAM1 */ 3114 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 3115 SRC0_REL(ABSOLUTE), 3116 SRC0_ELEM(ELEM_Y), 3117 SRC0_NEG(0), 3118 SRC1_SEL(ALU_SRC_PARAM_BASE + 1), 3119 SRC1_REL(ABSOLUTE), 3120 SRC1_ELEM(ELEM_X), 3121 SRC1_NEG(0), 3122 INDEX_MODE(SQ_INDEX_AR_X), 3123 PRED_SEL(SQ_PRED_SEL_OFF), 3124 LAST(0)); 3125 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 3126 SRC1_ABS(0), 3127 UPDATE_EXECUTE_MASK(0), 3128 UPDATE_PRED(0), 3129 WRITE_MASK(0), 3130 OMOD(SQ_ALU_OMOD_OFF), 3131 ALU_INST(SQ_OP2_INST_INTERP_XY), 3132 BANK_SWIZZLE(SQ_ALU_VEC_210), 3133 DST_GPR(1), 3134 DST_REL(ABSOLUTE), 3135 DST_ELEM(ELEM_Z), 3136 CLAMP(0)); 3137 /* 33 */ 3138 /* INTERP_XY GPR1.w, PARAM1 */ 3139 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 3140 SRC0_REL(ABSOLUTE), 3141 SRC0_ELEM(ELEM_X), 3142 SRC0_NEG(0), 3143 SRC1_SEL(ALU_SRC_PARAM_BASE + 1), 3144 SRC1_REL(ABSOLUTE), 3145 SRC1_ELEM(ELEM_X), 3146 SRC1_NEG(0), 3147 INDEX_MODE(SQ_INDEX_AR_X), 3148 PRED_SEL(SQ_PRED_SEL_OFF), 3149 LAST(1)); 3150 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 3151 SRC1_ABS(0), 3152 UPDATE_EXECUTE_MASK(0), 3153 UPDATE_PRED(0), 3154 WRITE_MASK(0), 3155 OMOD(SQ_ALU_OMOD_OFF), 3156 ALU_INST(SQ_OP2_INST_INTERP_XY), 3157 BANK_SWIZZLE(SQ_ALU_VEC_210), 3158 DST_GPR(1), 3159 DST_REL(ABSOLUTE), 3160 DST_ELEM(ELEM_W), 3161 CLAMP(0)); 3162 3163 /* 34/35 */ 3164 /* SAMPLE RID=1 GPR1, GPR1 */ 3165 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 3166 INST_MOD(0), 3167 FETCH_WHOLE_QUAD(0), 3168 RESOURCE_ID(1), 3169 SRC_GPR(1), 3170 SRC_REL(ABSOLUTE), 3171 ALT_CONST(0), 3172 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 3173 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 3174 shader[i++] = TEX_DWORD1(DST_GPR(1), 3175 DST_REL(ABSOLUTE), 3176 DST_SEL_X(SQ_SEL_X), 3177 DST_SEL_Y(SQ_SEL_Y), 3178 DST_SEL_Z(SQ_SEL_Z), 3179 DST_SEL_W(SQ_SEL_W), 3180 LOD_BIAS(0), 3181 COORD_TYPE_X(TEX_NORMALIZED), 3182 COORD_TYPE_Y(TEX_NORMALIZED), 3183 COORD_TYPE_Z(TEX_NORMALIZED), 3184 COORD_TYPE_W(TEX_NORMALIZED)); 3185 shader[i++] = TEX_DWORD2(OFFSET_X(0), 3186 OFFSET_Y(0), 3187 OFFSET_Z(0), 3188 SAMPLER_ID(1), 3189 SRC_SEL_X(SQ_SEL_X), 3190 SRC_SEL_Y(SQ_SEL_Y), 3191 SRC_SEL_Z(SQ_SEL_0), 3192 SRC_SEL_W(SQ_SEL_1)); 3193 shader[i++] = TEX_DWORD_PAD; 3194 3195 /* 36 */ 3196 /* MOV GPR1.x, KC5.x */ 3197 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5), 3198 SRC0_REL(ABSOLUTE), 3199 SRC0_ELEM(ELEM_X), 3200 SRC0_NEG(0), 3201 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 3202 SRC1_REL(ABSOLUTE), 3203 SRC1_ELEM(ELEM_X), 3204 SRC1_NEG(0), 3205 INDEX_MODE(SQ_INDEX_AR_X), 3206 PRED_SEL(SQ_PRED_SEL_OFF), 3207 LAST(0)); 3208 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 3209 SRC1_ABS(0), 3210 UPDATE_EXECUTE_MASK(0), 3211 UPDATE_PRED(0), 3212 WRITE_MASK(1), 3213 OMOD(SQ_ALU_OMOD_OFF), 3214 ALU_INST(SQ_OP2_INST_MOV), 3215 BANK_SWIZZLE(SQ_ALU_VEC_012), 3216 DST_GPR(1), 3217 DST_REL(ABSOLUTE), 3218 DST_ELEM(ELEM_X), 3219 CLAMP(1)); 3220 3221 /* 37 */ 3222 /* MOV GPR1.y, KC5.y */ 3223 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5), 3224 SRC0_REL(ABSOLUTE), 3225 SRC0_ELEM(ELEM_Y), 3226 SRC0_NEG(0), 3227 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 3228 SRC1_REL(ABSOLUTE), 3229 SRC1_ELEM(ELEM_X), 3230 SRC1_NEG(0), 3231 INDEX_MODE(SQ_INDEX_AR_X), 3232 PRED_SEL(SQ_PRED_SEL_OFF), 3233 LAST(0)); 3234 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 3235 SRC1_ABS(0), 3236 UPDATE_EXECUTE_MASK(0), 3237 UPDATE_PRED(0), 3238 WRITE_MASK(1), 3239 OMOD(SQ_ALU_OMOD_OFF), 3240 ALU_INST(SQ_OP2_INST_MOV), 3241 BANK_SWIZZLE(SQ_ALU_VEC_012), 3242 DST_GPR(1), 3243 DST_REL(ABSOLUTE), 3244 DST_ELEM(ELEM_Y), 3245 CLAMP(1)); 3246 3247 /* 38 */ 3248 /* MOV GPR1.z, KC5.z */ 3249 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5), 3250 SRC0_REL(ABSOLUTE), 3251 SRC0_ELEM(ELEM_Z), 3252 SRC0_NEG(0), 3253 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 3254 SRC1_REL(ABSOLUTE), 3255 SRC1_ELEM(ELEM_X), 3256 SRC1_NEG(0), 3257 INDEX_MODE(SQ_INDEX_AR_X), 3258 PRED_SEL(SQ_PRED_SEL_OFF), 3259 LAST(0)); 3260 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 3261 SRC1_ABS(0), 3262 UPDATE_EXECUTE_MASK(0), 3263 UPDATE_PRED(0), 3264 WRITE_MASK(1), 3265 OMOD(SQ_ALU_OMOD_OFF), 3266 ALU_INST(SQ_OP2_INST_MOV), 3267 BANK_SWIZZLE(SQ_ALU_VEC_012), 3268 DST_GPR(1), 3269 DST_REL(ABSOLUTE), 3270 DST_ELEM(ELEM_Z), 3271 CLAMP(1)); 3272 3273 /* 39 */ 3274 /* MOV GPR1.w, KC5.w */ 3275 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5), 3276 SRC0_REL(ABSOLUTE), 3277 SRC0_ELEM(ELEM_W), 3278 SRC0_NEG(0), 3279 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 3280 SRC1_REL(ABSOLUTE), 3281 SRC1_ELEM(ELEM_X), 3282 SRC1_NEG(0), 3283 INDEX_MODE(SQ_INDEX_AR_X), 3284 PRED_SEL(SQ_PRED_SEL_OFF), 3285 LAST(1)); 3286 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 3287 SRC1_ABS(0), 3288 UPDATE_EXECUTE_MASK(0), 3289 UPDATE_PRED(0), 3290 WRITE_MASK(1), 3291 OMOD(SQ_ALU_OMOD_OFF), 3292 ALU_INST(SQ_OP2_INST_MOV), 3293 BANK_SWIZZLE(SQ_ALU_VEC_012), 3294 DST_GPR(1), 3295 DST_REL(ABSOLUTE), 3296 DST_ELEM(ELEM_W), 3297 CLAMP(1)); 3298 3299 return i; 3300} 3301