evergreen_shader.c revision b13dfe66
1/* 2 * Copyright 2010 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Author: Alex Deucher <alexander.deucher@amd.com> 24 * 25 */ 26 27#ifdef HAVE_CONFIG_H 28#include "config.h" 29#endif 30 31#ifdef XF86DRM_MODE 32 33#include "xf86.h" 34 35#include "evergreen_shader.h" 36#include "evergreen_reg.h" 37 38/* solid vs --------------------------------------- */ 39int evergreen_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader) 40{ 41 int i = 0; 42 43 /* 0 */ 44 shader[i++] = CF_DWORD0(ADDR(4), 45 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 46 shader[i++] = CF_DWORD1(POP_COUNT(0), 47 CF_CONST(0), 48 COND(SQ_CF_COND_ACTIVE), 49 I_COUNT(1), 50 VALID_PIXEL_MODE(0), 51 END_OF_PROGRAM(0), 52 CF_INST(SQ_CF_INST_VC), 53 WHOLE_QUAD_MODE(0), 54 BARRIER(1)); 55 /* 1 */ 56 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 57 TYPE(SQ_EXPORT_POS), 58 RW_GPR(1), 59 RW_REL(ABSOLUTE), 60 INDEX_GPR(0), 61 ELEM_SIZE(0)); 62 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 63 SRC_SEL_Y(SQ_SEL_Y), 64 SRC_SEL_Z(SQ_SEL_Z), 65 SRC_SEL_W(SQ_SEL_W), 66 BURST_COUNT(1), 67 VALID_PIXEL_MODE(0), 68 END_OF_PROGRAM(0), 69 CF_INST(SQ_CF_INST_EXPORT_DONE), 70 MARK(0), 71 BARRIER(1)); 72 /* 2 - always export a param whether it's used or not */ 73 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 74 TYPE(SQ_EXPORT_PARAM), 75 RW_GPR(0), 76 RW_REL(ABSOLUTE), 77 INDEX_GPR(0), 78 ELEM_SIZE(0)); 79 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 80 SRC_SEL_Y(SQ_SEL_Y), 81 SRC_SEL_Z(SQ_SEL_Z), 82 SRC_SEL_W(SQ_SEL_W), 83 BURST_COUNT(0), 84 VALID_PIXEL_MODE(0), 85 END_OF_PROGRAM(1), 86 CF_INST(SQ_CF_INST_EXPORT_DONE), 87 MARK(0), 88 BARRIER(0)); 89 /* 3 - padding */ 90 shader[i++] = 0x00000000; 91 shader[i++] = 0x00000000; 92 /* 4/5 */ 93 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 94 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 95 FETCH_WHOLE_QUAD(0), 96 BUFFER_ID(0), 97 SRC_GPR(0), 98 SRC_REL(ABSOLUTE), 99 SRC_SEL_X(SQ_SEL_X), 100 MEGA_FETCH_COUNT(8)); 101 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 102 DST_REL(0), 103 DST_SEL_X(SQ_SEL_X), 104 DST_SEL_Y(SQ_SEL_Y), 105 DST_SEL_Z(SQ_SEL_0), 106 DST_SEL_W(SQ_SEL_1), 107 USE_CONST_FIELDS(0), 108 DATA_FORMAT(FMT_32_32_FLOAT), 109 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 110 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 111 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 112 shader[i++] = VTX_DWORD2(OFFSET(0), 113#if X_BYTE_ORDER == X_BIG_ENDIAN 114 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 115#else 116 ENDIAN_SWAP(SQ_ENDIAN_NONE), 117#endif 118 CONST_BUF_NO_STRIDE(0), 119 MEGA_FETCH(1), 120 ALT_CONST(0), 121 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 122 shader[i++] = VTX_DWORD_PAD; 123 124 return i; 125} 126 127/* solid ps --------------------------------------- */ 128int evergreen_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader) 129{ 130 int i = 0; 131 132 /* 0 */ 133 shader[i++] = CF_ALU_DWORD0(ADDR(2), 134 KCACHE_BANK0(0), 135 KCACHE_BANK1(0), 136 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); 137 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 138 KCACHE_ADDR0(0), 139 KCACHE_ADDR1(0), 140 I_COUNT(4), 141 ALT_CONST(0), 142 CF_INST(SQ_CF_INST_ALU), 143 WHOLE_QUAD_MODE(0), 144 BARRIER(1)); 145 /* 1 */ 146 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 147 TYPE(SQ_EXPORT_PIXEL), 148 RW_GPR(0), 149 RW_REL(ABSOLUTE), 150 INDEX_GPR(0), 151 ELEM_SIZE(1)); 152 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 153 SRC_SEL_Y(SQ_SEL_Y), 154 SRC_SEL_Z(SQ_SEL_Z), 155 SRC_SEL_W(SQ_SEL_W), 156 BURST_COUNT(1), 157 VALID_PIXEL_MODE(0), 158 END_OF_PROGRAM(1), 159 CF_INST(SQ_CF_INST_EXPORT_DONE), 160 MARK(0), 161 BARRIER(1)); 162 163 /* 2 */ 164 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 165 SRC0_REL(ABSOLUTE), 166 SRC0_ELEM(ELEM_X), 167 SRC0_NEG(0), 168 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 169 SRC1_REL(ABSOLUTE), 170 SRC1_ELEM(ELEM_X), 171 SRC1_NEG(0), 172 INDEX_MODE(SQ_INDEX_AR_X), 173 PRED_SEL(SQ_PRED_SEL_OFF), 174 LAST(0)); 175 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 176 SRC1_ABS(0), 177 UPDATE_EXECUTE_MASK(0), 178 UPDATE_PRED(0), 179 WRITE_MASK(1), 180 OMOD(SQ_ALU_OMOD_OFF), 181 ALU_INST(SQ_OP2_INST_MOV), 182 BANK_SWIZZLE(SQ_ALU_VEC_012), 183 DST_GPR(0), 184 DST_REL(ABSOLUTE), 185 DST_ELEM(ELEM_X), 186 CLAMP(1)); 187 /* 3 */ 188 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 189 SRC0_REL(ABSOLUTE), 190 SRC0_ELEM(ELEM_Y), 191 SRC0_NEG(0), 192 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 193 SRC1_REL(ABSOLUTE), 194 SRC1_ELEM(ELEM_Y), 195 SRC1_NEG(0), 196 INDEX_MODE(SQ_INDEX_AR_X), 197 PRED_SEL(SQ_PRED_SEL_OFF), 198 LAST(0)); 199 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 200 SRC1_ABS(0), 201 UPDATE_EXECUTE_MASK(0), 202 UPDATE_PRED(0), 203 WRITE_MASK(1), 204 OMOD(SQ_ALU_OMOD_OFF), 205 ALU_INST(SQ_OP2_INST_MOV), 206 BANK_SWIZZLE(SQ_ALU_VEC_012), 207 DST_GPR(0), 208 DST_REL(ABSOLUTE), 209 DST_ELEM(ELEM_Y), 210 CLAMP(1)); 211 /* 4 */ 212 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 213 SRC0_REL(ABSOLUTE), 214 SRC0_ELEM(ELEM_Z), 215 SRC0_NEG(0), 216 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 217 SRC1_REL(ABSOLUTE), 218 SRC1_ELEM(ELEM_Z), 219 SRC1_NEG(0), 220 INDEX_MODE(SQ_INDEX_AR_X), 221 PRED_SEL(SQ_PRED_SEL_OFF), 222 LAST(0)); 223 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 224 SRC1_ABS(0), 225 UPDATE_EXECUTE_MASK(0), 226 UPDATE_PRED(0), 227 WRITE_MASK(1), 228 OMOD(SQ_ALU_OMOD_OFF), 229 ALU_INST(SQ_OP2_INST_MOV), 230 BANK_SWIZZLE(SQ_ALU_VEC_012), 231 DST_GPR(0), 232 DST_REL(ABSOLUTE), 233 DST_ELEM(ELEM_Z), 234 CLAMP(1)); 235 /* 5 */ 236 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 237 SRC0_REL(ABSOLUTE), 238 SRC0_ELEM(ELEM_W), 239 SRC0_NEG(0), 240 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 241 SRC1_REL(ABSOLUTE), 242 SRC1_ELEM(ELEM_W), 243 SRC1_NEG(0), 244 INDEX_MODE(SQ_INDEX_AR_X), 245 PRED_SEL(SQ_PRED_SEL_OFF), 246 LAST(1)); 247 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 248 SRC1_ABS(0), 249 UPDATE_EXECUTE_MASK(0), 250 UPDATE_PRED(0), 251 WRITE_MASK(1), 252 OMOD(SQ_ALU_OMOD_OFF), 253 ALU_INST(SQ_OP2_INST_MOV), 254 BANK_SWIZZLE(SQ_ALU_VEC_012), 255 DST_GPR(0), 256 DST_REL(ABSOLUTE), 257 DST_ELEM(ELEM_W), 258 CLAMP(1)); 259 260 return i; 261} 262 263/* copy vs --------------------------------------- */ 264int evergreen_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader) 265{ 266 int i = 0; 267 268 /* 0 */ 269 shader[i++] = CF_DWORD0(ADDR(4), 270 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 271 shader[i++] = CF_DWORD1(POP_COUNT(0), 272 CF_CONST(0), 273 COND(SQ_CF_COND_ACTIVE), 274 I_COUNT(2), 275 VALID_PIXEL_MODE(0), 276 END_OF_PROGRAM(0), 277 CF_INST(SQ_CF_INST_VC), 278 WHOLE_QUAD_MODE(0), 279 BARRIER(1)); 280 /* 1 */ 281 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 282 TYPE(SQ_EXPORT_POS), 283 RW_GPR(1), 284 RW_REL(ABSOLUTE), 285 INDEX_GPR(0), 286 ELEM_SIZE(0)); 287 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 288 SRC_SEL_Y(SQ_SEL_Y), 289 SRC_SEL_Z(SQ_SEL_Z), 290 SRC_SEL_W(SQ_SEL_W), 291 BURST_COUNT(0), 292 VALID_PIXEL_MODE(0), 293 END_OF_PROGRAM(0), 294 CF_INST(SQ_CF_INST_EXPORT_DONE), 295 MARK(0), 296 BARRIER(1)); 297 /* 2 */ 298 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 299 TYPE(SQ_EXPORT_PARAM), 300 RW_GPR(0), 301 RW_REL(ABSOLUTE), 302 INDEX_GPR(0), 303 ELEM_SIZE(0)); 304 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 305 SRC_SEL_Y(SQ_SEL_Y), 306 SRC_SEL_Z(SQ_SEL_Z), 307 SRC_SEL_W(SQ_SEL_W), 308 BURST_COUNT(0), 309 VALID_PIXEL_MODE(0), 310 END_OF_PROGRAM(1), 311 CF_INST(SQ_CF_INST_EXPORT_DONE), 312 MARK(0), 313 BARRIER(0)); 314 /* 3 */ 315 shader[i++] = 0x00000000; 316 shader[i++] = 0x00000000; 317 /* 4/5 */ 318 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 319 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 320 FETCH_WHOLE_QUAD(0), 321 BUFFER_ID(0), 322 SRC_GPR(0), 323 SRC_REL(ABSOLUTE), 324 SRC_SEL_X(SQ_SEL_X), 325 MEGA_FETCH_COUNT(16)); 326 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 327 DST_REL(0), 328 DST_SEL_X(SQ_SEL_X), 329 DST_SEL_Y(SQ_SEL_Y), 330 DST_SEL_Z(SQ_SEL_0), 331 DST_SEL_W(SQ_SEL_1), 332 USE_CONST_FIELDS(0), 333 DATA_FORMAT(FMT_32_32_FLOAT), 334 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 335 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 336 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 337 shader[i++] = VTX_DWORD2(OFFSET(0), 338#if X_BYTE_ORDER == X_BIG_ENDIAN 339 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 340#else 341 ENDIAN_SWAP(SQ_ENDIAN_NONE), 342#endif 343 CONST_BUF_NO_STRIDE(0), 344 MEGA_FETCH(1), 345 ALT_CONST(0), 346 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 347 shader[i++] = VTX_DWORD_PAD; 348 /* 6/7 */ 349 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 350 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 351 FETCH_WHOLE_QUAD(0), 352 BUFFER_ID(0), 353 SRC_GPR(0), 354 SRC_REL(ABSOLUTE), 355 SRC_SEL_X(SQ_SEL_X), 356 MEGA_FETCH_COUNT(8)); 357 shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), 358 DST_REL(0), 359 DST_SEL_X(SQ_SEL_X), 360 DST_SEL_Y(SQ_SEL_Y), 361 DST_SEL_Z(SQ_SEL_0), 362 DST_SEL_W(SQ_SEL_1), 363 USE_CONST_FIELDS(0), 364 DATA_FORMAT(FMT_32_32_FLOAT), 365 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 366 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 367 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 368 shader[i++] = VTX_DWORD2(OFFSET(8), 369#if X_BYTE_ORDER == X_BIG_ENDIAN 370 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 371#else 372 ENDIAN_SWAP(SQ_ENDIAN_NONE), 373#endif 374 CONST_BUF_NO_STRIDE(0), 375 MEGA_FETCH(0), 376 ALT_CONST(0), 377 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 378 shader[i++] = VTX_DWORD_PAD; 379 380 return i; 381} 382 383/* copy ps --------------------------------------- */ 384int evergreen_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader) 385{ 386 int i = 0; 387 388 /* CF INST 0 */ 389 shader[i++] = CF_ALU_DWORD0(ADDR(3), 390 KCACHE_BANK0(0), 391 KCACHE_BANK1(0), 392 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 393 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 394 KCACHE_ADDR0(0), 395 KCACHE_ADDR1(0), 396 I_COUNT(4), 397 ALT_CONST(0), 398 CF_INST(SQ_CF_INST_ALU), 399 WHOLE_QUAD_MODE(0), 400 BARRIER(1)); 401 /* CF INST 1 */ 402 shader[i++] = CF_DWORD0(ADDR(8), 403 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 404 shader[i++] = CF_DWORD1(POP_COUNT(0), 405 CF_CONST(0), 406 COND(SQ_CF_COND_ACTIVE), 407 I_COUNT(1), 408 VALID_PIXEL_MODE(0), 409 END_OF_PROGRAM(0), 410 CF_INST(SQ_CF_INST_TC), 411 WHOLE_QUAD_MODE(0), 412 BARRIER(1)); 413 /* CF INST 2 */ 414 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 415 TYPE(SQ_EXPORT_PIXEL), 416 RW_GPR(0), 417 RW_REL(ABSOLUTE), 418 INDEX_GPR(0), 419 ELEM_SIZE(1)); 420 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 421 SRC_SEL_Y(SQ_SEL_Y), 422 SRC_SEL_Z(SQ_SEL_Z), 423 SRC_SEL_W(SQ_SEL_W), 424 BURST_COUNT(1), 425 VALID_PIXEL_MODE(0), 426 END_OF_PROGRAM(1), 427 CF_INST(SQ_CF_INST_EXPORT_DONE), 428 MARK(0), 429 BARRIER(1)); 430 431 /* 3 interpolate tex coords */ 432 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 433 SRC0_REL(ABSOLUTE), 434 SRC0_ELEM(ELEM_Y), 435 SRC0_NEG(0), 436 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 437 SRC1_REL(ABSOLUTE), 438 SRC1_ELEM(ELEM_X), 439 SRC1_NEG(0), 440 INDEX_MODE(SQ_INDEX_AR_X), 441 PRED_SEL(SQ_PRED_SEL_OFF), 442 LAST(0)); 443 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 444 SRC1_ABS(0), 445 UPDATE_EXECUTE_MASK(0), 446 UPDATE_PRED(0), 447 WRITE_MASK(1), 448 OMOD(SQ_ALU_OMOD_OFF), 449 ALU_INST(SQ_OP2_INST_INTERP_XY), 450 BANK_SWIZZLE(SQ_ALU_VEC_210), 451 DST_GPR(0), 452 DST_REL(ABSOLUTE), 453 DST_ELEM(ELEM_X), 454 CLAMP(0)); 455 /* 4 */ 456 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 457 SRC0_REL(ABSOLUTE), 458 SRC0_ELEM(ELEM_X), 459 SRC0_NEG(0), 460 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 461 SRC1_REL(ABSOLUTE), 462 SRC1_ELEM(ELEM_X), 463 SRC1_NEG(0), 464 INDEX_MODE(SQ_INDEX_AR_X), 465 PRED_SEL(SQ_PRED_SEL_OFF), 466 LAST(0)); 467 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 468 SRC1_ABS(0), 469 UPDATE_EXECUTE_MASK(0), 470 UPDATE_PRED(0), 471 WRITE_MASK(1), 472 OMOD(SQ_ALU_OMOD_OFF), 473 ALU_INST(SQ_OP2_INST_INTERP_XY), 474 BANK_SWIZZLE(SQ_ALU_VEC_210), 475 DST_GPR(0), 476 DST_REL(ABSOLUTE), 477 DST_ELEM(ELEM_Y), 478 CLAMP(0)); 479 /* 5 */ 480 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 481 SRC0_REL(ABSOLUTE), 482 SRC0_ELEM(ELEM_Y), 483 SRC0_NEG(0), 484 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 485 SRC1_REL(ABSOLUTE), 486 SRC1_ELEM(ELEM_X), 487 SRC1_NEG(0), 488 INDEX_MODE(SQ_INDEX_AR_X), 489 PRED_SEL(SQ_PRED_SEL_OFF), 490 LAST(0)); 491 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 492 SRC1_ABS(0), 493 UPDATE_EXECUTE_MASK(0), 494 UPDATE_PRED(0), 495 WRITE_MASK(0), 496 OMOD(SQ_ALU_OMOD_OFF), 497 ALU_INST(SQ_OP2_INST_INTERP_XY), 498 BANK_SWIZZLE(SQ_ALU_VEC_210), 499 DST_GPR(0), 500 DST_REL(ABSOLUTE), 501 DST_ELEM(ELEM_Z), 502 CLAMP(0)); 503 /* 6 */ 504 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 505 SRC0_REL(ABSOLUTE), 506 SRC0_ELEM(ELEM_X), 507 SRC0_NEG(0), 508 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 509 SRC1_REL(ABSOLUTE), 510 SRC1_ELEM(ELEM_X), 511 SRC1_NEG(0), 512 INDEX_MODE(SQ_INDEX_AR_X), 513 PRED_SEL(SQ_PRED_SEL_OFF), 514 LAST(1)); 515 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 516 SRC1_ABS(0), 517 UPDATE_EXECUTE_MASK(0), 518 UPDATE_PRED(0), 519 WRITE_MASK(0), 520 OMOD(SQ_ALU_OMOD_OFF), 521 ALU_INST(SQ_OP2_INST_INTERP_XY), 522 BANK_SWIZZLE(SQ_ALU_VEC_210), 523 DST_GPR(0), 524 DST_REL(ABSOLUTE), 525 DST_ELEM(ELEM_W), 526 CLAMP(0)); 527 528 /* 7 */ 529 shader[i++] = 0x00000000; 530 shader[i++] = 0x00000000; 531 532 /* 8/9 TEX INST 0 */ 533 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 534 INST_MOD(0), 535 FETCH_WHOLE_QUAD(0), 536 RESOURCE_ID(0), 537 SRC_GPR(0), 538 SRC_REL(ABSOLUTE), 539 ALT_CONST(0), 540 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 541 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 542 shader[i++] = TEX_DWORD1(DST_GPR(0), 543 DST_REL(ABSOLUTE), 544 DST_SEL_X(SQ_SEL_X), /* R */ 545 DST_SEL_Y(SQ_SEL_Y), /* G */ 546 DST_SEL_Z(SQ_SEL_Z), /* B */ 547 DST_SEL_W(SQ_SEL_W), /* A */ 548 LOD_BIAS(0), 549 COORD_TYPE_X(TEX_UNNORMALIZED), 550 COORD_TYPE_Y(TEX_UNNORMALIZED), 551 COORD_TYPE_Z(TEX_UNNORMALIZED), 552 COORD_TYPE_W(TEX_UNNORMALIZED)); 553 shader[i++] = TEX_DWORD2(OFFSET_X(0), 554 OFFSET_Y(0), 555 OFFSET_Z(0), 556 SAMPLER_ID(0), 557 SRC_SEL_X(SQ_SEL_X), 558 SRC_SEL_Y(SQ_SEL_Y), 559 SRC_SEL_Z(SQ_SEL_0), 560 SRC_SEL_W(SQ_SEL_1)); 561 shader[i++] = TEX_DWORD_PAD; 562 563 return i; 564} 565 566int evergreen_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader) 567{ 568 int i = 0; 569 570 /* 0 */ 571 shader[i++] = CF_DWORD0(ADDR(6), 572 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 573 shader[i++] = CF_DWORD1(POP_COUNT(0), 574 CF_CONST(0), 575 COND(SQ_CF_COND_ACTIVE), 576 I_COUNT(2), 577 VALID_PIXEL_MODE(0), 578 END_OF_PROGRAM(0), 579 CF_INST(SQ_CF_INST_VC), 580 WHOLE_QUAD_MODE(0), 581 BARRIER(1)); 582 583 /* 1 - ALU */ 584 shader[i++] = CF_ALU_DWORD0(ADDR(4), 585 KCACHE_BANK0(0), 586 KCACHE_BANK1(0), 587 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); 588 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 589 KCACHE_ADDR0(0), 590 KCACHE_ADDR1(0), 591 I_COUNT(2), 592 ALT_CONST(0), 593 CF_INST(SQ_CF_INST_ALU), 594 WHOLE_QUAD_MODE(0), 595 BARRIER(1)); 596 597 /* 2 */ 598 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 599 TYPE(SQ_EXPORT_POS), 600 RW_GPR(1), 601 RW_REL(ABSOLUTE), 602 INDEX_GPR(0), 603 ELEM_SIZE(3)); 604 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 605 SRC_SEL_Y(SQ_SEL_Y), 606 SRC_SEL_Z(SQ_SEL_Z), 607 SRC_SEL_W(SQ_SEL_W), 608 BURST_COUNT(1), 609 VALID_PIXEL_MODE(0), 610 END_OF_PROGRAM(0), 611 CF_INST(SQ_CF_INST_EXPORT_DONE), 612 MARK(0), 613 BARRIER(1)); 614 /* 3 */ 615 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 616 TYPE(SQ_EXPORT_PARAM), 617 RW_GPR(0), 618 RW_REL(ABSOLUTE), 619 INDEX_GPR(0), 620 ELEM_SIZE(3)); 621 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 622 SRC_SEL_Y(SQ_SEL_Y), 623 SRC_SEL_Z(SQ_SEL_Z), 624 SRC_SEL_W(SQ_SEL_W), 625 BURST_COUNT(1), 626 VALID_PIXEL_MODE(0), 627 END_OF_PROGRAM(1), 628 CF_INST(SQ_CF_INST_EXPORT_DONE), 629 MARK(0), 630 BARRIER(0)); 631 632 633 /* 4 texX / w */ 634 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 635 SRC0_REL(ABSOLUTE), 636 SRC0_ELEM(ELEM_X), 637 SRC0_NEG(0), 638 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 639 SRC1_REL(ABSOLUTE), 640 SRC1_ELEM(ELEM_X), 641 SRC1_NEG(0), 642 INDEX_MODE(SQ_INDEX_AR_X), 643 PRED_SEL(SQ_PRED_SEL_OFF), 644 LAST(0)); 645 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 646 SRC1_ABS(0), 647 UPDATE_EXECUTE_MASK(0), 648 UPDATE_PRED(0), 649 WRITE_MASK(1), 650 OMOD(SQ_ALU_OMOD_OFF), 651 ALU_INST(SQ_OP2_INST_MUL), 652 BANK_SWIZZLE(SQ_ALU_VEC_012), 653 DST_GPR(0), 654 DST_REL(ABSOLUTE), 655 DST_ELEM(ELEM_X), 656 CLAMP(0)); 657 658 /* 5 texY / h */ 659 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 660 SRC0_REL(ABSOLUTE), 661 SRC0_ELEM(ELEM_Y), 662 SRC0_NEG(0), 663 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 664 SRC1_REL(ABSOLUTE), 665 SRC1_ELEM(ELEM_Y), 666 SRC1_NEG(0), 667 INDEX_MODE(SQ_INDEX_AR_X), 668 PRED_SEL(SQ_PRED_SEL_OFF), 669 LAST(1)); 670 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 671 SRC1_ABS(0), 672 UPDATE_EXECUTE_MASK(0), 673 UPDATE_PRED(0), 674 WRITE_MASK(1), 675 OMOD(SQ_ALU_OMOD_OFF), 676 ALU_INST(SQ_OP2_INST_MUL), 677 BANK_SWIZZLE(SQ_ALU_VEC_012), 678 DST_GPR(0), 679 DST_REL(ABSOLUTE), 680 DST_ELEM(ELEM_Y), 681 CLAMP(0)); 682 683 /* 6/7 */ 684 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 685 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 686 FETCH_WHOLE_QUAD(0), 687 BUFFER_ID(0), 688 SRC_GPR(0), 689 SRC_REL(ABSOLUTE), 690 SRC_SEL_X(SQ_SEL_X), 691 MEGA_FETCH_COUNT(16)); 692 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 693 DST_REL(ABSOLUTE), 694 DST_SEL_X(SQ_SEL_X), 695 DST_SEL_Y(SQ_SEL_Y), 696 DST_SEL_Z(SQ_SEL_0), 697 DST_SEL_W(SQ_SEL_1), 698 USE_CONST_FIELDS(0), 699 DATA_FORMAT(FMT_32_32_FLOAT), 700 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 701 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 702 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 703 shader[i++] = VTX_DWORD2(OFFSET(0), 704#if X_BYTE_ORDER == X_BIG_ENDIAN 705 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 706#else 707 ENDIAN_SWAP(SQ_ENDIAN_NONE), 708#endif 709 CONST_BUF_NO_STRIDE(0), 710 MEGA_FETCH(1), 711 ALT_CONST(0), 712 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 713 shader[i++] = VTX_DWORD_PAD; 714 /* 8/9 */ 715 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 716 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 717 FETCH_WHOLE_QUAD(0), 718 BUFFER_ID(0), 719 SRC_GPR(0), 720 SRC_REL(ABSOLUTE), 721 SRC_SEL_X(SQ_SEL_X), 722 MEGA_FETCH_COUNT(8)); 723 shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), 724 DST_REL(ABSOLUTE), 725 DST_SEL_X(SQ_SEL_X), 726 DST_SEL_Y(SQ_SEL_Y), 727 DST_SEL_Z(SQ_SEL_0), 728 DST_SEL_W(SQ_SEL_1), 729 USE_CONST_FIELDS(0), 730 DATA_FORMAT(FMT_32_32_FLOAT), 731 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 732 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 733 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 734 shader[i++] = VTX_DWORD2(OFFSET(8), 735#if X_BYTE_ORDER == X_BIG_ENDIAN 736 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 737#else 738 ENDIAN_SWAP(SQ_ENDIAN_NONE), 739#endif 740 CONST_BUF_NO_STRIDE(0), 741 MEGA_FETCH(0), 742 ALT_CONST(0), 743 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 744 shader[i++] = VTX_DWORD_PAD; 745 746 return i; 747} 748 749int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) 750{ 751 int i = 0; 752 753 /* 0 */ 754 shader[i++] = CF_ALU_DWORD0(ADDR(5), 755 KCACHE_BANK0(0), 756 KCACHE_BANK1(0), 757 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 758 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 759 KCACHE_ADDR0(0), 760 KCACHE_ADDR1(0), 761 I_COUNT(4), 762 ALT_CONST(0), 763 CF_INST(SQ_CF_INST_ALU), 764 WHOLE_QUAD_MODE(0), 765 BARRIER(1)); 766 /* 1 */ 767 shader[i++] = CF_DWORD0(ADDR(21), 768 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 769 shader[i++] = CF_DWORD1(POP_COUNT(0), 770 CF_CONST(0), 771 COND(SQ_CF_COND_BOOL), 772 I_COUNT(0), 773 VALID_PIXEL_MODE(0), 774 END_OF_PROGRAM(0), 775 CF_INST(SQ_CF_INST_CALL), 776 WHOLE_QUAD_MODE(0), 777 BARRIER(0)); 778 /* 2 */ 779 shader[i++] = CF_DWORD0(ADDR(30), 780 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 781 shader[i++] = CF_DWORD1(POP_COUNT(0), 782 CF_CONST(0), 783 COND(SQ_CF_COND_NOT_BOOL), 784 I_COUNT(0), 785 VALID_PIXEL_MODE(0), 786 END_OF_PROGRAM(0), 787 CF_INST(SQ_CF_INST_CALL), 788 WHOLE_QUAD_MODE(0), 789 BARRIER(0)); 790 /* 3 */ 791 shader[i++] = CF_ALU_DWORD0(ADDR(9), 792 KCACHE_BANK0(0), 793 KCACHE_BANK1(0), 794 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); 795 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 796 KCACHE_ADDR0(0), 797 KCACHE_ADDR1(0), 798 I_COUNT(12), 799 ALT_CONST(0), 800 CF_INST(SQ_CF_INST_ALU), 801 WHOLE_QUAD_MODE(0), 802 BARRIER(1)); 803 /* 4 */ 804 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 805 TYPE(SQ_EXPORT_PIXEL), 806 RW_GPR(2), 807 RW_REL(ABSOLUTE), 808 INDEX_GPR(0), 809 ELEM_SIZE(3)); 810 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 811 SRC_SEL_Y(SQ_SEL_Y), 812 SRC_SEL_Z(SQ_SEL_Z), 813 SRC_SEL_W(SQ_SEL_W), 814 BURST_COUNT(1), 815 VALID_PIXEL_MODE(0), 816 END_OF_PROGRAM(1), 817 CF_INST(SQ_CF_INST_EXPORT_DONE), 818 MARK(0), 819 BARRIER(1)); 820 /* 5 interpolate tex coords */ 821 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 822 SRC0_REL(ABSOLUTE), 823 SRC0_ELEM(ELEM_Y), 824 SRC0_NEG(0), 825 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 826 SRC1_REL(ABSOLUTE), 827 SRC1_ELEM(ELEM_X), 828 SRC1_NEG(0), 829 INDEX_MODE(SQ_INDEX_AR_X), 830 PRED_SEL(SQ_PRED_SEL_OFF), 831 LAST(0)); 832 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 833 SRC1_ABS(0), 834 UPDATE_EXECUTE_MASK(0), 835 UPDATE_PRED(0), 836 WRITE_MASK(1), 837 OMOD(SQ_ALU_OMOD_OFF), 838 ALU_INST(SQ_OP2_INST_INTERP_XY), 839 BANK_SWIZZLE(SQ_ALU_VEC_210), 840 DST_GPR(0), 841 DST_REL(ABSOLUTE), 842 DST_ELEM(ELEM_X), 843 CLAMP(0)); 844 /* 6 */ 845 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 846 SRC0_REL(ABSOLUTE), 847 SRC0_ELEM(ELEM_X), 848 SRC0_NEG(0), 849 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 850 SRC1_REL(ABSOLUTE), 851 SRC1_ELEM(ELEM_X), 852 SRC1_NEG(0), 853 INDEX_MODE(SQ_INDEX_AR_X), 854 PRED_SEL(SQ_PRED_SEL_OFF), 855 LAST(0)); 856 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 857 SRC1_ABS(0), 858 UPDATE_EXECUTE_MASK(0), 859 UPDATE_PRED(0), 860 WRITE_MASK(1), 861 OMOD(SQ_ALU_OMOD_OFF), 862 ALU_INST(SQ_OP2_INST_INTERP_XY), 863 BANK_SWIZZLE(SQ_ALU_VEC_210), 864 DST_GPR(0), 865 DST_REL(ABSOLUTE), 866 DST_ELEM(ELEM_Y), 867 CLAMP(0)); 868 /* 7 */ 869 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 870 SRC0_REL(ABSOLUTE), 871 SRC0_ELEM(ELEM_Y), 872 SRC0_NEG(0), 873 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 874 SRC1_REL(ABSOLUTE), 875 SRC1_ELEM(ELEM_X), 876 SRC1_NEG(0), 877 INDEX_MODE(SQ_INDEX_AR_X), 878 PRED_SEL(SQ_PRED_SEL_OFF), 879 LAST(0)); 880 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 881 SRC1_ABS(0), 882 UPDATE_EXECUTE_MASK(0), 883 UPDATE_PRED(0), 884 WRITE_MASK(0), 885 OMOD(SQ_ALU_OMOD_OFF), 886 ALU_INST(SQ_OP2_INST_INTERP_XY), 887 BANK_SWIZZLE(SQ_ALU_VEC_210), 888 DST_GPR(0), 889 DST_REL(ABSOLUTE), 890 DST_ELEM(ELEM_Z), 891 CLAMP(0)); 892 /* 8 */ 893 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 894 SRC0_REL(ABSOLUTE), 895 SRC0_ELEM(ELEM_X), 896 SRC0_NEG(0), 897 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 898 SRC1_REL(ABSOLUTE), 899 SRC1_ELEM(ELEM_X), 900 SRC1_NEG(0), 901 INDEX_MODE(SQ_INDEX_AR_X), 902 PRED_SEL(SQ_PRED_SEL_OFF), 903 LAST(1)); 904 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 905 SRC1_ABS(0), 906 UPDATE_EXECUTE_MASK(0), 907 UPDATE_PRED(0), 908 WRITE_MASK(0), 909 OMOD(SQ_ALU_OMOD_OFF), 910 ALU_INST(SQ_OP2_INST_INTERP_XY), 911 BANK_SWIZZLE(SQ_ALU_VEC_210), 912 DST_GPR(0), 913 DST_REL(ABSOLUTE), 914 DST_ELEM(ELEM_W), 915 CLAMP(0)); 916 917 /* 9,10,11,12 */ 918 /* r2.x = MAD(c0.w, r1.x, c0.x) */ 919 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 920 SRC0_REL(ABSOLUTE), 921 SRC0_ELEM(ELEM_W), 922 SRC0_NEG(0), 923 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 924 SRC1_REL(ABSOLUTE), 925 SRC1_ELEM(ELEM_X), 926 SRC1_NEG(0), 927 INDEX_MODE(SQ_INDEX_LOOP), 928 PRED_SEL(SQ_PRED_SEL_OFF), 929 LAST(0)); 930 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0), 931 SRC2_REL(ABSOLUTE), 932 SRC2_ELEM(ELEM_X), 933 SRC2_NEG(0), 934 ALU_INST(SQ_OP3_INST_MULADD), 935 BANK_SWIZZLE(SQ_ALU_VEC_012), 936 DST_GPR(2), 937 DST_REL(ABSOLUTE), 938 DST_ELEM(ELEM_X), 939 CLAMP(0)); 940 /* r2.y = MAD(c0.w, r1.x, c0.y) */ 941 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 942 SRC0_REL(ABSOLUTE), 943 SRC0_ELEM(ELEM_W), 944 SRC0_NEG(0), 945 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 946 SRC1_REL(ABSOLUTE), 947 SRC1_ELEM(ELEM_X), 948 SRC1_NEG(0), 949 INDEX_MODE(SQ_INDEX_LOOP), 950 PRED_SEL(SQ_PRED_SEL_OFF), 951 LAST(0)); 952 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0), 953 SRC2_REL(ABSOLUTE), 954 SRC2_ELEM(ELEM_Y), 955 SRC2_NEG(0), 956 ALU_INST(SQ_OP3_INST_MULADD), 957 BANK_SWIZZLE(SQ_ALU_VEC_012), 958 DST_GPR(2), 959 DST_REL(ABSOLUTE), 960 DST_ELEM(ELEM_Y), 961 CLAMP(0)); 962 /* r2.z = MAD(c0.w, r1.x, c0.z) */ 963 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 964 SRC0_REL(ABSOLUTE), 965 SRC0_ELEM(ELEM_W), 966 SRC0_NEG(0), 967 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 968 SRC1_REL(ABSOLUTE), 969 SRC1_ELEM(ELEM_X), 970 SRC1_NEG(0), 971 INDEX_MODE(SQ_INDEX_LOOP), 972 PRED_SEL(SQ_PRED_SEL_OFF), 973 LAST(0)); 974 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0), 975 SRC2_REL(ABSOLUTE), 976 SRC2_ELEM(ELEM_Z), 977 SRC2_NEG(0), 978 ALU_INST(SQ_OP3_INST_MULADD), 979 BANK_SWIZZLE(SQ_ALU_VEC_012), 980 DST_GPR(2), 981 DST_REL(ABSOLUTE), 982 DST_ELEM(ELEM_Z), 983 CLAMP(0)); 984 /* r2.w = MAD(0, 0, 1) */ 985 shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), 986 SRC0_REL(ABSOLUTE), 987 SRC0_ELEM(ELEM_X), 988 SRC0_NEG(0), 989 SRC1_SEL(SQ_ALU_SRC_0), 990 SRC1_REL(ABSOLUTE), 991 SRC1_ELEM(ELEM_X), 992 SRC1_NEG(0), 993 INDEX_MODE(SQ_INDEX_LOOP), 994 PRED_SEL(SQ_PRED_SEL_OFF), 995 LAST(1)); 996 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), 997 SRC2_REL(ABSOLUTE), 998 SRC2_ELEM(ELEM_X), 999 SRC2_NEG(0), 1000 ALU_INST(SQ_OP3_INST_MULADD), 1001 BANK_SWIZZLE(SQ_ALU_VEC_012), 1002 DST_GPR(2), 1003 DST_REL(ABSOLUTE), 1004 DST_ELEM(ELEM_W), 1005 CLAMP(0)); 1006 1007 /* 13,14,15,16 */ 1008 /* r2.x = MAD(c1.x, r1.y, pv.x) */ 1009 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1), 1010 SRC0_REL(ABSOLUTE), 1011 SRC0_ELEM(ELEM_X), 1012 SRC0_NEG(0), 1013 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 1014 SRC1_REL(ABSOLUTE), 1015 SRC1_ELEM(ELEM_Y), 1016 SRC1_NEG(0), 1017 INDEX_MODE(SQ_INDEX_LOOP), 1018 PRED_SEL(SQ_PRED_SEL_OFF), 1019 LAST(0)); 1020 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 1021 SRC2_REL(ABSOLUTE), 1022 SRC2_ELEM(ELEM_X), 1023 SRC2_NEG(0), 1024 ALU_INST(SQ_OP3_INST_MULADD), 1025 BANK_SWIZZLE(SQ_ALU_VEC_012), 1026 DST_GPR(2), 1027 DST_REL(ABSOLUTE), 1028 DST_ELEM(ELEM_X), 1029 CLAMP(0)); 1030 /* r2.y = MAD(c1.y, r1.y, pv.y) */ 1031 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1), 1032 SRC0_REL(ABSOLUTE), 1033 SRC0_ELEM(ELEM_Y), 1034 SRC0_NEG(0), 1035 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 1036 SRC1_REL(ABSOLUTE), 1037 SRC1_ELEM(ELEM_Y), 1038 SRC1_NEG(0), 1039 INDEX_MODE(SQ_INDEX_LOOP), 1040 PRED_SEL(SQ_PRED_SEL_OFF), 1041 LAST(0)); 1042 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 1043 SRC2_REL(ABSOLUTE), 1044 SRC2_ELEM(ELEM_Y), 1045 SRC2_NEG(0), 1046 ALU_INST(SQ_OP3_INST_MULADD), 1047 BANK_SWIZZLE(SQ_ALU_VEC_012), 1048 DST_GPR(2), 1049 DST_REL(ABSOLUTE), 1050 DST_ELEM(ELEM_Y), 1051 CLAMP(0)); 1052 /* r2.z = MAD(c1.z, r1.y, pv.z) */ 1053 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1), 1054 SRC0_REL(ABSOLUTE), 1055 SRC0_ELEM(ELEM_Z), 1056 SRC0_NEG(0), 1057 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 1058 SRC1_REL(ABSOLUTE), 1059 SRC1_ELEM(ELEM_Y), 1060 SRC1_NEG(0), 1061 INDEX_MODE(SQ_INDEX_LOOP), 1062 PRED_SEL(SQ_PRED_SEL_OFF), 1063 LAST(0)); 1064 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 1065 SRC2_REL(ABSOLUTE), 1066 SRC2_ELEM(ELEM_Z), 1067 SRC2_NEG(0), 1068 ALU_INST(SQ_OP3_INST_MULADD), 1069 BANK_SWIZZLE(SQ_ALU_VEC_012), 1070 DST_GPR(2), 1071 DST_REL(ABSOLUTE), 1072 DST_ELEM(ELEM_Z), 1073 CLAMP(0)); 1074 /* r2.w = MAD(0, 0, 1) */ 1075 shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), 1076 SRC0_REL(ABSOLUTE), 1077 SRC0_ELEM(ELEM_X), 1078 SRC0_NEG(0), 1079 SRC1_SEL(SQ_ALU_SRC_0), 1080 SRC1_REL(ABSOLUTE), 1081 SRC1_ELEM(ELEM_X), 1082 SRC1_NEG(0), 1083 INDEX_MODE(SQ_INDEX_LOOP), 1084 PRED_SEL(SQ_PRED_SEL_OFF), 1085 LAST(1)); 1086 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), 1087 SRC2_REL(ABSOLUTE), 1088 SRC2_ELEM(ELEM_W), 1089 SRC2_NEG(0), 1090 ALU_INST(SQ_OP3_INST_MULADD), 1091 BANK_SWIZZLE(SQ_ALU_VEC_012), 1092 DST_GPR(2), 1093 DST_REL(ABSOLUTE), 1094 DST_ELEM(ELEM_W), 1095 CLAMP(0)); 1096 /* 17,18,19,20 */ 1097 /* r2.x = MAD(c2.x, r1.z, pv.x) */ 1098 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2), 1099 SRC0_REL(ABSOLUTE), 1100 SRC0_ELEM(ELEM_X), 1101 SRC0_NEG(0), 1102 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 1103 SRC1_REL(ABSOLUTE), 1104 SRC1_ELEM(ELEM_Z), 1105 SRC1_NEG(0), 1106 INDEX_MODE(SQ_INDEX_LOOP), 1107 PRED_SEL(SQ_PRED_SEL_OFF), 1108 LAST(0)); 1109 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 1110 SRC2_REL(ABSOLUTE), 1111 SRC2_ELEM(ELEM_X), 1112 SRC2_NEG(0), 1113 ALU_INST(SQ_OP3_INST_MULADD), 1114 BANK_SWIZZLE(SQ_ALU_VEC_012), 1115 DST_GPR(2), 1116 DST_REL(ABSOLUTE), 1117 DST_ELEM(ELEM_X), 1118 CLAMP(1)); 1119 /* r2.y = MAD(c2.y, r1.z, pv.y) */ 1120 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2), 1121 SRC0_REL(ABSOLUTE), 1122 SRC0_ELEM(ELEM_Y), 1123 SRC0_NEG(0), 1124 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 1125 SRC1_REL(ABSOLUTE), 1126 SRC1_ELEM(ELEM_Z), 1127 SRC1_NEG(0), 1128 INDEX_MODE(SQ_INDEX_LOOP), 1129 PRED_SEL(SQ_PRED_SEL_OFF), 1130 LAST(0)); 1131 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 1132 SRC2_REL(ABSOLUTE), 1133 SRC2_ELEM(ELEM_Y), 1134 SRC2_NEG(0), 1135 ALU_INST(SQ_OP3_INST_MULADD), 1136 BANK_SWIZZLE(SQ_ALU_VEC_012), 1137 DST_GPR(2), 1138 DST_REL(ABSOLUTE), 1139 DST_ELEM(ELEM_Y), 1140 CLAMP(1)); 1141 /* r2.z = MAD(c2.z, r1.z, pv.z) */ 1142 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2), 1143 SRC0_REL(ABSOLUTE), 1144 SRC0_ELEM(ELEM_Z), 1145 SRC0_NEG(0), 1146 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 1147 SRC1_REL(ABSOLUTE), 1148 SRC1_ELEM(ELEM_Z), 1149 SRC1_NEG(0), 1150 INDEX_MODE(SQ_INDEX_LOOP), 1151 PRED_SEL(SQ_PRED_SEL_OFF), 1152 LAST(0)); 1153 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 1154 SRC2_REL(ABSOLUTE), 1155 SRC2_ELEM(ELEM_Z), 1156 SRC2_NEG(0), 1157 ALU_INST(SQ_OP3_INST_MULADD), 1158 BANK_SWIZZLE(SQ_ALU_VEC_012), 1159 DST_GPR(2), 1160 DST_REL(ABSOLUTE), 1161 DST_ELEM(ELEM_Z), 1162 CLAMP(1)); 1163 /* r2.w = MAD(0, 0, 1) */ 1164 shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), 1165 SRC0_REL(ABSOLUTE), 1166 SRC0_ELEM(ELEM_X), 1167 SRC0_NEG(0), 1168 SRC1_SEL(SQ_ALU_SRC_0), 1169 SRC1_REL(ABSOLUTE), 1170 SRC1_ELEM(ELEM_X), 1171 SRC1_NEG(0), 1172 INDEX_MODE(SQ_INDEX_LOOP), 1173 PRED_SEL(SQ_PRED_SEL_OFF), 1174 LAST(1)); 1175 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), 1176 SRC2_REL(ABSOLUTE), 1177 SRC2_ELEM(ELEM_X), 1178 SRC2_NEG(0), 1179 ALU_INST(SQ_OP3_INST_MULADD), 1180 BANK_SWIZZLE(SQ_ALU_VEC_012), 1181 DST_GPR(2), 1182 DST_REL(ABSOLUTE), 1183 DST_ELEM(ELEM_W), 1184 CLAMP(1)); 1185 1186 /* 21 */ 1187 shader[i++] = CF_DWORD0(ADDR(24), 1188 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1189 shader[i++] = CF_DWORD1(POP_COUNT(0), 1190 CF_CONST(0), 1191 COND(SQ_CF_COND_ACTIVE), 1192 I_COUNT(3), 1193 VALID_PIXEL_MODE(0), 1194 END_OF_PROGRAM(0), 1195 CF_INST(SQ_CF_INST_TC), 1196 WHOLE_QUAD_MODE(0), 1197 BARRIER(1)); 1198 /* 22 */ 1199 shader[i++] = CF_DWORD0(ADDR(0), 1200 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1201 shader[i++] = CF_DWORD1(POP_COUNT(0), 1202 CF_CONST(0), 1203 COND(SQ_CF_COND_ACTIVE), 1204 I_COUNT(0), 1205 VALID_PIXEL_MODE(0), 1206 END_OF_PROGRAM(0), 1207 CF_INST(SQ_CF_INST_RETURN), 1208 WHOLE_QUAD_MODE(0), 1209 BARRIER(1)); 1210 /* 23 */ 1211 shader[i++] = 0x00000000; 1212 shader[i++] = 0x00000000; 1213 /* 24/25 */ 1214 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1215 INST_MOD(0), 1216 FETCH_WHOLE_QUAD(0), 1217 RESOURCE_ID(0), 1218 SRC_GPR(0), 1219 SRC_REL(ABSOLUTE), 1220 ALT_CONST(0), 1221 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 1222 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 1223 shader[i++] = TEX_DWORD1(DST_GPR(1), 1224 DST_REL(ABSOLUTE), 1225 DST_SEL_X(SQ_SEL_X), 1226 DST_SEL_Y(SQ_SEL_MASK), 1227 DST_SEL_Z(SQ_SEL_MASK), 1228 DST_SEL_W(SQ_SEL_1), 1229 LOD_BIAS(0), 1230 COORD_TYPE_X(TEX_NORMALIZED), 1231 COORD_TYPE_Y(TEX_NORMALIZED), 1232 COORD_TYPE_Z(TEX_NORMALIZED), 1233 COORD_TYPE_W(TEX_NORMALIZED)); 1234 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1235 OFFSET_Y(0), 1236 OFFSET_Z(0), 1237 SAMPLER_ID(0), 1238 SRC_SEL_X(SQ_SEL_X), 1239 SRC_SEL_Y(SQ_SEL_Y), 1240 SRC_SEL_Z(SQ_SEL_0), 1241 SRC_SEL_W(SQ_SEL_1)); 1242 shader[i++] = TEX_DWORD_PAD; 1243 /* 26/27 */ 1244 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1245 INST_MOD(0), 1246 FETCH_WHOLE_QUAD(0), 1247 RESOURCE_ID(1), 1248 SRC_GPR(0), 1249 SRC_REL(ABSOLUTE), 1250 ALT_CONST(0), 1251 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 1252 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 1253 shader[i++] = TEX_DWORD1(DST_GPR(1), 1254 DST_REL(ABSOLUTE), 1255 DST_SEL_X(SQ_SEL_MASK), 1256 DST_SEL_Y(SQ_SEL_MASK), 1257 DST_SEL_Z(SQ_SEL_X), 1258 DST_SEL_W(SQ_SEL_MASK), 1259 LOD_BIAS(0), 1260 COORD_TYPE_X(TEX_NORMALIZED), 1261 COORD_TYPE_Y(TEX_NORMALIZED), 1262 COORD_TYPE_Z(TEX_NORMALIZED), 1263 COORD_TYPE_W(TEX_NORMALIZED)); 1264 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1265 OFFSET_Y(0), 1266 OFFSET_Z(0), 1267 SAMPLER_ID(1), 1268 SRC_SEL_X(SQ_SEL_X), 1269 SRC_SEL_Y(SQ_SEL_Y), 1270 SRC_SEL_Z(SQ_SEL_0), 1271 SRC_SEL_W(SQ_SEL_1)); 1272 shader[i++] = TEX_DWORD_PAD; 1273 /* 28/29 */ 1274 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1275 INST_MOD(0), 1276 FETCH_WHOLE_QUAD(0), 1277 RESOURCE_ID(2), 1278 SRC_GPR(0), 1279 SRC_REL(ABSOLUTE), 1280 ALT_CONST(0), 1281 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 1282 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 1283 shader[i++] = TEX_DWORD1(DST_GPR(1), 1284 DST_REL(ABSOLUTE), 1285 DST_SEL_X(SQ_SEL_MASK), 1286 DST_SEL_Y(SQ_SEL_X), 1287 DST_SEL_Z(SQ_SEL_MASK), 1288 DST_SEL_W(SQ_SEL_MASK), 1289 LOD_BIAS(0), 1290 COORD_TYPE_X(TEX_NORMALIZED), 1291 COORD_TYPE_Y(TEX_NORMALIZED), 1292 COORD_TYPE_Z(TEX_NORMALIZED), 1293 COORD_TYPE_W(TEX_NORMALIZED)); 1294 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1295 OFFSET_Y(0), 1296 OFFSET_Z(0), 1297 SAMPLER_ID(2), 1298 SRC_SEL_X(SQ_SEL_X), 1299 SRC_SEL_Y(SQ_SEL_Y), 1300 SRC_SEL_Z(SQ_SEL_0), 1301 SRC_SEL_W(SQ_SEL_1)); 1302 shader[i++] = TEX_DWORD_PAD; 1303 /* 30 */ 1304 shader[i++] = CF_DWORD0(ADDR(32), 1305 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1306 shader[i++] = CF_DWORD1(POP_COUNT(0), 1307 CF_CONST(0), 1308 COND(SQ_CF_COND_ACTIVE), 1309 I_COUNT(2), 1310 VALID_PIXEL_MODE(0), 1311 END_OF_PROGRAM(0), 1312 CF_INST(SQ_CF_INST_TC), 1313 WHOLE_QUAD_MODE(0), 1314 BARRIER(1)); 1315 /* 31 */ 1316 shader[i++] = CF_DWORD0(ADDR(0), 1317 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1318 shader[i++] = CF_DWORD1(POP_COUNT(0), 1319 CF_CONST(0), 1320 COND(SQ_CF_COND_ACTIVE), 1321 I_COUNT(0), 1322 VALID_PIXEL_MODE(0), 1323 END_OF_PROGRAM(0), 1324 CF_INST(SQ_CF_INST_RETURN), 1325 WHOLE_QUAD_MODE(0), 1326 BARRIER(1)); 1327 /* 32/33 */ 1328 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1329 INST_MOD(0), 1330 FETCH_WHOLE_QUAD(0), 1331 RESOURCE_ID(0), 1332 SRC_GPR(0), 1333 SRC_REL(ABSOLUTE), 1334 ALT_CONST(0), 1335 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 1336 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 1337 shader[i++] = TEX_DWORD1(DST_GPR(1), 1338 DST_REL(ABSOLUTE), 1339 DST_SEL_X(SQ_SEL_X), 1340 DST_SEL_Y(SQ_SEL_MASK), 1341 DST_SEL_Z(SQ_SEL_MASK), 1342 DST_SEL_W(SQ_SEL_1), 1343 LOD_BIAS(0), 1344 COORD_TYPE_X(TEX_NORMALIZED), 1345 COORD_TYPE_Y(TEX_NORMALIZED), 1346 COORD_TYPE_Z(TEX_NORMALIZED), 1347 COORD_TYPE_W(TEX_NORMALIZED)); 1348 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1349 OFFSET_Y(0), 1350 OFFSET_Z(0), 1351 SAMPLER_ID(0), 1352 SRC_SEL_X(SQ_SEL_X), 1353 SRC_SEL_Y(SQ_SEL_Y), 1354 SRC_SEL_Z(SQ_SEL_0), 1355 SRC_SEL_W(SQ_SEL_1)); 1356 shader[i++] = TEX_DWORD_PAD; 1357 /* 34/35 */ 1358 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1359 INST_MOD(0), 1360 FETCH_WHOLE_QUAD(0), 1361 RESOURCE_ID(1), 1362 SRC_GPR(0), 1363 SRC_REL(ABSOLUTE), 1364 ALT_CONST(0), 1365 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 1366 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 1367 shader[i++] = TEX_DWORD1(DST_GPR(1), 1368 DST_REL(ABSOLUTE), 1369 DST_SEL_X(SQ_SEL_MASK), 1370 DST_SEL_Y(SQ_SEL_X), 1371 DST_SEL_Z(SQ_SEL_Y), 1372 DST_SEL_W(SQ_SEL_MASK), 1373 LOD_BIAS(0), 1374 COORD_TYPE_X(TEX_NORMALIZED), 1375 COORD_TYPE_Y(TEX_NORMALIZED), 1376 COORD_TYPE_Z(TEX_NORMALIZED), 1377 COORD_TYPE_W(TEX_NORMALIZED)); 1378 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1379 OFFSET_Y(0), 1380 OFFSET_Z(0), 1381 SAMPLER_ID(1), 1382 SRC_SEL_X(SQ_SEL_X), 1383 SRC_SEL_Y(SQ_SEL_Y), 1384 SRC_SEL_Z(SQ_SEL_0), 1385 SRC_SEL_W(SQ_SEL_1)); 1386 shader[i++] = TEX_DWORD_PAD; 1387 1388 return i; 1389} 1390 1391/* comp vs --------------------------------------- */ 1392int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) 1393{ 1394 int i = 0; 1395 1396 /* 0 */ 1397 shader[i++] = CF_DWORD0(ADDR(3), 1398 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1399 shader[i++] = CF_DWORD1(POP_COUNT(0), 1400 CF_CONST(0), 1401 COND(SQ_CF_COND_BOOL), 1402 I_COUNT(0), 1403 VALID_PIXEL_MODE(0), 1404 END_OF_PROGRAM(0), 1405 CF_INST(SQ_CF_INST_CALL), 1406 WHOLE_QUAD_MODE(0), 1407 BARRIER(0)); 1408 /* 1 */ 1409 shader[i++] = CF_DWORD0(ADDR(9), 1410 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1411 shader[i++] = CF_DWORD1(POP_COUNT(0), 1412 CF_CONST(0), 1413 COND(SQ_CF_COND_NOT_BOOL), 1414 I_COUNT(0), 1415 VALID_PIXEL_MODE(0), 1416 END_OF_PROGRAM(0), 1417 CF_INST(SQ_CF_INST_CALL), 1418 WHOLE_QUAD_MODE(0), 1419 BARRIER(0)); 1420 /* 2 */ 1421 shader[i++] = CF_DWORD0(ADDR(0), 1422 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1423 shader[i++] = CF_DWORD1(POP_COUNT(0), 1424 CF_CONST(0), 1425 COND(SQ_CF_COND_ACTIVE), 1426 I_COUNT(0), 1427 VALID_PIXEL_MODE(0), 1428 END_OF_PROGRAM(1), 1429 CF_INST(SQ_CF_INST_NOP), 1430 WHOLE_QUAD_MODE(0), 1431 BARRIER(1)); 1432 /* 3 - mask sub */ 1433 shader[i++] = CF_DWORD0(ADDR(44), 1434 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1435 shader[i++] = CF_DWORD1(POP_COUNT(0), 1436 CF_CONST(0), 1437 COND(SQ_CF_COND_ACTIVE), 1438 I_COUNT(3), 1439 VALID_PIXEL_MODE(0), 1440 END_OF_PROGRAM(0), 1441 CF_INST(SQ_CF_INST_VC), 1442 WHOLE_QUAD_MODE(0), 1443 BARRIER(1)); 1444 1445 /* 4 - ALU */ 1446 shader[i++] = CF_ALU_DWORD0(ADDR(14), 1447 KCACHE_BANK0(0), 1448 KCACHE_BANK1(0), 1449 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); 1450 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 1451 KCACHE_ADDR0(0), 1452 KCACHE_ADDR1(0), 1453 I_COUNT(20), 1454 ALT_CONST(0), 1455 CF_INST(SQ_CF_INST_ALU), 1456 WHOLE_QUAD_MODE(0), 1457 BARRIER(1)); 1458 1459 /* 5 - dst */ 1460 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 1461 TYPE(SQ_EXPORT_POS), 1462 RW_GPR(2), 1463 RW_REL(ABSOLUTE), 1464 INDEX_GPR(0), 1465 ELEM_SIZE(0)); 1466 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1467 SRC_SEL_Y(SQ_SEL_Y), 1468 SRC_SEL_Z(SQ_SEL_0), 1469 SRC_SEL_W(SQ_SEL_1), 1470 BURST_COUNT(1), 1471 VALID_PIXEL_MODE(0), 1472 END_OF_PROGRAM(0), 1473 CF_INST(SQ_CF_INST_EXPORT_DONE), 1474 MARK(0), 1475 BARRIER(1)); 1476 /* 6 - src */ 1477 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 1478 TYPE(SQ_EXPORT_PARAM), 1479 RW_GPR(1), 1480 RW_REL(ABSOLUTE), 1481 INDEX_GPR(0), 1482 ELEM_SIZE(0)); 1483 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1484 SRC_SEL_Y(SQ_SEL_Y), 1485 SRC_SEL_Z(SQ_SEL_0), 1486 SRC_SEL_W(SQ_SEL_1), 1487 BURST_COUNT(1), 1488 VALID_PIXEL_MODE(0), 1489 END_OF_PROGRAM(0), 1490 CF_INST(SQ_CF_INST_EXPORT), 1491 MARK(0), 1492 BARRIER(0)); 1493 /* 7 - mask */ 1494 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1), 1495 TYPE(SQ_EXPORT_PARAM), 1496 RW_GPR(0), 1497 RW_REL(ABSOLUTE), 1498 INDEX_GPR(0), 1499 ELEM_SIZE(0)); 1500 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1501 SRC_SEL_Y(SQ_SEL_Y), 1502 SRC_SEL_Z(SQ_SEL_0), 1503 SRC_SEL_W(SQ_SEL_1), 1504 BURST_COUNT(1), 1505 VALID_PIXEL_MODE(0), 1506 END_OF_PROGRAM(0), 1507 CF_INST(SQ_CF_INST_EXPORT_DONE), 1508 WHOLE_QUAD_MODE(0), 1509 BARRIER(0)); 1510 /* 8 */ 1511 shader[i++] = CF_DWORD0(ADDR(0), 1512 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1513 shader[i++] = CF_DWORD1(POP_COUNT(0), 1514 CF_CONST(0), 1515 COND(SQ_CF_COND_ACTIVE), 1516 I_COUNT(0), 1517 VALID_PIXEL_MODE(0), 1518 END_OF_PROGRAM(0), 1519 CF_INST(SQ_CF_INST_RETURN), 1520 WHOLE_QUAD_MODE(0), 1521 BARRIER(1)); 1522 /* 9 - non-mask sub */ 1523 shader[i++] = CF_DWORD0(ADDR(50), 1524 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1525 shader[i++] = CF_DWORD1(POP_COUNT(0), 1526 CF_CONST(0), 1527 COND(SQ_CF_COND_ACTIVE), 1528 I_COUNT(2), 1529 VALID_PIXEL_MODE(0), 1530 END_OF_PROGRAM(0), 1531 CF_INST(SQ_CF_INST_VC), 1532 WHOLE_QUAD_MODE(0), 1533 BARRIER(1)); 1534 1535 /* 10 - ALU */ 1536 shader[i++] = CF_ALU_DWORD0(ADDR(34), 1537 KCACHE_BANK0(0), 1538 KCACHE_BANK1(0), 1539 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); 1540 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 1541 KCACHE_ADDR0(0), 1542 KCACHE_ADDR1(0), 1543 I_COUNT(10), 1544 ALT_CONST(0), 1545 CF_INST(SQ_CF_INST_ALU), 1546 WHOLE_QUAD_MODE(0), 1547 BARRIER(1)); 1548 1549 /* 11 - dst */ 1550 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 1551 TYPE(SQ_EXPORT_POS), 1552 RW_GPR(1), 1553 RW_REL(ABSOLUTE), 1554 INDEX_GPR(0), 1555 ELEM_SIZE(0)); 1556 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1557 SRC_SEL_Y(SQ_SEL_Y), 1558 SRC_SEL_Z(SQ_SEL_0), 1559 SRC_SEL_W(SQ_SEL_1), 1560 BURST_COUNT(0), 1561 VALID_PIXEL_MODE(0), 1562 END_OF_PROGRAM(0), 1563 CF_INST(SQ_CF_INST_EXPORT_DONE), 1564 MARK(0), 1565 BARRIER(1)); 1566 /* 12 - src */ 1567 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 1568 TYPE(SQ_EXPORT_PARAM), 1569 RW_GPR(0), 1570 RW_REL(ABSOLUTE), 1571 INDEX_GPR(0), 1572 ELEM_SIZE(0)); 1573 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1574 SRC_SEL_Y(SQ_SEL_Y), 1575 SRC_SEL_Z(SQ_SEL_0), 1576 SRC_SEL_W(SQ_SEL_1), 1577 BURST_COUNT(0), 1578 VALID_PIXEL_MODE(0), 1579 END_OF_PROGRAM(0), 1580 CF_INST(SQ_CF_INST_EXPORT_DONE), 1581 MARK(0), 1582 BARRIER(0)); 1583 /* 13 */ 1584 shader[i++] = CF_DWORD0(ADDR(0), 1585 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1586 shader[i++] = CF_DWORD1(POP_COUNT(0), 1587 CF_CONST(0), 1588 COND(SQ_CF_COND_ACTIVE), 1589 I_COUNT(0), 1590 VALID_PIXEL_MODE(0), 1591 END_OF_PROGRAM(0), 1592 CF_INST(SQ_CF_INST_RETURN), 1593 WHOLE_QUAD_MODE(0), 1594 BARRIER(1)); 1595 1596 /* 14 srcX.x DOT4 - mask */ 1597 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1598 SRC0_REL(ABSOLUTE), 1599 SRC0_ELEM(ELEM_X), 1600 SRC0_NEG(0), 1601 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 1602 SRC1_REL(ABSOLUTE), 1603 SRC1_ELEM(ELEM_X), 1604 SRC1_NEG(0), 1605 INDEX_MODE(SQ_INDEX_LOOP), 1606 PRED_SEL(SQ_PRED_SEL_OFF), 1607 LAST(0)); 1608 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1609 SRC1_ABS(0), 1610 UPDATE_EXECUTE_MASK(0), 1611 UPDATE_PRED(0), 1612 WRITE_MASK(1), 1613 OMOD(SQ_ALU_OMOD_OFF), 1614 ALU_INST(SQ_OP2_INST_DOT4), 1615 BANK_SWIZZLE(SQ_ALU_VEC_012), 1616 DST_GPR(3), 1617 DST_REL(ABSOLUTE), 1618 DST_ELEM(ELEM_X), 1619 CLAMP(0)); 1620 1621 /* 15 srcX.y DOT4 - mask */ 1622 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1623 SRC0_REL(ABSOLUTE), 1624 SRC0_ELEM(ELEM_Y), 1625 SRC0_NEG(0), 1626 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 1627 SRC1_REL(ABSOLUTE), 1628 SRC1_ELEM(ELEM_Y), 1629 SRC1_NEG(0), 1630 INDEX_MODE(SQ_INDEX_LOOP), 1631 PRED_SEL(SQ_PRED_SEL_OFF), 1632 LAST(0)); 1633 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1634 SRC1_ABS(0), 1635 UPDATE_EXECUTE_MASK(0), 1636 UPDATE_PRED(0), 1637 WRITE_MASK(0), 1638 OMOD(SQ_ALU_OMOD_OFF), 1639 ALU_INST(SQ_OP2_INST_DOT4), 1640 BANK_SWIZZLE(SQ_ALU_VEC_012), 1641 DST_GPR(3), 1642 DST_REL(ABSOLUTE), 1643 DST_ELEM(ELEM_Y), 1644 CLAMP(0)); 1645 1646 /* 16 srcX.z DOT4 - mask */ 1647 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1648 SRC0_REL(ABSOLUTE), 1649 SRC0_ELEM(ELEM_Z), 1650 SRC0_NEG(0), 1651 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 1652 SRC1_REL(ABSOLUTE), 1653 SRC1_ELEM(ELEM_Z), 1654 SRC1_NEG(0), 1655 INDEX_MODE(SQ_INDEX_LOOP), 1656 PRED_SEL(SQ_PRED_SEL_OFF), 1657 LAST(0)); 1658 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1659 SRC1_ABS(0), 1660 UPDATE_EXECUTE_MASK(0), 1661 UPDATE_PRED(0), 1662 WRITE_MASK(0), 1663 OMOD(SQ_ALU_OMOD_OFF), 1664 ALU_INST(SQ_OP2_INST_DOT4), 1665 BANK_SWIZZLE(SQ_ALU_VEC_012), 1666 DST_GPR(3), 1667 DST_REL(ABSOLUTE), 1668 DST_ELEM(ELEM_Z), 1669 CLAMP(0)); 1670 1671 /* 17 srcX.w DOT4 - mask */ 1672 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1673 SRC0_REL(ABSOLUTE), 1674 SRC0_ELEM(ELEM_W), 1675 SRC0_NEG(0), 1676 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 1677 SRC1_REL(ABSOLUTE), 1678 SRC1_ELEM(ELEM_W), 1679 SRC1_NEG(0), 1680 INDEX_MODE(SQ_INDEX_LOOP), 1681 PRED_SEL(SQ_PRED_SEL_OFF), 1682 LAST(1)); 1683 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1684 SRC1_ABS(0), 1685 UPDATE_EXECUTE_MASK(0), 1686 UPDATE_PRED(0), 1687 WRITE_MASK(0), 1688 OMOD(SQ_ALU_OMOD_OFF), 1689 ALU_INST(SQ_OP2_INST_DOT4), 1690 BANK_SWIZZLE(SQ_ALU_VEC_012), 1691 DST_GPR(3), 1692 DST_REL(ABSOLUTE), 1693 DST_ELEM(ELEM_W), 1694 CLAMP(0)); 1695 1696 /* 18 srcY.x DOT4 - mask */ 1697 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1698 SRC0_REL(ABSOLUTE), 1699 SRC0_ELEM(ELEM_X), 1700 SRC0_NEG(0), 1701 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 1702 SRC1_REL(ABSOLUTE), 1703 SRC1_ELEM(ELEM_X), 1704 SRC1_NEG(0), 1705 INDEX_MODE(SQ_INDEX_LOOP), 1706 PRED_SEL(SQ_PRED_SEL_OFF), 1707 LAST(0)); 1708 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1709 SRC1_ABS(0), 1710 UPDATE_EXECUTE_MASK(0), 1711 UPDATE_PRED(0), 1712 WRITE_MASK(0), 1713 OMOD(SQ_ALU_OMOD_OFF), 1714 ALU_INST(SQ_OP2_INST_DOT4), 1715 BANK_SWIZZLE(SQ_ALU_VEC_012), 1716 DST_GPR(3), 1717 DST_REL(ABSOLUTE), 1718 DST_ELEM(ELEM_X), 1719 CLAMP(0)); 1720 1721 /* 19 srcY.y DOT4 - mask */ 1722 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1723 SRC0_REL(ABSOLUTE), 1724 SRC0_ELEM(ELEM_Y), 1725 SRC0_NEG(0), 1726 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 1727 SRC1_REL(ABSOLUTE), 1728 SRC1_ELEM(ELEM_Y), 1729 SRC1_NEG(0), 1730 INDEX_MODE(SQ_INDEX_LOOP), 1731 PRED_SEL(SQ_PRED_SEL_OFF), 1732 LAST(0)); 1733 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1734 SRC1_ABS(0), 1735 UPDATE_EXECUTE_MASK(0), 1736 UPDATE_PRED(0), 1737 WRITE_MASK(1), 1738 OMOD(SQ_ALU_OMOD_OFF), 1739 ALU_INST(SQ_OP2_INST_DOT4), 1740 BANK_SWIZZLE(SQ_ALU_VEC_012), 1741 DST_GPR(3), 1742 DST_REL(ABSOLUTE), 1743 DST_ELEM(ELEM_Y), 1744 CLAMP(0)); 1745 1746 /* 20 srcY.z DOT4 - mask */ 1747 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1748 SRC0_REL(ABSOLUTE), 1749 SRC0_ELEM(ELEM_Z), 1750 SRC0_NEG(0), 1751 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 1752 SRC1_REL(ABSOLUTE), 1753 SRC1_ELEM(ELEM_Z), 1754 SRC1_NEG(0), 1755 INDEX_MODE(SQ_INDEX_LOOP), 1756 PRED_SEL(SQ_PRED_SEL_OFF), 1757 LAST(0)); 1758 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1759 SRC1_ABS(0), 1760 UPDATE_EXECUTE_MASK(0), 1761 UPDATE_PRED(0), 1762 WRITE_MASK(0), 1763 OMOD(SQ_ALU_OMOD_OFF), 1764 ALU_INST(SQ_OP2_INST_DOT4), 1765 BANK_SWIZZLE(SQ_ALU_VEC_012), 1766 DST_GPR(3), 1767 DST_REL(ABSOLUTE), 1768 DST_ELEM(ELEM_Z), 1769 CLAMP(0)); 1770 1771 /* 21 srcY.w DOT4 - mask */ 1772 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1773 SRC0_REL(ABSOLUTE), 1774 SRC0_ELEM(ELEM_W), 1775 SRC0_NEG(0), 1776 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 1777 SRC1_REL(ABSOLUTE), 1778 SRC1_ELEM(ELEM_W), 1779 SRC1_NEG(0), 1780 INDEX_MODE(SQ_INDEX_LOOP), 1781 PRED_SEL(SQ_PRED_SEL_OFF), 1782 LAST(1)); 1783 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1784 SRC1_ABS(0), 1785 UPDATE_EXECUTE_MASK(0), 1786 UPDATE_PRED(0), 1787 WRITE_MASK(0), 1788 OMOD(SQ_ALU_OMOD_OFF), 1789 ALU_INST(SQ_OP2_INST_DOT4), 1790 BANK_SWIZZLE(SQ_ALU_VEC_012), 1791 DST_GPR(3), 1792 DST_REL(ABSOLUTE), 1793 DST_ELEM(ELEM_W), 1794 CLAMP(0)); 1795 1796 /* 22 maskX.x DOT4 - mask */ 1797 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1798 SRC0_REL(ABSOLUTE), 1799 SRC0_ELEM(ELEM_X), 1800 SRC0_NEG(0), 1801 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2), 1802 SRC1_REL(ABSOLUTE), 1803 SRC1_ELEM(ELEM_X), 1804 SRC1_NEG(0), 1805 INDEX_MODE(SQ_INDEX_LOOP), 1806 PRED_SEL(SQ_PRED_SEL_OFF), 1807 LAST(0)); 1808 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1809 SRC1_ABS(0), 1810 UPDATE_EXECUTE_MASK(0), 1811 UPDATE_PRED(0), 1812 WRITE_MASK(1), 1813 OMOD(SQ_ALU_OMOD_OFF), 1814 ALU_INST(SQ_OP2_INST_DOT4), 1815 BANK_SWIZZLE(SQ_ALU_VEC_012), 1816 DST_GPR(4), 1817 DST_REL(ABSOLUTE), 1818 DST_ELEM(ELEM_X), 1819 CLAMP(0)); 1820 1821 /* 23 maskX.y DOT4 - mask */ 1822 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1823 SRC0_REL(ABSOLUTE), 1824 SRC0_ELEM(ELEM_Y), 1825 SRC0_NEG(0), 1826 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2), 1827 SRC1_REL(ABSOLUTE), 1828 SRC1_ELEM(ELEM_Y), 1829 SRC1_NEG(0), 1830 INDEX_MODE(SQ_INDEX_LOOP), 1831 PRED_SEL(SQ_PRED_SEL_OFF), 1832 LAST(0)); 1833 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1834 SRC1_ABS(0), 1835 UPDATE_EXECUTE_MASK(0), 1836 UPDATE_PRED(0), 1837 WRITE_MASK(0), 1838 OMOD(SQ_ALU_OMOD_OFF), 1839 ALU_INST(SQ_OP2_INST_DOT4), 1840 BANK_SWIZZLE(SQ_ALU_VEC_012), 1841 DST_GPR(4), 1842 DST_REL(ABSOLUTE), 1843 DST_ELEM(ELEM_Y), 1844 CLAMP(0)); 1845 1846 /* 24 maskX.z DOT4 - mask */ 1847 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1848 SRC0_REL(ABSOLUTE), 1849 SRC0_ELEM(ELEM_Z), 1850 SRC0_NEG(0), 1851 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2), 1852 SRC1_REL(ABSOLUTE), 1853 SRC1_ELEM(ELEM_Z), 1854 SRC1_NEG(0), 1855 INDEX_MODE(SQ_INDEX_LOOP), 1856 PRED_SEL(SQ_PRED_SEL_OFF), 1857 LAST(0)); 1858 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1859 SRC1_ABS(0), 1860 UPDATE_EXECUTE_MASK(0), 1861 UPDATE_PRED(0), 1862 WRITE_MASK(0), 1863 OMOD(SQ_ALU_OMOD_OFF), 1864 ALU_INST(SQ_OP2_INST_DOT4), 1865 BANK_SWIZZLE(SQ_ALU_VEC_012), 1866 DST_GPR(4), 1867 DST_REL(ABSOLUTE), 1868 DST_ELEM(ELEM_Z), 1869 CLAMP(0)); 1870 1871 /* 25 maskX.w DOT4 - mask */ 1872 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1873 SRC0_REL(ABSOLUTE), 1874 SRC0_ELEM(ELEM_W), 1875 SRC0_NEG(0), 1876 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2), 1877 SRC1_REL(ABSOLUTE), 1878 SRC1_ELEM(ELEM_W), 1879 SRC1_NEG(0), 1880 INDEX_MODE(SQ_INDEX_LOOP), 1881 PRED_SEL(SQ_PRED_SEL_OFF), 1882 LAST(1)); 1883 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1884 SRC1_ABS(0), 1885 UPDATE_EXECUTE_MASK(0), 1886 UPDATE_PRED(0), 1887 WRITE_MASK(0), 1888 OMOD(SQ_ALU_OMOD_OFF), 1889 ALU_INST(SQ_OP2_INST_DOT4), 1890 BANK_SWIZZLE(SQ_ALU_VEC_012), 1891 DST_GPR(4), 1892 DST_REL(ABSOLUTE), 1893 DST_ELEM(ELEM_W), 1894 CLAMP(0)); 1895 1896 /* 26 maskY.x DOT4 - mask */ 1897 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1898 SRC0_REL(ABSOLUTE), 1899 SRC0_ELEM(ELEM_X), 1900 SRC0_NEG(0), 1901 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3), 1902 SRC1_REL(ABSOLUTE), 1903 SRC1_ELEM(ELEM_X), 1904 SRC1_NEG(0), 1905 INDEX_MODE(SQ_INDEX_LOOP), 1906 PRED_SEL(SQ_PRED_SEL_OFF), 1907 LAST(0)); 1908 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1909 SRC1_ABS(0), 1910 UPDATE_EXECUTE_MASK(0), 1911 UPDATE_PRED(0), 1912 WRITE_MASK(0), 1913 OMOD(SQ_ALU_OMOD_OFF), 1914 ALU_INST(SQ_OP2_INST_DOT4), 1915 BANK_SWIZZLE(SQ_ALU_VEC_012), 1916 DST_GPR(4), 1917 DST_REL(ABSOLUTE), 1918 DST_ELEM(ELEM_X), 1919 CLAMP(0)); 1920 1921 /* 27 maskY.y DOT4 - mask */ 1922 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1923 SRC0_REL(ABSOLUTE), 1924 SRC0_ELEM(ELEM_Y), 1925 SRC0_NEG(0), 1926 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3), 1927 SRC1_REL(ABSOLUTE), 1928 SRC1_ELEM(ELEM_Y), 1929 SRC1_NEG(0), 1930 INDEX_MODE(SQ_INDEX_LOOP), 1931 PRED_SEL(SQ_PRED_SEL_OFF), 1932 LAST(0)); 1933 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1934 SRC1_ABS(0), 1935 UPDATE_EXECUTE_MASK(0), 1936 UPDATE_PRED(0), 1937 WRITE_MASK(1), 1938 OMOD(SQ_ALU_OMOD_OFF), 1939 ALU_INST(SQ_OP2_INST_DOT4), 1940 BANK_SWIZZLE(SQ_ALU_VEC_012), 1941 DST_GPR(4), 1942 DST_REL(ABSOLUTE), 1943 DST_ELEM(ELEM_Y), 1944 CLAMP(0)); 1945 1946 /* 28 maskY.z DOT4 - mask */ 1947 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1948 SRC0_REL(ABSOLUTE), 1949 SRC0_ELEM(ELEM_Z), 1950 SRC0_NEG(0), 1951 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3), 1952 SRC1_REL(ABSOLUTE), 1953 SRC1_ELEM(ELEM_Z), 1954 SRC1_NEG(0), 1955 INDEX_MODE(SQ_INDEX_LOOP), 1956 PRED_SEL(SQ_PRED_SEL_OFF), 1957 LAST(0)); 1958 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1959 SRC1_ABS(0), 1960 UPDATE_EXECUTE_MASK(0), 1961 UPDATE_PRED(0), 1962 WRITE_MASK(0), 1963 OMOD(SQ_ALU_OMOD_OFF), 1964 ALU_INST(SQ_OP2_INST_DOT4), 1965 BANK_SWIZZLE(SQ_ALU_VEC_012), 1966 DST_GPR(4), 1967 DST_REL(ABSOLUTE), 1968 DST_ELEM(ELEM_Z), 1969 CLAMP(0)); 1970 1971 /* 29 maskY.w DOT4 - mask */ 1972 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1973 SRC0_REL(ABSOLUTE), 1974 SRC0_ELEM(ELEM_W), 1975 SRC0_NEG(0), 1976 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3), 1977 SRC1_REL(ABSOLUTE), 1978 SRC1_ELEM(ELEM_W), 1979 SRC1_NEG(0), 1980 INDEX_MODE(SQ_INDEX_LOOP), 1981 PRED_SEL(SQ_PRED_SEL_OFF), 1982 LAST(1)); 1983 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 1984 SRC1_ABS(0), 1985 UPDATE_EXECUTE_MASK(0), 1986 UPDATE_PRED(0), 1987 WRITE_MASK(0), 1988 OMOD(SQ_ALU_OMOD_OFF), 1989 ALU_INST(SQ_OP2_INST_DOT4), 1990 BANK_SWIZZLE(SQ_ALU_VEC_012), 1991 DST_GPR(4), 1992 DST_REL(ABSOLUTE), 1993 DST_ELEM(ELEM_W), 1994 CLAMP(0)); 1995 1996 /* 30 srcX / w */ 1997 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3), 1998 SRC0_REL(ABSOLUTE), 1999 SRC0_ELEM(ELEM_X), 2000 SRC0_NEG(0), 2001 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 2002 SRC1_REL(ABSOLUTE), 2003 SRC1_ELEM(ELEM_W), 2004 SRC1_NEG(0), 2005 INDEX_MODE(SQ_INDEX_AR_X), 2006 PRED_SEL(SQ_PRED_SEL_OFF), 2007 LAST(1)); 2008 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2009 SRC1_ABS(0), 2010 UPDATE_EXECUTE_MASK(0), 2011 UPDATE_PRED(0), 2012 WRITE_MASK(1), 2013 OMOD(SQ_ALU_OMOD_OFF), 2014 ALU_INST(SQ_OP2_INST_MUL), 2015 BANK_SWIZZLE(SQ_ALU_VEC_012), 2016 DST_GPR(1), 2017 DST_REL(ABSOLUTE), 2018 DST_ELEM(ELEM_X), 2019 CLAMP(0)); 2020 2021 /* 31 srcY / h */ 2022 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3), 2023 SRC0_REL(ABSOLUTE), 2024 SRC0_ELEM(ELEM_Y), 2025 SRC0_NEG(0), 2026 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 2027 SRC1_REL(ABSOLUTE), 2028 SRC1_ELEM(ELEM_W), 2029 SRC1_NEG(0), 2030 INDEX_MODE(SQ_INDEX_AR_X), 2031 PRED_SEL(SQ_PRED_SEL_OFF), 2032 LAST(1)); 2033 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2034 SRC1_ABS(0), 2035 UPDATE_EXECUTE_MASK(0), 2036 UPDATE_PRED(0), 2037 WRITE_MASK(1), 2038 OMOD(SQ_ALU_OMOD_OFF), 2039 ALU_INST(SQ_OP2_INST_MUL), 2040 BANK_SWIZZLE(SQ_ALU_VEC_012), 2041 DST_GPR(1), 2042 DST_REL(ABSOLUTE), 2043 DST_ELEM(ELEM_Y), 2044 CLAMP(0)); 2045 2046 /* 32 maskX / w */ 2047 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4), 2048 SRC0_REL(ABSOLUTE), 2049 SRC0_ELEM(ELEM_X), 2050 SRC0_NEG(0), 2051 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2), 2052 SRC1_REL(ABSOLUTE), 2053 SRC1_ELEM(ELEM_W), 2054 SRC1_NEG(0), 2055 INDEX_MODE(SQ_INDEX_AR_X), 2056 PRED_SEL(SQ_PRED_SEL_OFF), 2057 LAST(1)); 2058 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2059 SRC1_ABS(0), 2060 UPDATE_EXECUTE_MASK(0), 2061 UPDATE_PRED(0), 2062 WRITE_MASK(1), 2063 OMOD(SQ_ALU_OMOD_OFF), 2064 ALU_INST(SQ_OP2_INST_MUL), 2065 BANK_SWIZZLE(SQ_ALU_VEC_012), 2066 DST_GPR(0), 2067 DST_REL(ABSOLUTE), 2068 DST_ELEM(ELEM_X), 2069 CLAMP(0)); 2070 2071 /* 33 maskY / h */ 2072 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4), 2073 SRC0_REL(ABSOLUTE), 2074 SRC0_ELEM(ELEM_Y), 2075 SRC0_NEG(0), 2076 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3), 2077 SRC1_REL(ABSOLUTE), 2078 SRC1_ELEM(ELEM_W), 2079 SRC1_NEG(0), 2080 INDEX_MODE(SQ_INDEX_AR_X), 2081 PRED_SEL(SQ_PRED_SEL_OFF), 2082 LAST(1)); 2083 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2084 SRC1_ABS(0), 2085 UPDATE_EXECUTE_MASK(0), 2086 UPDATE_PRED(0), 2087 WRITE_MASK(1), 2088 OMOD(SQ_ALU_OMOD_OFF), 2089 ALU_INST(SQ_OP2_INST_MUL), 2090 BANK_SWIZZLE(SQ_ALU_VEC_012), 2091 DST_GPR(0), 2092 DST_REL(ABSOLUTE), 2093 DST_ELEM(ELEM_Y), 2094 CLAMP(0)); 2095 2096 /* 34 srcX.x DOT4 - non-mask */ 2097 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2098 SRC0_REL(ABSOLUTE), 2099 SRC0_ELEM(ELEM_X), 2100 SRC0_NEG(0), 2101 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 2102 SRC1_REL(ABSOLUTE), 2103 SRC1_ELEM(ELEM_X), 2104 SRC1_NEG(0), 2105 INDEX_MODE(SQ_INDEX_LOOP), 2106 PRED_SEL(SQ_PRED_SEL_OFF), 2107 LAST(0)); 2108 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2109 SRC1_ABS(0), 2110 UPDATE_EXECUTE_MASK(0), 2111 UPDATE_PRED(0), 2112 WRITE_MASK(1), 2113 OMOD(SQ_ALU_OMOD_OFF), 2114 ALU_INST(SQ_OP2_INST_DOT4), 2115 BANK_SWIZZLE(SQ_ALU_VEC_012), 2116 DST_GPR(2), 2117 DST_REL(ABSOLUTE), 2118 DST_ELEM(ELEM_X), 2119 CLAMP(0)); 2120 2121 /* 35 srcX.y DOT4 - non-mask */ 2122 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2123 SRC0_REL(ABSOLUTE), 2124 SRC0_ELEM(ELEM_Y), 2125 SRC0_NEG(0), 2126 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 2127 SRC1_REL(ABSOLUTE), 2128 SRC1_ELEM(ELEM_Y), 2129 SRC1_NEG(0), 2130 INDEX_MODE(SQ_INDEX_LOOP), 2131 PRED_SEL(SQ_PRED_SEL_OFF), 2132 LAST(0)); 2133 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2134 SRC1_ABS(0), 2135 UPDATE_EXECUTE_MASK(0), 2136 UPDATE_PRED(0), 2137 WRITE_MASK(0), 2138 OMOD(SQ_ALU_OMOD_OFF), 2139 ALU_INST(SQ_OP2_INST_DOT4), 2140 BANK_SWIZZLE(SQ_ALU_VEC_012), 2141 DST_GPR(2), 2142 DST_REL(ABSOLUTE), 2143 DST_ELEM(ELEM_Y), 2144 CLAMP(0)); 2145 2146 /* 36 srcX.z DOT4 - non-mask */ 2147 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2148 SRC0_REL(ABSOLUTE), 2149 SRC0_ELEM(ELEM_Z), 2150 SRC0_NEG(0), 2151 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 2152 SRC1_REL(ABSOLUTE), 2153 SRC1_ELEM(ELEM_Z), 2154 SRC1_NEG(0), 2155 INDEX_MODE(SQ_INDEX_LOOP), 2156 PRED_SEL(SQ_PRED_SEL_OFF), 2157 LAST(0)); 2158 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2159 SRC1_ABS(0), 2160 UPDATE_EXECUTE_MASK(0), 2161 UPDATE_PRED(0), 2162 WRITE_MASK(0), 2163 OMOD(SQ_ALU_OMOD_OFF), 2164 ALU_INST(SQ_OP2_INST_DOT4), 2165 BANK_SWIZZLE(SQ_ALU_VEC_012), 2166 DST_GPR(2), 2167 DST_REL(ABSOLUTE), 2168 DST_ELEM(ELEM_Z), 2169 CLAMP(0)); 2170 2171 /* 37 srcX.w DOT4 - non-mask */ 2172 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2173 SRC0_REL(ABSOLUTE), 2174 SRC0_ELEM(ELEM_W), 2175 SRC0_NEG(0), 2176 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 2177 SRC1_REL(ABSOLUTE), 2178 SRC1_ELEM(ELEM_W), 2179 SRC1_NEG(0), 2180 INDEX_MODE(SQ_INDEX_LOOP), 2181 PRED_SEL(SQ_PRED_SEL_OFF), 2182 LAST(1)); 2183 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2184 SRC1_ABS(0), 2185 UPDATE_EXECUTE_MASK(0), 2186 UPDATE_PRED(0), 2187 WRITE_MASK(0), 2188 OMOD(SQ_ALU_OMOD_OFF), 2189 ALU_INST(SQ_OP2_INST_DOT4), 2190 BANK_SWIZZLE(SQ_ALU_VEC_012), 2191 DST_GPR(2), 2192 DST_REL(ABSOLUTE), 2193 DST_ELEM(ELEM_W), 2194 CLAMP(0)); 2195 2196 /* 38 srcY.x DOT4 - non-mask */ 2197 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2198 SRC0_REL(ABSOLUTE), 2199 SRC0_ELEM(ELEM_X), 2200 SRC0_NEG(0), 2201 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 2202 SRC1_REL(ABSOLUTE), 2203 SRC1_ELEM(ELEM_X), 2204 SRC1_NEG(0), 2205 INDEX_MODE(SQ_INDEX_LOOP), 2206 PRED_SEL(SQ_PRED_SEL_OFF), 2207 LAST(0)); 2208 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2209 SRC1_ABS(0), 2210 UPDATE_EXECUTE_MASK(0), 2211 UPDATE_PRED(0), 2212 WRITE_MASK(0), 2213 OMOD(SQ_ALU_OMOD_OFF), 2214 ALU_INST(SQ_OP2_INST_DOT4), 2215 BANK_SWIZZLE(SQ_ALU_VEC_012), 2216 DST_GPR(2), 2217 DST_REL(ABSOLUTE), 2218 DST_ELEM(ELEM_X), 2219 CLAMP(0)); 2220 2221 /* 39 srcY.y DOT4 - non-mask */ 2222 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2223 SRC0_REL(ABSOLUTE), 2224 SRC0_ELEM(ELEM_Y), 2225 SRC0_NEG(0), 2226 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 2227 SRC1_REL(ABSOLUTE), 2228 SRC1_ELEM(ELEM_Y), 2229 SRC1_NEG(0), 2230 INDEX_MODE(SQ_INDEX_LOOP), 2231 PRED_SEL(SQ_PRED_SEL_OFF), 2232 LAST(0)); 2233 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2234 SRC1_ABS(0), 2235 UPDATE_EXECUTE_MASK(0), 2236 UPDATE_PRED(0), 2237 WRITE_MASK(1), 2238 OMOD(SQ_ALU_OMOD_OFF), 2239 ALU_INST(SQ_OP2_INST_DOT4), 2240 BANK_SWIZZLE(SQ_ALU_VEC_012), 2241 DST_GPR(2), 2242 DST_REL(ABSOLUTE), 2243 DST_ELEM(ELEM_Y), 2244 CLAMP(0)); 2245 2246 /* 40 srcY.z DOT4 - non-mask */ 2247 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2248 SRC0_REL(ABSOLUTE), 2249 SRC0_ELEM(ELEM_Z), 2250 SRC0_NEG(0), 2251 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 2252 SRC1_REL(ABSOLUTE), 2253 SRC1_ELEM(ELEM_Z), 2254 SRC1_NEG(0), 2255 INDEX_MODE(SQ_INDEX_LOOP), 2256 PRED_SEL(SQ_PRED_SEL_OFF), 2257 LAST(0)); 2258 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2259 SRC1_ABS(0), 2260 UPDATE_EXECUTE_MASK(0), 2261 UPDATE_PRED(0), 2262 WRITE_MASK(0), 2263 OMOD(SQ_ALU_OMOD_OFF), 2264 ALU_INST(SQ_OP2_INST_DOT4), 2265 BANK_SWIZZLE(SQ_ALU_VEC_012), 2266 DST_GPR(2), 2267 DST_REL(ABSOLUTE), 2268 DST_ELEM(ELEM_Z), 2269 CLAMP(0)); 2270 2271 /* 41 srcY.w DOT4 - non-mask */ 2272 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2273 SRC0_REL(ABSOLUTE), 2274 SRC0_ELEM(ELEM_W), 2275 SRC0_NEG(0), 2276 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 2277 SRC1_REL(ABSOLUTE), 2278 SRC1_ELEM(ELEM_W), 2279 SRC1_NEG(0), 2280 INDEX_MODE(SQ_INDEX_LOOP), 2281 PRED_SEL(SQ_PRED_SEL_OFF), 2282 LAST(1)); 2283 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2284 SRC1_ABS(0), 2285 UPDATE_EXECUTE_MASK(0), 2286 UPDATE_PRED(0), 2287 WRITE_MASK(0), 2288 OMOD(SQ_ALU_OMOD_OFF), 2289 ALU_INST(SQ_OP2_INST_DOT4), 2290 BANK_SWIZZLE(SQ_ALU_VEC_012), 2291 DST_GPR(2), 2292 DST_REL(ABSOLUTE), 2293 DST_ELEM(ELEM_W), 2294 CLAMP(0)); 2295 2296 /* 42 srcX / w */ 2297 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2), 2298 SRC0_REL(ABSOLUTE), 2299 SRC0_ELEM(ELEM_X), 2300 SRC0_NEG(0), 2301 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 2302 SRC1_REL(ABSOLUTE), 2303 SRC1_ELEM(ELEM_W), 2304 SRC1_NEG(0), 2305 INDEX_MODE(SQ_INDEX_AR_X), 2306 PRED_SEL(SQ_PRED_SEL_OFF), 2307 LAST(1)); 2308 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2309 SRC1_ABS(0), 2310 UPDATE_EXECUTE_MASK(0), 2311 UPDATE_PRED(0), 2312 WRITE_MASK(1), 2313 OMOD(SQ_ALU_OMOD_OFF), 2314 ALU_INST(SQ_OP2_INST_MUL), 2315 BANK_SWIZZLE(SQ_ALU_VEC_012), 2316 DST_GPR(0), 2317 DST_REL(ABSOLUTE), 2318 DST_ELEM(ELEM_X), 2319 CLAMP(0)); 2320 2321 /* 43 srcY / h */ 2322 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2), 2323 SRC0_REL(ABSOLUTE), 2324 SRC0_ELEM(ELEM_Y), 2325 SRC0_NEG(0), 2326 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), 2327 SRC1_REL(ABSOLUTE), 2328 SRC1_ELEM(ELEM_W), 2329 SRC1_NEG(0), 2330 INDEX_MODE(SQ_INDEX_AR_X), 2331 PRED_SEL(SQ_PRED_SEL_OFF), 2332 LAST(1)); 2333 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2334 SRC1_ABS(0), 2335 UPDATE_EXECUTE_MASK(0), 2336 UPDATE_PRED(0), 2337 WRITE_MASK(1), 2338 OMOD(SQ_ALU_OMOD_OFF), 2339 ALU_INST(SQ_OP2_INST_MUL), 2340 BANK_SWIZZLE(SQ_ALU_VEC_012), 2341 DST_GPR(0), 2342 DST_REL(ABSOLUTE), 2343 DST_ELEM(ELEM_Y), 2344 CLAMP(0)); 2345 2346 /* mask vfetch - 44/45 - dst */ 2347 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 2348 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 2349 FETCH_WHOLE_QUAD(0), 2350 BUFFER_ID(0), 2351 SRC_GPR(0), 2352 SRC_REL(ABSOLUTE), 2353 SRC_SEL_X(SQ_SEL_X), 2354 MEGA_FETCH_COUNT(24)); 2355 shader[i++] = VTX_DWORD1_GPR(DST_GPR(2), 2356 DST_REL(0), 2357 DST_SEL_X(SQ_SEL_X), 2358 DST_SEL_Y(SQ_SEL_Y), 2359 DST_SEL_Z(SQ_SEL_0), 2360 DST_SEL_W(SQ_SEL_1), 2361 USE_CONST_FIELDS(0), 2362 DATA_FORMAT(FMT_32_32_FLOAT), 2363 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 2364 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 2365 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 2366 shader[i++] = VTX_DWORD2(OFFSET(0), 2367#if X_BYTE_ORDER == X_BIG_ENDIAN 2368 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 2369#else 2370 ENDIAN_SWAP(SQ_ENDIAN_NONE), 2371#endif 2372 CONST_BUF_NO_STRIDE(0), 2373 MEGA_FETCH(1), 2374 ALT_CONST(0), 2375 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 2376 shader[i++] = VTX_DWORD_PAD; 2377 /* 46/47 - src */ 2378 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 2379 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 2380 FETCH_WHOLE_QUAD(0), 2381 BUFFER_ID(0), 2382 SRC_GPR(0), 2383 SRC_REL(ABSOLUTE), 2384 SRC_SEL_X(SQ_SEL_X), 2385 MEGA_FETCH_COUNT(8)); 2386 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 2387 DST_REL(0), 2388 DST_SEL_X(SQ_SEL_X), 2389 DST_SEL_Y(SQ_SEL_Y), 2390 DST_SEL_Z(SQ_SEL_1), 2391 DST_SEL_W(SQ_SEL_0), 2392 USE_CONST_FIELDS(0), 2393 DATA_FORMAT(FMT_32_32_FLOAT), 2394 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 2395 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 2396 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 2397 shader[i++] = VTX_DWORD2(OFFSET(8), 2398#if X_BYTE_ORDER == X_BIG_ENDIAN 2399 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 2400#else 2401 ENDIAN_SWAP(SQ_ENDIAN_NONE), 2402#endif 2403 CONST_BUF_NO_STRIDE(0), 2404 MEGA_FETCH(0), 2405 ALT_CONST(0), 2406 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 2407 shader[i++] = VTX_DWORD_PAD; 2408 /* 48/49 - mask */ 2409 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 2410 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 2411 FETCH_WHOLE_QUAD(0), 2412 BUFFER_ID(0), 2413 SRC_GPR(0), 2414 SRC_REL(ABSOLUTE), 2415 SRC_SEL_X(SQ_SEL_X), 2416 MEGA_FETCH_COUNT(8)); 2417 shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), 2418 DST_REL(0), 2419 DST_SEL_X(SQ_SEL_X), 2420 DST_SEL_Y(SQ_SEL_Y), 2421 DST_SEL_Z(SQ_SEL_1), 2422 DST_SEL_W(SQ_SEL_0), 2423 USE_CONST_FIELDS(0), 2424 DATA_FORMAT(FMT_32_32_FLOAT), 2425 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 2426 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 2427 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 2428 shader[i++] = VTX_DWORD2(OFFSET(16), 2429#if X_BYTE_ORDER == X_BIG_ENDIAN 2430 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 2431#else 2432 ENDIAN_SWAP(SQ_ENDIAN_NONE), 2433#endif 2434 CONST_BUF_NO_STRIDE(0), 2435 MEGA_FETCH(0), 2436 ALT_CONST(0), 2437 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 2438 shader[i++] = VTX_DWORD_PAD; 2439 2440 /* no mask vfetch - 50/51 - dst */ 2441 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 2442 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 2443 FETCH_WHOLE_QUAD(0), 2444 BUFFER_ID(0), 2445 SRC_GPR(0), 2446 SRC_REL(ABSOLUTE), 2447 SRC_SEL_X(SQ_SEL_X), 2448 MEGA_FETCH_COUNT(16)); 2449 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 2450 DST_REL(0), 2451 DST_SEL_X(SQ_SEL_X), 2452 DST_SEL_Y(SQ_SEL_Y), 2453 DST_SEL_Z(SQ_SEL_0), 2454 DST_SEL_W(SQ_SEL_1), 2455 USE_CONST_FIELDS(0), 2456 DATA_FORMAT(FMT_32_32_FLOAT), 2457 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 2458 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 2459 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 2460 shader[i++] = VTX_DWORD2(OFFSET(0), 2461#if X_BYTE_ORDER == X_BIG_ENDIAN 2462 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 2463#else 2464 ENDIAN_SWAP(SQ_ENDIAN_NONE), 2465#endif 2466 CONST_BUF_NO_STRIDE(0), 2467 MEGA_FETCH(1), 2468 ALT_CONST(0), 2469 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 2470 shader[i++] = VTX_DWORD_PAD; 2471 /* 52/53 - src */ 2472 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 2473 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 2474 FETCH_WHOLE_QUAD(0), 2475 BUFFER_ID(0), 2476 SRC_GPR(0), 2477 SRC_REL(ABSOLUTE), 2478 SRC_SEL_X(SQ_SEL_X), 2479 MEGA_FETCH_COUNT(8)); 2480 shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), 2481 DST_REL(0), 2482 DST_SEL_X(SQ_SEL_X), 2483 DST_SEL_Y(SQ_SEL_Y), 2484 DST_SEL_Z(SQ_SEL_1), 2485 DST_SEL_W(SQ_SEL_0), 2486 USE_CONST_FIELDS(0), 2487 DATA_FORMAT(FMT_32_32_FLOAT), 2488 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 2489 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 2490 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 2491 shader[i++] = VTX_DWORD2(OFFSET(8), 2492#if X_BYTE_ORDER == X_BIG_ENDIAN 2493 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 2494#else 2495 ENDIAN_SWAP(SQ_ENDIAN_NONE), 2496#endif 2497 CONST_BUF_NO_STRIDE(0), 2498 MEGA_FETCH(0), 2499 ALT_CONST(0), 2500 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 2501 shader[i++] = VTX_DWORD_PAD; 2502 2503 return i; 2504} 2505 2506/* comp ps --------------------------------------- */ 2507int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) 2508{ 2509 int i = 0; 2510 2511 /* 0 */ 2512 shader[i++] = CF_DWORD0(ADDR(3), 2513 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2514 shader[i++] = CF_DWORD1(POP_COUNT(0), 2515 CF_CONST(0), 2516 COND(SQ_CF_COND_BOOL), 2517 I_COUNT(0), 2518 VALID_PIXEL_MODE(0), 2519 END_OF_PROGRAM(0), 2520 CF_INST(SQ_CF_INST_CALL), 2521 WHOLE_QUAD_MODE(0), 2522 BARRIER(0)); 2523 /* 1 */ 2524 shader[i++] = CF_DWORD0(ADDR(8), 2525 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2526 shader[i++] = CF_DWORD1(POP_COUNT(0), 2527 CF_CONST(0), 2528 COND(SQ_CF_COND_NOT_BOOL), 2529 I_COUNT(0), 2530 VALID_PIXEL_MODE(0), 2531 END_OF_PROGRAM(0), 2532 CF_INST(SQ_CF_INST_CALL), 2533 WHOLE_QUAD_MODE(0), 2534 BARRIER(0)); 2535 /* 2 */ 2536 shader[i++] = CF_DWORD0(ADDR(0), 2537 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2538 shader[i++] = CF_DWORD1(POP_COUNT(0), 2539 CF_CONST(0), 2540 COND(SQ_CF_COND_ACTIVE), 2541 I_COUNT(0), 2542 VALID_PIXEL_MODE(0), 2543 END_OF_PROGRAM(1), 2544 CF_INST(SQ_CF_INST_NOP), 2545 WHOLE_QUAD_MODE(0), 2546 BARRIER(1)); 2547 2548 /* 3 - mask sub */ 2549 shader[i++] = CF_ALU_DWORD0(ADDR(12), 2550 KCACHE_BANK0(0), 2551 KCACHE_BANK1(0), 2552 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 2553 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 2554 KCACHE_ADDR0(0), 2555 KCACHE_ADDR1(0), 2556 I_COUNT(8), 2557 ALT_CONST(0), 2558 CF_INST(SQ_CF_INST_ALU), 2559 WHOLE_QUAD_MODE(0), 2560 BARRIER(1)); 2561 2562 /* 4 */ 2563 shader[i++] = CF_DWORD0(ADDR(28), 2564 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2565 shader[i++] = CF_DWORD1(POP_COUNT(0), 2566 CF_CONST(0), 2567 COND(SQ_CF_COND_ACTIVE), 2568 I_COUNT(2), 2569 VALID_PIXEL_MODE(0), 2570 END_OF_PROGRAM(0), 2571 CF_INST(SQ_CF_INST_TC), 2572 WHOLE_QUAD_MODE(0), 2573 BARRIER(1)); 2574 2575 /* 5 */ 2576 shader[i++] = CF_ALU_DWORD0(ADDR(20), 2577 KCACHE_BANK0(0), 2578 KCACHE_BANK1(0), 2579 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); 2580 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 2581 KCACHE_ADDR0(0), 2582 KCACHE_ADDR1(0), 2583 I_COUNT(4), 2584 ALT_CONST(0), 2585 CF_INST(SQ_CF_INST_ALU), 2586 WHOLE_QUAD_MODE(0), 2587 BARRIER(1)); 2588 2589 /* 6 */ 2590 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 2591 TYPE(SQ_EXPORT_PIXEL), 2592 RW_GPR(2), 2593 RW_REL(ABSOLUTE), 2594 INDEX_GPR(0), 2595 ELEM_SIZE(1)); 2596 2597 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 2598 SRC_SEL_Y(SQ_SEL_Y), 2599 SRC_SEL_Z(SQ_SEL_Z), 2600 SRC_SEL_W(SQ_SEL_W), 2601 BURST_COUNT(1), 2602 VALID_PIXEL_MODE(0), 2603 END_OF_PROGRAM(0), 2604 CF_INST(SQ_CF_INST_EXPORT_DONE), 2605 MARK(0), 2606 BARRIER(1)); 2607 /* 7 */ 2608 shader[i++] = CF_DWORD0(ADDR(0), 2609 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2610 shader[i++] = CF_DWORD1(POP_COUNT(0), 2611 CF_CONST(0), 2612 COND(SQ_CF_COND_ACTIVE), 2613 I_COUNT(0), 2614 VALID_PIXEL_MODE(0), 2615 END_OF_PROGRAM(0), 2616 CF_INST(SQ_CF_INST_RETURN), 2617 WHOLE_QUAD_MODE(0), 2618 BARRIER(1)); 2619 2620 /* 8 - non-mask sub */ 2621 shader[i++] = CF_ALU_DWORD0(ADDR(24), 2622 KCACHE_BANK0(0), 2623 KCACHE_BANK1(0), 2624 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 2625 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 2626 KCACHE_ADDR0(0), 2627 KCACHE_ADDR1(0), 2628 I_COUNT(4), 2629 ALT_CONST(0), 2630 CF_INST(SQ_CF_INST_ALU), 2631 WHOLE_QUAD_MODE(0), 2632 BARRIER(1)); 2633 /* 9 */ 2634 shader[i++] = CF_DWORD0(ADDR(32), 2635 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2636 shader[i++] = CF_DWORD1(POP_COUNT(0), 2637 CF_CONST(0), 2638 COND(SQ_CF_COND_ACTIVE), 2639 I_COUNT(1), 2640 VALID_PIXEL_MODE(0), 2641 END_OF_PROGRAM(0), 2642 CF_INST(SQ_CF_INST_TC), 2643 WHOLE_QUAD_MODE(0), 2644 BARRIER(1)); 2645 2646 /* 10 */ 2647 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 2648 TYPE(SQ_EXPORT_PIXEL), 2649 RW_GPR(0), 2650 RW_REL(ABSOLUTE), 2651 INDEX_GPR(0), 2652 ELEM_SIZE(1)); 2653 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 2654 SRC_SEL_Y(SQ_SEL_Y), 2655 SRC_SEL_Z(SQ_SEL_Z), 2656 SRC_SEL_W(SQ_SEL_W), 2657 BURST_COUNT(1), 2658 VALID_PIXEL_MODE(0), 2659 END_OF_PROGRAM(0), 2660 CF_INST(SQ_CF_INST_EXPORT_DONE), 2661 MARK(0), 2662 BARRIER(1)); 2663 2664 /* 11 */ 2665 shader[i++] = CF_DWORD0(ADDR(0), 2666 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2667 shader[i++] = CF_DWORD1(POP_COUNT(0), 2668 CF_CONST(0), 2669 COND(SQ_CF_COND_ACTIVE), 2670 I_COUNT(0), 2671 VALID_PIXEL_MODE(0), 2672 END_OF_PROGRAM(0), 2673 CF_INST(SQ_CF_INST_RETURN), 2674 WHOLE_QUAD_MODE(0), 2675 BARRIER(1)); 2676 2677 /* 12 interpolate src tex coords - mask */ 2678 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2679 SRC0_REL(ABSOLUTE), 2680 SRC0_ELEM(ELEM_Y), 2681 SRC0_NEG(0), 2682 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 2683 SRC1_REL(ABSOLUTE), 2684 SRC1_ELEM(ELEM_X), 2685 SRC1_NEG(0), 2686 INDEX_MODE(SQ_INDEX_AR_X), 2687 PRED_SEL(SQ_PRED_SEL_OFF), 2688 LAST(0)); 2689 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2690 SRC1_ABS(0), 2691 UPDATE_EXECUTE_MASK(0), 2692 UPDATE_PRED(0), 2693 WRITE_MASK(1), 2694 OMOD(SQ_ALU_OMOD_OFF), 2695 ALU_INST(SQ_OP2_INST_INTERP_XY), 2696 BANK_SWIZZLE(SQ_ALU_VEC_210), 2697 DST_GPR(1), 2698 DST_REL(ABSOLUTE), 2699 DST_ELEM(ELEM_X), 2700 CLAMP(0)); 2701 /* 13 */ 2702 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2703 SRC0_REL(ABSOLUTE), 2704 SRC0_ELEM(ELEM_X), 2705 SRC0_NEG(0), 2706 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 2707 SRC1_REL(ABSOLUTE), 2708 SRC1_ELEM(ELEM_X), 2709 SRC1_NEG(0), 2710 INDEX_MODE(SQ_INDEX_AR_X), 2711 PRED_SEL(SQ_PRED_SEL_OFF), 2712 LAST(0)); 2713 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2714 SRC1_ABS(0), 2715 UPDATE_EXECUTE_MASK(0), 2716 UPDATE_PRED(0), 2717 WRITE_MASK(1), 2718 OMOD(SQ_ALU_OMOD_OFF), 2719 ALU_INST(SQ_OP2_INST_INTERP_XY), 2720 BANK_SWIZZLE(SQ_ALU_VEC_210), 2721 DST_GPR(1), 2722 DST_REL(ABSOLUTE), 2723 DST_ELEM(ELEM_Y), 2724 CLAMP(0)); 2725 /* 14 */ 2726 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2727 SRC0_REL(ABSOLUTE), 2728 SRC0_ELEM(ELEM_Y), 2729 SRC0_NEG(0), 2730 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 2731 SRC1_REL(ABSOLUTE), 2732 SRC1_ELEM(ELEM_X), 2733 SRC1_NEG(0), 2734 INDEX_MODE(SQ_INDEX_AR_X), 2735 PRED_SEL(SQ_PRED_SEL_OFF), 2736 LAST(0)); 2737 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2738 SRC1_ABS(0), 2739 UPDATE_EXECUTE_MASK(0), 2740 UPDATE_PRED(0), 2741 WRITE_MASK(0), 2742 OMOD(SQ_ALU_OMOD_OFF), 2743 ALU_INST(SQ_OP2_INST_INTERP_XY), 2744 BANK_SWIZZLE(SQ_ALU_VEC_210), 2745 DST_GPR(1), 2746 DST_REL(ABSOLUTE), 2747 DST_ELEM(ELEM_Z), 2748 CLAMP(0)); 2749 /* 15 */ 2750 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2751 SRC0_REL(ABSOLUTE), 2752 SRC0_ELEM(ELEM_X), 2753 SRC0_NEG(0), 2754 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 2755 SRC1_REL(ABSOLUTE), 2756 SRC1_ELEM(ELEM_X), 2757 SRC1_NEG(0), 2758 INDEX_MODE(SQ_INDEX_AR_X), 2759 PRED_SEL(SQ_PRED_SEL_OFF), 2760 LAST(1)); 2761 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2762 SRC1_ABS(0), 2763 UPDATE_EXECUTE_MASK(0), 2764 UPDATE_PRED(0), 2765 WRITE_MASK(0), 2766 OMOD(SQ_ALU_OMOD_OFF), 2767 ALU_INST(SQ_OP2_INST_INTERP_XY), 2768 BANK_SWIZZLE(SQ_ALU_VEC_210), 2769 DST_GPR(1), 2770 DST_REL(ABSOLUTE), 2771 DST_ELEM(ELEM_W), 2772 CLAMP(0)); 2773 2774 /* 16 interpolate mask tex coords */ 2775 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2776 SRC0_REL(ABSOLUTE), 2777 SRC0_ELEM(ELEM_Y), 2778 SRC0_NEG(0), 2779 SRC1_SEL(ALU_SRC_PARAM_BASE + 1), 2780 SRC1_REL(ABSOLUTE), 2781 SRC1_ELEM(ELEM_X), 2782 SRC1_NEG(0), 2783 INDEX_MODE(SQ_INDEX_AR_X), 2784 PRED_SEL(SQ_PRED_SEL_OFF), 2785 LAST(0)); 2786 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2787 SRC1_ABS(0), 2788 UPDATE_EXECUTE_MASK(0), 2789 UPDATE_PRED(0), 2790 WRITE_MASK(1), 2791 OMOD(SQ_ALU_OMOD_OFF), 2792 ALU_INST(SQ_OP2_INST_INTERP_XY), 2793 BANK_SWIZZLE(SQ_ALU_VEC_210), 2794 DST_GPR(0), 2795 DST_REL(ABSOLUTE), 2796 DST_ELEM(ELEM_X), 2797 CLAMP(0)); 2798 /* 17 */ 2799 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2800 SRC0_REL(ABSOLUTE), 2801 SRC0_ELEM(ELEM_X), 2802 SRC0_NEG(0), 2803 SRC1_SEL(ALU_SRC_PARAM_BASE + 1), 2804 SRC1_REL(ABSOLUTE), 2805 SRC1_ELEM(ELEM_X), 2806 SRC1_NEG(0), 2807 INDEX_MODE(SQ_INDEX_AR_X), 2808 PRED_SEL(SQ_PRED_SEL_OFF), 2809 LAST(0)); 2810 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2811 SRC1_ABS(0), 2812 UPDATE_EXECUTE_MASK(0), 2813 UPDATE_PRED(0), 2814 WRITE_MASK(1), 2815 OMOD(SQ_ALU_OMOD_OFF), 2816 ALU_INST(SQ_OP2_INST_INTERP_XY), 2817 BANK_SWIZZLE(SQ_ALU_VEC_210), 2818 DST_GPR(0), 2819 DST_REL(ABSOLUTE), 2820 DST_ELEM(ELEM_Y), 2821 CLAMP(0)); 2822 /* 18 */ 2823 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2824 SRC0_REL(ABSOLUTE), 2825 SRC0_ELEM(ELEM_Y), 2826 SRC0_NEG(0), 2827 SRC1_SEL(ALU_SRC_PARAM_BASE + 1), 2828 SRC1_REL(ABSOLUTE), 2829 SRC1_ELEM(ELEM_X), 2830 SRC1_NEG(0), 2831 INDEX_MODE(SQ_INDEX_AR_X), 2832 PRED_SEL(SQ_PRED_SEL_OFF), 2833 LAST(0)); 2834 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2835 SRC1_ABS(0), 2836 UPDATE_EXECUTE_MASK(0), 2837 UPDATE_PRED(0), 2838 WRITE_MASK(0), 2839 OMOD(SQ_ALU_OMOD_OFF), 2840 ALU_INST(SQ_OP2_INST_INTERP_XY), 2841 BANK_SWIZZLE(SQ_ALU_VEC_210), 2842 DST_GPR(0), 2843 DST_REL(ABSOLUTE), 2844 DST_ELEM(ELEM_Z), 2845 CLAMP(0)); 2846 /* 19 */ 2847 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2848 SRC0_REL(ABSOLUTE), 2849 SRC0_ELEM(ELEM_X), 2850 SRC0_NEG(0), 2851 SRC1_SEL(ALU_SRC_PARAM_BASE + 1), 2852 SRC1_REL(ABSOLUTE), 2853 SRC1_ELEM(ELEM_X), 2854 SRC1_NEG(0), 2855 INDEX_MODE(SQ_INDEX_AR_X), 2856 PRED_SEL(SQ_PRED_SEL_OFF), 2857 LAST(1)); 2858 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2859 SRC1_ABS(0), 2860 UPDATE_EXECUTE_MASK(0), 2861 UPDATE_PRED(0), 2862 WRITE_MASK(0), 2863 OMOD(SQ_ALU_OMOD_OFF), 2864 ALU_INST(SQ_OP2_INST_INTERP_XY), 2865 BANK_SWIZZLE(SQ_ALU_VEC_210), 2866 DST_GPR(0), 2867 DST_REL(ABSOLUTE), 2868 DST_ELEM(ELEM_W), 2869 CLAMP(0)); 2870 2871 /* 20 - alu 0 */ 2872 /* MUL gpr[2].x gpr[0].x gpr[1].x */ 2873 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2874 SRC0_REL(ABSOLUTE), 2875 SRC0_ELEM(ELEM_X), 2876 SRC0_NEG(0), 2877 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 2878 SRC1_REL(ABSOLUTE), 2879 SRC1_ELEM(ELEM_X), 2880 SRC1_NEG(0), 2881 INDEX_MODE(SQ_INDEX_LOOP), 2882 PRED_SEL(SQ_PRED_SEL_OFF), 2883 LAST(0)); 2884 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2885 SRC1_ABS(0), 2886 UPDATE_EXECUTE_MASK(0), 2887 UPDATE_PRED(0), 2888 WRITE_MASK(1), 2889 OMOD(SQ_ALU_OMOD_OFF), 2890 ALU_INST(SQ_OP2_INST_MUL), 2891 BANK_SWIZZLE(SQ_ALU_VEC_012), 2892 DST_GPR(2), 2893 DST_REL(ABSOLUTE), 2894 DST_ELEM(ELEM_X), 2895 CLAMP(1)); 2896 /* 21 - alu 1 */ 2897 /* MUL gpr[2].y gpr[0].y gpr[1].y */ 2898 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2899 SRC0_REL(ABSOLUTE), 2900 SRC0_ELEM(ELEM_Y), 2901 SRC0_NEG(0), 2902 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 2903 SRC1_REL(ABSOLUTE), 2904 SRC1_ELEM(ELEM_Y), 2905 SRC1_NEG(0), 2906 INDEX_MODE(SQ_INDEX_LOOP), 2907 PRED_SEL(SQ_PRED_SEL_OFF), 2908 LAST(0)); 2909 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2910 SRC1_ABS(0), 2911 UPDATE_EXECUTE_MASK(0), 2912 UPDATE_PRED(0), 2913 WRITE_MASK(1), 2914 OMOD(SQ_ALU_OMOD_OFF), 2915 ALU_INST(SQ_OP2_INST_MUL), 2916 BANK_SWIZZLE(SQ_ALU_VEC_012), 2917 DST_GPR(2), 2918 DST_REL(ABSOLUTE), 2919 DST_ELEM(ELEM_Y), 2920 CLAMP(1)); 2921 /* 22 - alu 2 */ 2922 /* MUL gpr[2].z gpr[0].z gpr[1].z */ 2923 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2924 SRC0_REL(ABSOLUTE), 2925 SRC0_ELEM(ELEM_Z), 2926 SRC0_NEG(0), 2927 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 2928 SRC1_REL(ABSOLUTE), 2929 SRC1_ELEM(ELEM_Z), 2930 SRC1_NEG(0), 2931 INDEX_MODE(SQ_INDEX_LOOP), 2932 PRED_SEL(SQ_PRED_SEL_OFF), 2933 LAST(0)); 2934 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2935 SRC1_ABS(0), 2936 UPDATE_EXECUTE_MASK(0), 2937 UPDATE_PRED(0), 2938 WRITE_MASK(1), 2939 OMOD(SQ_ALU_OMOD_OFF), 2940 ALU_INST(SQ_OP2_INST_MUL), 2941 BANK_SWIZZLE(SQ_ALU_VEC_012), 2942 DST_GPR(2), 2943 DST_REL(ABSOLUTE), 2944 DST_ELEM(ELEM_Z), 2945 CLAMP(1)); 2946 /* 23 - alu 3 */ 2947 /* MUL gpr[2].w gpr[0].w gpr[1].w */ 2948 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2949 SRC0_REL(ABSOLUTE), 2950 SRC0_ELEM(ELEM_W), 2951 SRC0_NEG(0), 2952 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 2953 SRC1_REL(ABSOLUTE), 2954 SRC1_ELEM(ELEM_W), 2955 SRC1_NEG(0), 2956 INDEX_MODE(SQ_INDEX_LOOP), 2957 PRED_SEL(SQ_PRED_SEL_OFF), 2958 LAST(1)); 2959 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2960 SRC1_ABS(0), 2961 UPDATE_EXECUTE_MASK(0), 2962 UPDATE_PRED(0), 2963 WRITE_MASK(1), 2964 OMOD(SQ_ALU_OMOD_OFF), 2965 ALU_INST(SQ_OP2_INST_MUL), 2966 BANK_SWIZZLE(SQ_ALU_VEC_012), 2967 DST_GPR(2), 2968 DST_REL(ABSOLUTE), 2969 DST_ELEM(ELEM_W), 2970 CLAMP(1)); 2971 2972 /* 24 - interpolate tex coords - non-mask */ 2973 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2974 SRC0_REL(ABSOLUTE), 2975 SRC0_ELEM(ELEM_Y), 2976 SRC0_NEG(0), 2977 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 2978 SRC1_REL(ABSOLUTE), 2979 SRC1_ELEM(ELEM_X), 2980 SRC1_NEG(0), 2981 INDEX_MODE(SQ_INDEX_AR_X), 2982 PRED_SEL(SQ_PRED_SEL_OFF), 2983 LAST(0)); 2984 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2985 SRC1_ABS(0), 2986 UPDATE_EXECUTE_MASK(0), 2987 UPDATE_PRED(0), 2988 WRITE_MASK(1), 2989 OMOD(SQ_ALU_OMOD_OFF), 2990 ALU_INST(SQ_OP2_INST_INTERP_XY), 2991 BANK_SWIZZLE(SQ_ALU_VEC_210), 2992 DST_GPR(0), 2993 DST_REL(ABSOLUTE), 2994 DST_ELEM(ELEM_X), 2995 CLAMP(0)); 2996 /* 25 */ 2997 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2998 SRC0_REL(ABSOLUTE), 2999 SRC0_ELEM(ELEM_X), 3000 SRC0_NEG(0), 3001 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 3002 SRC1_REL(ABSOLUTE), 3003 SRC1_ELEM(ELEM_X), 3004 SRC1_NEG(0), 3005 INDEX_MODE(SQ_INDEX_AR_X), 3006 PRED_SEL(SQ_PRED_SEL_OFF), 3007 LAST(0)); 3008 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 3009 SRC1_ABS(0), 3010 UPDATE_EXECUTE_MASK(0), 3011 UPDATE_PRED(0), 3012 WRITE_MASK(1), 3013 OMOD(SQ_ALU_OMOD_OFF), 3014 ALU_INST(SQ_OP2_INST_INTERP_XY), 3015 BANK_SWIZZLE(SQ_ALU_VEC_210), 3016 DST_GPR(0), 3017 DST_REL(ABSOLUTE), 3018 DST_ELEM(ELEM_Y), 3019 CLAMP(0)); 3020 /* 26 */ 3021 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 3022 SRC0_REL(ABSOLUTE), 3023 SRC0_ELEM(ELEM_Y), 3024 SRC0_NEG(0), 3025 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 3026 SRC1_REL(ABSOLUTE), 3027 SRC1_ELEM(ELEM_X), 3028 SRC1_NEG(0), 3029 INDEX_MODE(SQ_INDEX_AR_X), 3030 PRED_SEL(SQ_PRED_SEL_OFF), 3031 LAST(0)); 3032 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 3033 SRC1_ABS(0), 3034 UPDATE_EXECUTE_MASK(0), 3035 UPDATE_PRED(0), 3036 WRITE_MASK(0), 3037 OMOD(SQ_ALU_OMOD_OFF), 3038 ALU_INST(SQ_OP2_INST_INTERP_XY), 3039 BANK_SWIZZLE(SQ_ALU_VEC_210), 3040 DST_GPR(0), 3041 DST_REL(ABSOLUTE), 3042 DST_ELEM(ELEM_Z), 3043 CLAMP(0)); 3044 /* 27 */ 3045 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 3046 SRC0_REL(ABSOLUTE), 3047 SRC0_ELEM(ELEM_X), 3048 SRC0_NEG(0), 3049 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 3050 SRC1_REL(ABSOLUTE), 3051 SRC1_ELEM(ELEM_X), 3052 SRC1_NEG(0), 3053 INDEX_MODE(SQ_INDEX_AR_X), 3054 PRED_SEL(SQ_PRED_SEL_OFF), 3055 LAST(1)); 3056 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 3057 SRC1_ABS(0), 3058 UPDATE_EXECUTE_MASK(0), 3059 UPDATE_PRED(0), 3060 WRITE_MASK(0), 3061 OMOD(SQ_ALU_OMOD_OFF), 3062 ALU_INST(SQ_OP2_INST_INTERP_XY), 3063 BANK_SWIZZLE(SQ_ALU_VEC_210), 3064 DST_GPR(0), 3065 DST_REL(ABSOLUTE), 3066 DST_ELEM(ELEM_W), 3067 CLAMP(0)); 3068 3069 /* 28/29 - src - mask */ 3070 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 3071 INST_MOD(0), 3072 FETCH_WHOLE_QUAD(0), 3073 RESOURCE_ID(0), 3074 SRC_GPR(1), 3075 SRC_REL(ABSOLUTE), 3076 ALT_CONST(0), 3077 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 3078 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 3079 shader[i++] = TEX_DWORD1(DST_GPR(1), 3080 DST_REL(ABSOLUTE), 3081 DST_SEL_X(SQ_SEL_X), 3082 DST_SEL_Y(SQ_SEL_Y), 3083 DST_SEL_Z(SQ_SEL_Z), 3084 DST_SEL_W(SQ_SEL_W), 3085 LOD_BIAS(0), 3086 COORD_TYPE_X(TEX_NORMALIZED), 3087 COORD_TYPE_Y(TEX_NORMALIZED), 3088 COORD_TYPE_Z(TEX_NORMALIZED), 3089 COORD_TYPE_W(TEX_NORMALIZED)); 3090 shader[i++] = TEX_DWORD2(OFFSET_X(0), 3091 OFFSET_Y(0), 3092 OFFSET_Z(0), 3093 SAMPLER_ID(0), 3094 SRC_SEL_X(SQ_SEL_X), 3095 SRC_SEL_Y(SQ_SEL_Y), 3096 SRC_SEL_Z(SQ_SEL_0), 3097 SRC_SEL_W(SQ_SEL_1)); 3098 shader[i++] = TEX_DWORD_PAD; 3099 /* 30/31 - mask */ 3100 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 3101 INST_MOD(0), 3102 FETCH_WHOLE_QUAD(0), 3103 RESOURCE_ID(1), 3104 SRC_GPR(0), 3105 SRC_REL(ABSOLUTE), 3106 ALT_CONST(0), 3107 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 3108 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 3109 shader[i++] = TEX_DWORD1(DST_GPR(0), 3110 DST_REL(ABSOLUTE), 3111 DST_SEL_X(SQ_SEL_X), 3112 DST_SEL_Y(SQ_SEL_Y), 3113 DST_SEL_Z(SQ_SEL_Z), 3114 DST_SEL_W(SQ_SEL_W), 3115 LOD_BIAS(0), 3116 COORD_TYPE_X(TEX_NORMALIZED), 3117 COORD_TYPE_Y(TEX_NORMALIZED), 3118 COORD_TYPE_Z(TEX_NORMALIZED), 3119 COORD_TYPE_W(TEX_NORMALIZED)); 3120 shader[i++] = TEX_DWORD2(OFFSET_X(0), 3121 OFFSET_Y(0), 3122 OFFSET_Z(0), 3123 SAMPLER_ID(1), 3124 SRC_SEL_X(SQ_SEL_X), 3125 SRC_SEL_Y(SQ_SEL_Y), 3126 SRC_SEL_Z(SQ_SEL_0), 3127 SRC_SEL_W(SQ_SEL_1)); 3128 shader[i++] = TEX_DWORD_PAD; 3129 3130 /* 32/33 - src - non-mask */ 3131 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 3132 INST_MOD(0), 3133 FETCH_WHOLE_QUAD(0), 3134 RESOURCE_ID(0), 3135 SRC_GPR(0), 3136 SRC_REL(ABSOLUTE), 3137 ALT_CONST(0), 3138 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 3139 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 3140 shader[i++] = TEX_DWORD1(DST_GPR(0), 3141 DST_REL(ABSOLUTE), 3142 DST_SEL_X(SQ_SEL_X), 3143 DST_SEL_Y(SQ_SEL_Y), 3144 DST_SEL_Z(SQ_SEL_Z), 3145 DST_SEL_W(SQ_SEL_W), 3146 LOD_BIAS(0), 3147 COORD_TYPE_X(TEX_NORMALIZED), 3148 COORD_TYPE_Y(TEX_NORMALIZED), 3149 COORD_TYPE_Z(TEX_NORMALIZED), 3150 COORD_TYPE_W(TEX_NORMALIZED)); 3151 shader[i++] = TEX_DWORD2(OFFSET_X(0), 3152 OFFSET_Y(0), 3153 OFFSET_Z(0), 3154 SAMPLER_ID(0), 3155 SRC_SEL_X(SQ_SEL_X), 3156 SRC_SEL_Y(SQ_SEL_Y), 3157 SRC_SEL_Z(SQ_SEL_0), 3158 SRC_SEL_W(SQ_SEL_1)); 3159 shader[i++] = TEX_DWORD_PAD; 3160 3161 return i; 3162} 3163 3164#endif 3165