1/* 2 * Copyright 2010 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 
22 * 23 * Author: Alex Deucher <alexander.deucher@amd.com> 24 * 25 */ 26 27#ifdef HAVE_CONFIG_H 28#include "config.h" 29#endif 30 31#ifdef XF86DRM_MODE 32 33#include "xf86.h" 34 35#include "evergreen_shader.h" 36#include "evergreen_reg.h" 37 38/* solid vs --------------------------------------- */ 39int evergreen_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader) 40{ 41 int i = 0; 42 43 /* 0 */ 44 shader[i++] = CF_DWORD0(ADDR(4), 45 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 46 shader[i++] = CF_DWORD1(POP_COUNT(0), 47 CF_CONST(0), 48 COND(SQ_CF_COND_ACTIVE), 49 I_COUNT(1), 50 VALID_PIXEL_MODE(0), 51 END_OF_PROGRAM(0), 52 CF_INST(SQ_CF_INST_VC), 53 WHOLE_QUAD_MODE(0), 54 BARRIER(1)); 55 /* 1 */ 56 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 57 TYPE(SQ_EXPORT_POS), 58 RW_GPR(1), 59 RW_REL(ABSOLUTE), 60 INDEX_GPR(0), 61 ELEM_SIZE(0)); 62 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 63 SRC_SEL_Y(SQ_SEL_Y), 64 SRC_SEL_Z(SQ_SEL_Z), 65 SRC_SEL_W(SQ_SEL_W), 66 BURST_COUNT(1), 67 VALID_PIXEL_MODE(0), 68 END_OF_PROGRAM(0), 69 CF_INST(SQ_CF_INST_EXPORT_DONE), 70 MARK(0), 71 BARRIER(1)); 72 /* 2 - always export a param whether it's used or not */ 73 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 74 TYPE(SQ_EXPORT_PARAM), 75 RW_GPR(0), 76 RW_REL(ABSOLUTE), 77 INDEX_GPR(0), 78 ELEM_SIZE(0)); 79 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 80 SRC_SEL_Y(SQ_SEL_Y), 81 SRC_SEL_Z(SQ_SEL_Z), 82 SRC_SEL_W(SQ_SEL_W), 83 BURST_COUNT(0), 84 VALID_PIXEL_MODE(0), 85 END_OF_PROGRAM(1), 86 CF_INST(SQ_CF_INST_EXPORT_DONE), 87 MARK(0), 88 BARRIER(0)); 89 /* 3 - padding */ 90 shader[i++] = 0x00000000; 91 shader[i++] = 0x00000000; 92 /* 4/5 */ 93 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 94 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 95 FETCH_WHOLE_QUAD(0), 96 BUFFER_ID(0), 97 SRC_GPR(0), 98 SRC_REL(ABSOLUTE), 99 SRC_SEL_X(SQ_SEL_X), 100 MEGA_FETCH_COUNT(8)); 101 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 102 DST_REL(0), 103 
DST_SEL_X(SQ_SEL_X), 104 DST_SEL_Y(SQ_SEL_Y), 105 DST_SEL_Z(SQ_SEL_0), 106 DST_SEL_W(SQ_SEL_1), 107 USE_CONST_FIELDS(0), 108 DATA_FORMAT(FMT_32_32_FLOAT), 109 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 110 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 111 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 112 shader[i++] = VTX_DWORD2(OFFSET(0), 113#if X_BYTE_ORDER == X_BIG_ENDIAN 114 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 115#else 116 ENDIAN_SWAP(SQ_ENDIAN_NONE), 117#endif 118 CONST_BUF_NO_STRIDE(0), 119 MEGA_FETCH(1), 120 ALT_CONST(0), 121 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 122 shader[i++] = VTX_DWORD_PAD; 123 124 return i; 125} 126 127/* solid ps --------------------------------------- */ 128int evergreen_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader) 129{ 130 int i = 0; 131 132 /* 0 */ 133 shader[i++] = CF_ALU_DWORD0(ADDR(2), 134 KCACHE_BANK0(0), 135 KCACHE_BANK1(0), 136 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); 137 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 138 KCACHE_ADDR0(0), 139 KCACHE_ADDR1(0), 140 I_COUNT(4), 141 ALT_CONST(0), 142 CF_INST(SQ_CF_INST_ALU), 143 WHOLE_QUAD_MODE(0), 144 BARRIER(1)); 145 /* 1 */ 146 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 147 TYPE(SQ_EXPORT_PIXEL), 148 RW_GPR(0), 149 RW_REL(ABSOLUTE), 150 INDEX_GPR(0), 151 ELEM_SIZE(1)); 152 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 153 SRC_SEL_Y(SQ_SEL_Y), 154 SRC_SEL_Z(SQ_SEL_Z), 155 SRC_SEL_W(SQ_SEL_W), 156 BURST_COUNT(1), 157 VALID_PIXEL_MODE(0), 158 END_OF_PROGRAM(1), 159 CF_INST(SQ_CF_INST_EXPORT_DONE), 160 MARK(0), 161 BARRIER(1)); 162 163 /* 2 */ 164 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 165 SRC0_REL(ABSOLUTE), 166 SRC0_ELEM(ELEM_X), 167 SRC0_NEG(0), 168 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 169 SRC1_REL(ABSOLUTE), 170 SRC1_ELEM(ELEM_X), 171 SRC1_NEG(0), 172 INDEX_MODE(SQ_INDEX_AR_X), 173 PRED_SEL(SQ_PRED_SEL_OFF), 174 LAST(0)); 175 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 176 SRC1_ABS(0), 177 UPDATE_EXECUTE_MASK(0), 178 
UPDATE_PRED(0), 179 WRITE_MASK(1), 180 OMOD(SQ_ALU_OMOD_OFF), 181 ALU_INST(SQ_OP2_INST_MOV), 182 BANK_SWIZZLE(SQ_ALU_VEC_012), 183 DST_GPR(0), 184 DST_REL(ABSOLUTE), 185 DST_ELEM(ELEM_X), 186 CLAMP(1)); 187 /* 3 */ 188 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 189 SRC0_REL(ABSOLUTE), 190 SRC0_ELEM(ELEM_Y), 191 SRC0_NEG(0), 192 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 193 SRC1_REL(ABSOLUTE), 194 SRC1_ELEM(ELEM_Y), 195 SRC1_NEG(0), 196 INDEX_MODE(SQ_INDEX_AR_X), 197 PRED_SEL(SQ_PRED_SEL_OFF), 198 LAST(0)); 199 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 200 SRC1_ABS(0), 201 UPDATE_EXECUTE_MASK(0), 202 UPDATE_PRED(0), 203 WRITE_MASK(1), 204 OMOD(SQ_ALU_OMOD_OFF), 205 ALU_INST(SQ_OP2_INST_MOV), 206 BANK_SWIZZLE(SQ_ALU_VEC_012), 207 DST_GPR(0), 208 DST_REL(ABSOLUTE), 209 DST_ELEM(ELEM_Y), 210 CLAMP(1)); 211 /* 4 */ 212 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 213 SRC0_REL(ABSOLUTE), 214 SRC0_ELEM(ELEM_Z), 215 SRC0_NEG(0), 216 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 217 SRC1_REL(ABSOLUTE), 218 SRC1_ELEM(ELEM_Z), 219 SRC1_NEG(0), 220 INDEX_MODE(SQ_INDEX_AR_X), 221 PRED_SEL(SQ_PRED_SEL_OFF), 222 LAST(0)); 223 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 224 SRC1_ABS(0), 225 UPDATE_EXECUTE_MASK(0), 226 UPDATE_PRED(0), 227 WRITE_MASK(1), 228 OMOD(SQ_ALU_OMOD_OFF), 229 ALU_INST(SQ_OP2_INST_MOV), 230 BANK_SWIZZLE(SQ_ALU_VEC_012), 231 DST_GPR(0), 232 DST_REL(ABSOLUTE), 233 DST_ELEM(ELEM_Z), 234 CLAMP(1)); 235 /* 5 */ 236 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 237 SRC0_REL(ABSOLUTE), 238 SRC0_ELEM(ELEM_W), 239 SRC0_NEG(0), 240 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 241 SRC1_REL(ABSOLUTE), 242 SRC1_ELEM(ELEM_W), 243 SRC1_NEG(0), 244 INDEX_MODE(SQ_INDEX_AR_X), 245 PRED_SEL(SQ_PRED_SEL_OFF), 246 LAST(1)); 247 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 248 SRC1_ABS(0), 249 UPDATE_EXECUTE_MASK(0), 250 UPDATE_PRED(0), 251 WRITE_MASK(1), 252 OMOD(SQ_ALU_OMOD_OFF), 253 ALU_INST(SQ_OP2_INST_MOV), 254 BANK_SWIZZLE(SQ_ALU_VEC_012), 255 DST_GPR(0), 256 
DST_REL(ABSOLUTE), 257 DST_ELEM(ELEM_W), 258 CLAMP(1)); 259 260 return i; 261} 262 263/* copy vs --------------------------------------- */ 264int evergreen_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader) 265{ 266 int i = 0; 267 268 /* 0 */ 269 shader[i++] = CF_DWORD0(ADDR(4), 270 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 271 shader[i++] = CF_DWORD1(POP_COUNT(0), 272 CF_CONST(0), 273 COND(SQ_CF_COND_ACTIVE), 274 I_COUNT(2), 275 VALID_PIXEL_MODE(0), 276 END_OF_PROGRAM(0), 277 CF_INST(SQ_CF_INST_VC), 278 WHOLE_QUAD_MODE(0), 279 BARRIER(1)); 280 /* 1 */ 281 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 282 TYPE(SQ_EXPORT_POS), 283 RW_GPR(1), 284 RW_REL(ABSOLUTE), 285 INDEX_GPR(0), 286 ELEM_SIZE(0)); 287 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 288 SRC_SEL_Y(SQ_SEL_Y), 289 SRC_SEL_Z(SQ_SEL_Z), 290 SRC_SEL_W(SQ_SEL_W), 291 BURST_COUNT(0), 292 VALID_PIXEL_MODE(0), 293 END_OF_PROGRAM(0), 294 CF_INST(SQ_CF_INST_EXPORT_DONE), 295 MARK(0), 296 BARRIER(1)); 297 /* 2 */ 298 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 299 TYPE(SQ_EXPORT_PARAM), 300 RW_GPR(0), 301 RW_REL(ABSOLUTE), 302 INDEX_GPR(0), 303 ELEM_SIZE(0)); 304 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 305 SRC_SEL_Y(SQ_SEL_Y), 306 SRC_SEL_Z(SQ_SEL_Z), 307 SRC_SEL_W(SQ_SEL_W), 308 BURST_COUNT(0), 309 VALID_PIXEL_MODE(0), 310 END_OF_PROGRAM(1), 311 CF_INST(SQ_CF_INST_EXPORT_DONE), 312 MARK(0), 313 BARRIER(0)); 314 /* 3 */ 315 shader[i++] = 0x00000000; 316 shader[i++] = 0x00000000; 317 /* 4/5 */ 318 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 319 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 320 FETCH_WHOLE_QUAD(0), 321 BUFFER_ID(0), 322 SRC_GPR(0), 323 SRC_REL(ABSOLUTE), 324 SRC_SEL_X(SQ_SEL_X), 325 MEGA_FETCH_COUNT(16)); 326 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 327 DST_REL(0), 328 DST_SEL_X(SQ_SEL_X), 329 DST_SEL_Y(SQ_SEL_Y), 330 DST_SEL_Z(SQ_SEL_0), 331 DST_SEL_W(SQ_SEL_1), 332 USE_CONST_FIELDS(0), 333 DATA_FORMAT(FMT_32_32_FLOAT), 334 
NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 335 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 336 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 337 shader[i++] = VTX_DWORD2(OFFSET(0), 338#if X_BYTE_ORDER == X_BIG_ENDIAN 339 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 340#else 341 ENDIAN_SWAP(SQ_ENDIAN_NONE), 342#endif 343 CONST_BUF_NO_STRIDE(0), 344 MEGA_FETCH(1), 345 ALT_CONST(0), 346 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 347 shader[i++] = VTX_DWORD_PAD; 348 /* 6/7 */ 349 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 350 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 351 FETCH_WHOLE_QUAD(0), 352 BUFFER_ID(0), 353 SRC_GPR(0), 354 SRC_REL(ABSOLUTE), 355 SRC_SEL_X(SQ_SEL_X), 356 MEGA_FETCH_COUNT(8)); 357 shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), 358 DST_REL(0), 359 DST_SEL_X(SQ_SEL_X), 360 DST_SEL_Y(SQ_SEL_Y), 361 DST_SEL_Z(SQ_SEL_0), 362 DST_SEL_W(SQ_SEL_1), 363 USE_CONST_FIELDS(0), 364 DATA_FORMAT(FMT_32_32_FLOAT), 365 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 366 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 367 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 368 shader[i++] = VTX_DWORD2(OFFSET(8), 369#if X_BYTE_ORDER == X_BIG_ENDIAN 370 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 371#else 372 ENDIAN_SWAP(SQ_ENDIAN_NONE), 373#endif 374 CONST_BUF_NO_STRIDE(0), 375 MEGA_FETCH(0), 376 ALT_CONST(0), 377 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 378 shader[i++] = VTX_DWORD_PAD; 379 380 return i; 381} 382 383/* copy ps --------------------------------------- */ 384int evergreen_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader) 385{ 386 int i = 0; 387 388 /* CF INST 0 */ 389 shader[i++] = CF_ALU_DWORD0(ADDR(3), 390 KCACHE_BANK0(0), 391 KCACHE_BANK1(0), 392 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 393 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 394 KCACHE_ADDR0(0), 395 KCACHE_ADDR1(0), 396 I_COUNT(4), 397 ALT_CONST(0), 398 CF_INST(SQ_CF_INST_ALU), 399 WHOLE_QUAD_MODE(0), 400 BARRIER(1)); 401 /* CF INST 1 */ 402 shader[i++] = CF_DWORD0(ADDR(8), 403 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 404 shader[i++] = 
CF_DWORD1(POP_COUNT(0), 405 CF_CONST(0), 406 COND(SQ_CF_COND_ACTIVE), 407 I_COUNT(1), 408 VALID_PIXEL_MODE(0), 409 END_OF_PROGRAM(0), 410 CF_INST(SQ_CF_INST_TC), 411 WHOLE_QUAD_MODE(0), 412 BARRIER(1)); 413 /* CF INST 2 */ 414 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 415 TYPE(SQ_EXPORT_PIXEL), 416 RW_GPR(0), 417 RW_REL(ABSOLUTE), 418 INDEX_GPR(0), 419 ELEM_SIZE(1)); 420 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 421 SRC_SEL_Y(SQ_SEL_Y), 422 SRC_SEL_Z(SQ_SEL_Z), 423 SRC_SEL_W(SQ_SEL_W), 424 BURST_COUNT(1), 425 VALID_PIXEL_MODE(0), 426 END_OF_PROGRAM(1), 427 CF_INST(SQ_CF_INST_EXPORT_DONE), 428 MARK(0), 429 BARRIER(1)); 430 431 /* 3 interpolate tex coords */ 432 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 433 SRC0_REL(ABSOLUTE), 434 SRC0_ELEM(ELEM_Y), 435 SRC0_NEG(0), 436 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 437 SRC1_REL(ABSOLUTE), 438 SRC1_ELEM(ELEM_X), 439 SRC1_NEG(0), 440 INDEX_MODE(SQ_INDEX_AR_X), 441 PRED_SEL(SQ_PRED_SEL_OFF), 442 LAST(0)); 443 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 444 SRC1_ABS(0), 445 UPDATE_EXECUTE_MASK(0), 446 UPDATE_PRED(0), 447 WRITE_MASK(1), 448 OMOD(SQ_ALU_OMOD_OFF), 449 ALU_INST(SQ_OP2_INST_INTERP_XY), 450 BANK_SWIZZLE(SQ_ALU_VEC_210), 451 DST_GPR(0), 452 DST_REL(ABSOLUTE), 453 DST_ELEM(ELEM_X), 454 CLAMP(0)); 455 /* 4 */ 456 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 457 SRC0_REL(ABSOLUTE), 458 SRC0_ELEM(ELEM_X), 459 SRC0_NEG(0), 460 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 461 SRC1_REL(ABSOLUTE), 462 SRC1_ELEM(ELEM_X), 463 SRC1_NEG(0), 464 INDEX_MODE(SQ_INDEX_AR_X), 465 PRED_SEL(SQ_PRED_SEL_OFF), 466 LAST(0)); 467 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 468 SRC1_ABS(0), 469 UPDATE_EXECUTE_MASK(0), 470 UPDATE_PRED(0), 471 WRITE_MASK(1), 472 OMOD(SQ_ALU_OMOD_OFF), 473 ALU_INST(SQ_OP2_INST_INTERP_XY), 474 BANK_SWIZZLE(SQ_ALU_VEC_210), 475 DST_GPR(0), 476 DST_REL(ABSOLUTE), 477 DST_ELEM(ELEM_Y), 478 CLAMP(0)); 479 /* 5 */ 480 shader[i++] = 
ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 481 SRC0_REL(ABSOLUTE), 482 SRC0_ELEM(ELEM_Y), 483 SRC0_NEG(0), 484 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 485 SRC1_REL(ABSOLUTE), 486 SRC1_ELEM(ELEM_X), 487 SRC1_NEG(0), 488 INDEX_MODE(SQ_INDEX_AR_X), 489 PRED_SEL(SQ_PRED_SEL_OFF), 490 LAST(0)); 491 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 492 SRC1_ABS(0), 493 UPDATE_EXECUTE_MASK(0), 494 UPDATE_PRED(0), 495 WRITE_MASK(0), 496 OMOD(SQ_ALU_OMOD_OFF), 497 ALU_INST(SQ_OP2_INST_INTERP_XY), 498 BANK_SWIZZLE(SQ_ALU_VEC_210), 499 DST_GPR(0), 500 DST_REL(ABSOLUTE), 501 DST_ELEM(ELEM_Z), 502 CLAMP(0)); 503 /* 6 */ 504 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 505 SRC0_REL(ABSOLUTE), 506 SRC0_ELEM(ELEM_X), 507 SRC0_NEG(0), 508 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 509 SRC1_REL(ABSOLUTE), 510 SRC1_ELEM(ELEM_X), 511 SRC1_NEG(0), 512 INDEX_MODE(SQ_INDEX_AR_X), 513 PRED_SEL(SQ_PRED_SEL_OFF), 514 LAST(1)); 515 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 516 SRC1_ABS(0), 517 UPDATE_EXECUTE_MASK(0), 518 UPDATE_PRED(0), 519 WRITE_MASK(0), 520 OMOD(SQ_ALU_OMOD_OFF), 521 ALU_INST(SQ_OP2_INST_INTERP_XY), 522 BANK_SWIZZLE(SQ_ALU_VEC_210), 523 DST_GPR(0), 524 DST_REL(ABSOLUTE), 525 DST_ELEM(ELEM_W), 526 CLAMP(0)); 527 528 /* 7 */ 529 shader[i++] = 0x00000000; 530 shader[i++] = 0x00000000; 531 532 /* 8/9 TEX INST 0 */ 533 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 534 INST_MOD(0), 535 FETCH_WHOLE_QUAD(0), 536 RESOURCE_ID(0), 537 SRC_GPR(0), 538 SRC_REL(ABSOLUTE), 539 ALT_CONST(0), 540 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 541 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 542 shader[i++] = TEX_DWORD1(DST_GPR(0), 543 DST_REL(ABSOLUTE), 544 DST_SEL_X(SQ_SEL_X), /* R */ 545 DST_SEL_Y(SQ_SEL_Y), /* G */ 546 DST_SEL_Z(SQ_SEL_Z), /* B */ 547 DST_SEL_W(SQ_SEL_W), /* A */ 548 LOD_BIAS(0), 549 COORD_TYPE_X(TEX_UNNORMALIZED), 550 COORD_TYPE_Y(TEX_UNNORMALIZED), 551 COORD_TYPE_Z(TEX_UNNORMALIZED), 552 COORD_TYPE_W(TEX_UNNORMALIZED)); 553 shader[i++] = TEX_DWORD2(OFFSET_X(0), 554 OFFSET_Y(0), 555 
OFFSET_Z(0), 556 SAMPLER_ID(0), 557 SRC_SEL_X(SQ_SEL_X), 558 SRC_SEL_Y(SQ_SEL_Y), 559 SRC_SEL_Z(SQ_SEL_0), 560 SRC_SEL_W(SQ_SEL_1)); 561 shader[i++] = TEX_DWORD_PAD; 562 563 return i; 564} 565 566int evergreen_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader) 567{ 568 int i = 0; 569 570 /* 0 */ 571 shader[i++] = CF_DWORD0(ADDR(6), 572 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 573 shader[i++] = CF_DWORD1(POP_COUNT(0), 574 CF_CONST(0), 575 COND(SQ_CF_COND_ACTIVE), 576 I_COUNT(2), 577 VALID_PIXEL_MODE(0), 578 END_OF_PROGRAM(0), 579 CF_INST(SQ_CF_INST_VC), 580 WHOLE_QUAD_MODE(0), 581 BARRIER(1)); 582 583 /* 1 - ALU */ 584 shader[i++] = CF_ALU_DWORD0(ADDR(4), 585 KCACHE_BANK0(0), 586 KCACHE_BANK1(0), 587 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); 588 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 589 KCACHE_ADDR0(0), 590 KCACHE_ADDR1(0), 591 I_COUNT(2), 592 ALT_CONST(0), 593 CF_INST(SQ_CF_INST_ALU), 594 WHOLE_QUAD_MODE(0), 595 BARRIER(1)); 596 597 /* 2 */ 598 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 599 TYPE(SQ_EXPORT_POS), 600 RW_GPR(1), 601 RW_REL(ABSOLUTE), 602 INDEX_GPR(0), 603 ELEM_SIZE(3)); 604 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 605 SRC_SEL_Y(SQ_SEL_Y), 606 SRC_SEL_Z(SQ_SEL_Z), 607 SRC_SEL_W(SQ_SEL_W), 608 BURST_COUNT(1), 609 VALID_PIXEL_MODE(0), 610 END_OF_PROGRAM(0), 611 CF_INST(SQ_CF_INST_EXPORT_DONE), 612 MARK(0), 613 BARRIER(1)); 614 /* 3 */ 615 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 616 TYPE(SQ_EXPORT_PARAM), 617 RW_GPR(0), 618 RW_REL(ABSOLUTE), 619 INDEX_GPR(0), 620 ELEM_SIZE(3)); 621 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 622 SRC_SEL_Y(SQ_SEL_Y), 623 SRC_SEL_Z(SQ_SEL_Z), 624 SRC_SEL_W(SQ_SEL_W), 625 BURST_COUNT(1), 626 VALID_PIXEL_MODE(0), 627 END_OF_PROGRAM(1), 628 CF_INST(SQ_CF_INST_EXPORT_DONE), 629 MARK(0), 630 BARRIER(0)); 631 632 633 /* 4 texX / w */ 634 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 635 SRC0_REL(ABSOLUTE), 636 SRC0_ELEM(ELEM_X), 
637 SRC0_NEG(0), 638 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 639 SRC1_REL(ABSOLUTE), 640 SRC1_ELEM(ELEM_X), 641 SRC1_NEG(0), 642 INDEX_MODE(SQ_INDEX_AR_X), 643 PRED_SEL(SQ_PRED_SEL_OFF), 644 LAST(0)); 645 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 646 SRC1_ABS(0), 647 UPDATE_EXECUTE_MASK(0), 648 UPDATE_PRED(0), 649 WRITE_MASK(1), 650 OMOD(SQ_ALU_OMOD_OFF), 651 ALU_INST(SQ_OP2_INST_MUL), 652 BANK_SWIZZLE(SQ_ALU_VEC_012), 653 DST_GPR(0), 654 DST_REL(ABSOLUTE), 655 DST_ELEM(ELEM_X), 656 CLAMP(0)); 657 658 /* 5 texY / h */ 659 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 660 SRC0_REL(ABSOLUTE), 661 SRC0_ELEM(ELEM_Y), 662 SRC0_NEG(0), 663 SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), 664 SRC1_REL(ABSOLUTE), 665 SRC1_ELEM(ELEM_Y), 666 SRC1_NEG(0), 667 INDEX_MODE(SQ_INDEX_AR_X), 668 PRED_SEL(SQ_PRED_SEL_OFF), 669 LAST(1)); 670 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 671 SRC1_ABS(0), 672 UPDATE_EXECUTE_MASK(0), 673 UPDATE_PRED(0), 674 WRITE_MASK(1), 675 OMOD(SQ_ALU_OMOD_OFF), 676 ALU_INST(SQ_OP2_INST_MUL), 677 BANK_SWIZZLE(SQ_ALU_VEC_012), 678 DST_GPR(0), 679 DST_REL(ABSOLUTE), 680 DST_ELEM(ELEM_Y), 681 CLAMP(0)); 682 683 /* 6/7 */ 684 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 685 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 686 FETCH_WHOLE_QUAD(0), 687 BUFFER_ID(0), 688 SRC_GPR(0), 689 SRC_REL(ABSOLUTE), 690 SRC_SEL_X(SQ_SEL_X), 691 MEGA_FETCH_COUNT(16)); 692 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 693 DST_REL(ABSOLUTE), 694 DST_SEL_X(SQ_SEL_X), 695 DST_SEL_Y(SQ_SEL_Y), 696 DST_SEL_Z(SQ_SEL_0), 697 DST_SEL_W(SQ_SEL_1), 698 USE_CONST_FIELDS(0), 699 DATA_FORMAT(FMT_32_32_FLOAT), 700 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 701 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 702 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 703 shader[i++] = VTX_DWORD2(OFFSET(0), 704#if X_BYTE_ORDER == X_BIG_ENDIAN 705 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 706#else 707 ENDIAN_SWAP(SQ_ENDIAN_NONE), 708#endif 709 CONST_BUF_NO_STRIDE(0), 710 MEGA_FETCH(1), 711 ALT_CONST(0), 712 
BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 713 shader[i++] = VTX_DWORD_PAD; 714 /* 8/9 */ 715 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 716 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 717 FETCH_WHOLE_QUAD(0), 718 BUFFER_ID(0), 719 SRC_GPR(0), 720 SRC_REL(ABSOLUTE), 721 SRC_SEL_X(SQ_SEL_X), 722 MEGA_FETCH_COUNT(8)); 723 shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), 724 DST_REL(ABSOLUTE), 725 DST_SEL_X(SQ_SEL_X), 726 DST_SEL_Y(SQ_SEL_Y), 727 DST_SEL_Z(SQ_SEL_0), 728 DST_SEL_W(SQ_SEL_1), 729 USE_CONST_FIELDS(0), 730 DATA_FORMAT(FMT_32_32_FLOAT), 731 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 732 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 733 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 734 shader[i++] = VTX_DWORD2(OFFSET(8), 735#if X_BYTE_ORDER == X_BIG_ENDIAN 736 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 737#else 738 ENDIAN_SWAP(SQ_ENDIAN_NONE), 739#endif 740 CONST_BUF_NO_STRIDE(0), 741 MEGA_FETCH(0), 742 ALT_CONST(0), 743 BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); 744 shader[i++] = VTX_DWORD_PAD; 745 746 return i; 747} 748 749int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) 750{ 751 int i = 0; 752 753 /* 0 */ 754 shader[i++] = CF_ALU_DWORD0(ADDR(5), 755 KCACHE_BANK0(0), 756 KCACHE_BANK1(0), 757 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 758 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 759 KCACHE_ADDR0(0), 760 KCACHE_ADDR1(0), 761 I_COUNT(4), 762 ALT_CONST(0), 763 CF_INST(SQ_CF_INST_ALU), 764 WHOLE_QUAD_MODE(0), 765 BARRIER(1)); 766 /* 1 */ 767 shader[i++] = CF_DWORD0(ADDR(21), 768 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 769 shader[i++] = CF_DWORD1(POP_COUNT(0), 770 CF_CONST(0), 771 COND(SQ_CF_COND_BOOL), 772 I_COUNT(0), 773 VALID_PIXEL_MODE(0), 774 END_OF_PROGRAM(0), 775 CF_INST(SQ_CF_INST_CALL), 776 WHOLE_QUAD_MODE(0), 777 BARRIER(0)); 778 /* 2 */ 779 shader[i++] = CF_DWORD0(ADDR(30), 780 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 781 shader[i++] = CF_DWORD1(POP_COUNT(0), 782 CF_CONST(0), 783 COND(SQ_CF_COND_NOT_BOOL), 784 I_COUNT(0), 785 VALID_PIXEL_MODE(0), 786 
END_OF_PROGRAM(0), 787 CF_INST(SQ_CF_INST_CALL), 788 WHOLE_QUAD_MODE(0), 789 BARRIER(0)); 790 /* 3 */ 791 shader[i++] = CF_ALU_DWORD0(ADDR(9), 792 KCACHE_BANK0(0), 793 KCACHE_BANK1(0), 794 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); 795 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 796 KCACHE_ADDR0(0), 797 KCACHE_ADDR1(0), 798 I_COUNT(12), 799 ALT_CONST(0), 800 CF_INST(SQ_CF_INST_ALU), 801 WHOLE_QUAD_MODE(0), 802 BARRIER(1)); 803 /* 4 */ 804 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 805 TYPE(SQ_EXPORT_PIXEL), 806 RW_GPR(2), 807 RW_REL(ABSOLUTE), 808 INDEX_GPR(0), 809 ELEM_SIZE(3)); 810 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 811 SRC_SEL_Y(SQ_SEL_Y), 812 SRC_SEL_Z(SQ_SEL_Z), 813 SRC_SEL_W(SQ_SEL_W), 814 BURST_COUNT(1), 815 VALID_PIXEL_MODE(0), 816 END_OF_PROGRAM(1), 817 CF_INST(SQ_CF_INST_EXPORT_DONE), 818 MARK(0), 819 BARRIER(1)); 820 /* 5 interpolate tex coords */ 821 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 822 SRC0_REL(ABSOLUTE), 823 SRC0_ELEM(ELEM_Y), 824 SRC0_NEG(0), 825 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 826 SRC1_REL(ABSOLUTE), 827 SRC1_ELEM(ELEM_X), 828 SRC1_NEG(0), 829 INDEX_MODE(SQ_INDEX_AR_X), 830 PRED_SEL(SQ_PRED_SEL_OFF), 831 LAST(0)); 832 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 833 SRC1_ABS(0), 834 UPDATE_EXECUTE_MASK(0), 835 UPDATE_PRED(0), 836 WRITE_MASK(1), 837 OMOD(SQ_ALU_OMOD_OFF), 838 ALU_INST(SQ_OP2_INST_INTERP_XY), 839 BANK_SWIZZLE(SQ_ALU_VEC_210), 840 DST_GPR(0), 841 DST_REL(ABSOLUTE), 842 DST_ELEM(ELEM_X), 843 CLAMP(0)); 844 /* 6 */ 845 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 846 SRC0_REL(ABSOLUTE), 847 SRC0_ELEM(ELEM_X), 848 SRC0_NEG(0), 849 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 850 SRC1_REL(ABSOLUTE), 851 SRC1_ELEM(ELEM_X), 852 SRC1_NEG(0), 853 INDEX_MODE(SQ_INDEX_AR_X), 854 PRED_SEL(SQ_PRED_SEL_OFF), 855 LAST(0)); 856 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 857 SRC1_ABS(0), 858 UPDATE_EXECUTE_MASK(0), 859 UPDATE_PRED(0), 860 WRITE_MASK(1), 861 
OMOD(SQ_ALU_OMOD_OFF), 862 ALU_INST(SQ_OP2_INST_INTERP_XY), 863 BANK_SWIZZLE(SQ_ALU_VEC_210), 864 DST_GPR(0), 865 DST_REL(ABSOLUTE), 866 DST_ELEM(ELEM_Y), 867 CLAMP(0)); 868 /* 7 */ 869 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 870 SRC0_REL(ABSOLUTE), 871 SRC0_ELEM(ELEM_Y), 872 SRC0_NEG(0), 873 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 874 SRC1_REL(ABSOLUTE), 875 SRC1_ELEM(ELEM_X), 876 SRC1_NEG(0), 877 INDEX_MODE(SQ_INDEX_AR_X), 878 PRED_SEL(SQ_PRED_SEL_OFF), 879 LAST(0)); 880 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 881 SRC1_ABS(0), 882 UPDATE_EXECUTE_MASK(0), 883 UPDATE_PRED(0), 884 WRITE_MASK(0), 885 OMOD(SQ_ALU_OMOD_OFF), 886 ALU_INST(SQ_OP2_INST_INTERP_XY), 887 BANK_SWIZZLE(SQ_ALU_VEC_210), 888 DST_GPR(0), 889 DST_REL(ABSOLUTE), 890 DST_ELEM(ELEM_Z), 891 CLAMP(0)); 892 /* 8 */ 893 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 894 SRC0_REL(ABSOLUTE), 895 SRC0_ELEM(ELEM_X), 896 SRC0_NEG(0), 897 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 898 SRC1_REL(ABSOLUTE), 899 SRC1_ELEM(ELEM_X), 900 SRC1_NEG(0), 901 INDEX_MODE(SQ_INDEX_AR_X), 902 PRED_SEL(SQ_PRED_SEL_OFF), 903 LAST(1)); 904 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 905 SRC1_ABS(0), 906 UPDATE_EXECUTE_MASK(0), 907 UPDATE_PRED(0), 908 WRITE_MASK(0), 909 OMOD(SQ_ALU_OMOD_OFF), 910 ALU_INST(SQ_OP2_INST_INTERP_XY), 911 BANK_SWIZZLE(SQ_ALU_VEC_210), 912 DST_GPR(0), 913 DST_REL(ABSOLUTE), 914 DST_ELEM(ELEM_W), 915 CLAMP(0)); 916 917 /* 9,10,11,12 */ 918 /* r2.x = MAD(c0.w, r1.x, c0.x) */ 919 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 920 SRC0_REL(ABSOLUTE), 921 SRC0_ELEM(ELEM_W), 922 SRC0_NEG(0), 923 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 924 SRC1_REL(ABSOLUTE), 925 SRC1_ELEM(ELEM_X), 926 SRC1_NEG(0), 927 INDEX_MODE(SQ_INDEX_LOOP), 928 PRED_SEL(SQ_PRED_SEL_OFF), 929 LAST(0)); 930 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0), 931 SRC2_REL(ABSOLUTE), 932 SRC2_ELEM(ELEM_X), 933 SRC2_NEG(0), 934 ALU_INST(SQ_OP3_INST_MULADD), 935 BANK_SWIZZLE(SQ_ALU_VEC_012), 936 DST_GPR(2), 937 
DST_REL(ABSOLUTE), 938 DST_ELEM(ELEM_X), 939 CLAMP(0)); 940 /* r2.y = MAD(c0.w, r1.x, c0.y) */ 941 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 942 SRC0_REL(ABSOLUTE), 943 SRC0_ELEM(ELEM_W), 944 SRC0_NEG(0), 945 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 946 SRC1_REL(ABSOLUTE), 947 SRC1_ELEM(ELEM_X), 948 SRC1_NEG(0), 949 INDEX_MODE(SQ_INDEX_LOOP), 950 PRED_SEL(SQ_PRED_SEL_OFF), 951 LAST(0)); 952 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0), 953 SRC2_REL(ABSOLUTE), 954 SRC2_ELEM(ELEM_Y), 955 SRC2_NEG(0), 956 ALU_INST(SQ_OP3_INST_MULADD), 957 BANK_SWIZZLE(SQ_ALU_VEC_012), 958 DST_GPR(2), 959 DST_REL(ABSOLUTE), 960 DST_ELEM(ELEM_Y), 961 CLAMP(0)); 962 /* r2.z = MAD(c0.w, r1.x, c0.z) */ 963 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), 964 SRC0_REL(ABSOLUTE), 965 SRC0_ELEM(ELEM_W), 966 SRC0_NEG(0), 967 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 968 SRC1_REL(ABSOLUTE), 969 SRC1_ELEM(ELEM_X), 970 SRC1_NEG(0), 971 INDEX_MODE(SQ_INDEX_LOOP), 972 PRED_SEL(SQ_PRED_SEL_OFF), 973 LAST(0)); 974 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0), 975 SRC2_REL(ABSOLUTE), 976 SRC2_ELEM(ELEM_Z), 977 SRC2_NEG(0), 978 ALU_INST(SQ_OP3_INST_MULADD), 979 BANK_SWIZZLE(SQ_ALU_VEC_012), 980 DST_GPR(2), 981 DST_REL(ABSOLUTE), 982 DST_ELEM(ELEM_Z), 983 CLAMP(0)); 984 /* r2.w = MAD(0, 0, 1) */ 985 shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), 986 SRC0_REL(ABSOLUTE), 987 SRC0_ELEM(ELEM_X), 988 SRC0_NEG(0), 989 SRC1_SEL(SQ_ALU_SRC_0), 990 SRC1_REL(ABSOLUTE), 991 SRC1_ELEM(ELEM_X), 992 SRC1_NEG(0), 993 INDEX_MODE(SQ_INDEX_LOOP), 994 PRED_SEL(SQ_PRED_SEL_OFF), 995 LAST(1)); 996 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), 997 SRC2_REL(ABSOLUTE), 998 SRC2_ELEM(ELEM_X), 999 SRC2_NEG(0), 1000 ALU_INST(SQ_OP3_INST_MULADD), 1001 BANK_SWIZZLE(SQ_ALU_VEC_012), 1002 DST_GPR(2), 1003 DST_REL(ABSOLUTE), 1004 DST_ELEM(ELEM_W), 1005 CLAMP(0)); 1006 1007 /* 13,14,15,16 */ 1008 /* r2.x = MAD(c1.x, r1.y, pv.x) */ 1009 shader[i++] = 
ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1), 1010 SRC0_REL(ABSOLUTE), 1011 SRC0_ELEM(ELEM_X), 1012 SRC0_NEG(0), 1013 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 1014 SRC1_REL(ABSOLUTE), 1015 SRC1_ELEM(ELEM_Y), 1016 SRC1_NEG(0), 1017 INDEX_MODE(SQ_INDEX_LOOP), 1018 PRED_SEL(SQ_PRED_SEL_OFF), 1019 LAST(0)); 1020 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 1021 SRC2_REL(ABSOLUTE), 1022 SRC2_ELEM(ELEM_X), 1023 SRC2_NEG(0), 1024 ALU_INST(SQ_OP3_INST_MULADD), 1025 BANK_SWIZZLE(SQ_ALU_VEC_012), 1026 DST_GPR(2), 1027 DST_REL(ABSOLUTE), 1028 DST_ELEM(ELEM_X), 1029 CLAMP(0)); 1030 /* r2.y = MAD(c1.y, r1.y, pv.y) */ 1031 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1), 1032 SRC0_REL(ABSOLUTE), 1033 SRC0_ELEM(ELEM_Y), 1034 SRC0_NEG(0), 1035 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 1036 SRC1_REL(ABSOLUTE), 1037 SRC1_ELEM(ELEM_Y), 1038 SRC1_NEG(0), 1039 INDEX_MODE(SQ_INDEX_LOOP), 1040 PRED_SEL(SQ_PRED_SEL_OFF), 1041 LAST(0)); 1042 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 1043 SRC2_REL(ABSOLUTE), 1044 SRC2_ELEM(ELEM_Y), 1045 SRC2_NEG(0), 1046 ALU_INST(SQ_OP3_INST_MULADD), 1047 BANK_SWIZZLE(SQ_ALU_VEC_012), 1048 DST_GPR(2), 1049 DST_REL(ABSOLUTE), 1050 DST_ELEM(ELEM_Y), 1051 CLAMP(0)); 1052 /* r2.z = MAD(c1.z, r1.y, pv.z) */ 1053 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1), 1054 SRC0_REL(ABSOLUTE), 1055 SRC0_ELEM(ELEM_Z), 1056 SRC0_NEG(0), 1057 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 1058 SRC1_REL(ABSOLUTE), 1059 SRC1_ELEM(ELEM_Y), 1060 SRC1_NEG(0), 1061 INDEX_MODE(SQ_INDEX_LOOP), 1062 PRED_SEL(SQ_PRED_SEL_OFF), 1063 LAST(0)); 1064 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 1065 SRC2_REL(ABSOLUTE), 1066 SRC2_ELEM(ELEM_Z), 1067 SRC2_NEG(0), 1068 ALU_INST(SQ_OP3_INST_MULADD), 1069 BANK_SWIZZLE(SQ_ALU_VEC_012), 1070 DST_GPR(2), 1071 DST_REL(ABSOLUTE), 1072 DST_ELEM(ELEM_Z), 1073 CLAMP(0)); 1074 /* r2.w = MAD(0, 0, 1) */ 1075 shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), 1076 SRC0_REL(ABSOLUTE), 1077 SRC0_ELEM(ELEM_X), 1078 SRC0_NEG(0), 1079 
SRC1_SEL(SQ_ALU_SRC_0), 1080 SRC1_REL(ABSOLUTE), 1081 SRC1_ELEM(ELEM_X), 1082 SRC1_NEG(0), 1083 INDEX_MODE(SQ_INDEX_LOOP), 1084 PRED_SEL(SQ_PRED_SEL_OFF), 1085 LAST(1)); 1086 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), 1087 SRC2_REL(ABSOLUTE), 1088 SRC2_ELEM(ELEM_W), 1089 SRC2_NEG(0), 1090 ALU_INST(SQ_OP3_INST_MULADD), 1091 BANK_SWIZZLE(SQ_ALU_VEC_012), 1092 DST_GPR(2), 1093 DST_REL(ABSOLUTE), 1094 DST_ELEM(ELEM_W), 1095 CLAMP(0)); 1096 /* 17,18,19,20 */ 1097 /* r2.x = MAD(c2.x, r1.z, pv.x) */ 1098 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2), 1099 SRC0_REL(ABSOLUTE), 1100 SRC0_ELEM(ELEM_X), 1101 SRC0_NEG(0), 1102 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 1103 SRC1_REL(ABSOLUTE), 1104 SRC1_ELEM(ELEM_Z), 1105 SRC1_NEG(0), 1106 INDEX_MODE(SQ_INDEX_LOOP), 1107 PRED_SEL(SQ_PRED_SEL_OFF), 1108 LAST(0)); 1109 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 1110 SRC2_REL(ABSOLUTE), 1111 SRC2_ELEM(ELEM_X), 1112 SRC2_NEG(0), 1113 ALU_INST(SQ_OP3_INST_MULADD), 1114 BANK_SWIZZLE(SQ_ALU_VEC_012), 1115 DST_GPR(2), 1116 DST_REL(ABSOLUTE), 1117 DST_ELEM(ELEM_X), 1118 CLAMP(1)); 1119 /* r2.y = MAD(c2.y, r1.z, pv.y) */ 1120 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2), 1121 SRC0_REL(ABSOLUTE), 1122 SRC0_ELEM(ELEM_Y), 1123 SRC0_NEG(0), 1124 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 1125 SRC1_REL(ABSOLUTE), 1126 SRC1_ELEM(ELEM_Z), 1127 SRC1_NEG(0), 1128 INDEX_MODE(SQ_INDEX_LOOP), 1129 PRED_SEL(SQ_PRED_SEL_OFF), 1130 LAST(0)); 1131 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 1132 SRC2_REL(ABSOLUTE), 1133 SRC2_ELEM(ELEM_Y), 1134 SRC2_NEG(0), 1135 ALU_INST(SQ_OP3_INST_MULADD), 1136 BANK_SWIZZLE(SQ_ALU_VEC_012), 1137 DST_GPR(2), 1138 DST_REL(ABSOLUTE), 1139 DST_ELEM(ELEM_Y), 1140 CLAMP(1)); 1141 /* r2.z = MAD(c2.z, r1.z, pv.z) */ 1142 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2), 1143 SRC0_REL(ABSOLUTE), 1144 SRC0_ELEM(ELEM_Z), 1145 SRC0_NEG(0), 1146 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 1147 SRC1_REL(ABSOLUTE), 1148 SRC1_ELEM(ELEM_Z), 1149 
SRC1_NEG(0), 1150 INDEX_MODE(SQ_INDEX_LOOP), 1151 PRED_SEL(SQ_PRED_SEL_OFF), 1152 LAST(0)); 1153 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 1154 SRC2_REL(ABSOLUTE), 1155 SRC2_ELEM(ELEM_Z), 1156 SRC2_NEG(0), 1157 ALU_INST(SQ_OP3_INST_MULADD), 1158 BANK_SWIZZLE(SQ_ALU_VEC_012), 1159 DST_GPR(2), 1160 DST_REL(ABSOLUTE), 1161 DST_ELEM(ELEM_Z), 1162 CLAMP(1)); 1163 /* r2.w = MAD(0, 0, 1) */ 1164 shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), 1165 SRC0_REL(ABSOLUTE), 1166 SRC0_ELEM(ELEM_X), 1167 SRC0_NEG(0), 1168 SRC1_SEL(SQ_ALU_SRC_0), 1169 SRC1_REL(ABSOLUTE), 1170 SRC1_ELEM(ELEM_X), 1171 SRC1_NEG(0), 1172 INDEX_MODE(SQ_INDEX_LOOP), 1173 PRED_SEL(SQ_PRED_SEL_OFF), 1174 LAST(1)); 1175 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), 1176 SRC2_REL(ABSOLUTE), 1177 SRC2_ELEM(ELEM_X), 1178 SRC2_NEG(0), 1179 ALU_INST(SQ_OP3_INST_MULADD), 1180 BANK_SWIZZLE(SQ_ALU_VEC_012), 1181 DST_GPR(2), 1182 DST_REL(ABSOLUTE), 1183 DST_ELEM(ELEM_W), 1184 CLAMP(1)); 1185 1186 /* 21 */ 1187 shader[i++] = CF_DWORD0(ADDR(24), 1188 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1189 shader[i++] = CF_DWORD1(POP_COUNT(0), 1190 CF_CONST(0), 1191 COND(SQ_CF_COND_ACTIVE), 1192 I_COUNT(3), 1193 VALID_PIXEL_MODE(0), 1194 END_OF_PROGRAM(0), 1195 CF_INST(SQ_CF_INST_TC), 1196 WHOLE_QUAD_MODE(0), 1197 BARRIER(1)); 1198 /* 22 */ 1199 shader[i++] = CF_DWORD0(ADDR(0), 1200 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1201 shader[i++] = CF_DWORD1(POP_COUNT(0), 1202 CF_CONST(0), 1203 COND(SQ_CF_COND_ACTIVE), 1204 I_COUNT(0), 1205 VALID_PIXEL_MODE(0), 1206 END_OF_PROGRAM(0), 1207 CF_INST(SQ_CF_INST_RETURN), 1208 WHOLE_QUAD_MODE(0), 1209 BARRIER(1)); 1210 /* 23 */ 1211 shader[i++] = 0x00000000; 1212 shader[i++] = 0x00000000; 1213 /* 24/25 */ 1214 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1215 INST_MOD(0), 1216 FETCH_WHOLE_QUAD(0), 1217 RESOURCE_ID(0), 1218 SRC_GPR(0), 1219 SRC_REL(ABSOLUTE), 1220 ALT_CONST(0), 1221 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 1222 
SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 1223 shader[i++] = TEX_DWORD1(DST_GPR(1), 1224 DST_REL(ABSOLUTE), 1225 DST_SEL_X(SQ_SEL_X), 1226 DST_SEL_Y(SQ_SEL_MASK), 1227 DST_SEL_Z(SQ_SEL_MASK), 1228 DST_SEL_W(SQ_SEL_1), 1229 LOD_BIAS(0), 1230 COORD_TYPE_X(TEX_NORMALIZED), 1231 COORD_TYPE_Y(TEX_NORMALIZED), 1232 COORD_TYPE_Z(TEX_NORMALIZED), 1233 COORD_TYPE_W(TEX_NORMALIZED)); 1234 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1235 OFFSET_Y(0), 1236 OFFSET_Z(0), 1237 SAMPLER_ID(0), 1238 SRC_SEL_X(SQ_SEL_X), 1239 SRC_SEL_Y(SQ_SEL_Y), 1240 SRC_SEL_Z(SQ_SEL_0), 1241 SRC_SEL_W(SQ_SEL_1)); 1242 shader[i++] = TEX_DWORD_PAD; 1243 /* 26/27 */ 1244 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1245 INST_MOD(0), 1246 FETCH_WHOLE_QUAD(0), 1247 RESOURCE_ID(1), 1248 SRC_GPR(0), 1249 SRC_REL(ABSOLUTE), 1250 ALT_CONST(0), 1251 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 1252 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 1253 shader[i++] = TEX_DWORD1(DST_GPR(1), 1254 DST_REL(ABSOLUTE), 1255 DST_SEL_X(SQ_SEL_MASK), 1256 DST_SEL_Y(SQ_SEL_MASK), 1257 DST_SEL_Z(SQ_SEL_X), 1258 DST_SEL_W(SQ_SEL_MASK), 1259 LOD_BIAS(0), 1260 COORD_TYPE_X(TEX_NORMALIZED), 1261 COORD_TYPE_Y(TEX_NORMALIZED), 1262 COORD_TYPE_Z(TEX_NORMALIZED), 1263 COORD_TYPE_W(TEX_NORMALIZED)); 1264 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1265 OFFSET_Y(0), 1266 OFFSET_Z(0), 1267 SAMPLER_ID(1), 1268 SRC_SEL_X(SQ_SEL_X), 1269 SRC_SEL_Y(SQ_SEL_Y), 1270 SRC_SEL_Z(SQ_SEL_0), 1271 SRC_SEL_W(SQ_SEL_1)); 1272 shader[i++] = TEX_DWORD_PAD; 1273 /* 28/29 */ 1274 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1275 INST_MOD(0), 1276 FETCH_WHOLE_QUAD(0), 1277 RESOURCE_ID(2), 1278 SRC_GPR(0), 1279 SRC_REL(ABSOLUTE), 1280 ALT_CONST(0), 1281 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 1282 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 1283 shader[i++] = TEX_DWORD1(DST_GPR(1), 1284 DST_REL(ABSOLUTE), 1285 DST_SEL_X(SQ_SEL_MASK), 1286 DST_SEL_Y(SQ_SEL_X), 1287 DST_SEL_Z(SQ_SEL_MASK), 1288 DST_SEL_W(SQ_SEL_MASK), 1289 LOD_BIAS(0), 1290 COORD_TYPE_X(TEX_NORMALIZED), 
1291 COORD_TYPE_Y(TEX_NORMALIZED), 1292 COORD_TYPE_Z(TEX_NORMALIZED), 1293 COORD_TYPE_W(TEX_NORMALIZED)); 1294 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1295 OFFSET_Y(0), 1296 OFFSET_Z(0), 1297 SAMPLER_ID(2), 1298 SRC_SEL_X(SQ_SEL_X), 1299 SRC_SEL_Y(SQ_SEL_Y), 1300 SRC_SEL_Z(SQ_SEL_0), 1301 SRC_SEL_W(SQ_SEL_1)); 1302 shader[i++] = TEX_DWORD_PAD; 1303 /* 30 */ 1304 shader[i++] = CF_DWORD0(ADDR(32), 1305 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1306 shader[i++] = CF_DWORD1(POP_COUNT(0), 1307 CF_CONST(0), 1308 COND(SQ_CF_COND_ACTIVE), 1309 I_COUNT(1), 1310 VALID_PIXEL_MODE(0), 1311 END_OF_PROGRAM(0), 1312 CF_INST(SQ_CF_INST_TC), 1313 WHOLE_QUAD_MODE(0), 1314 BARRIER(1)); 1315 /* 31 */ 1316 shader[i++] = CF_DWORD0(ADDR(0), 1317 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 1318 shader[i++] = CF_DWORD1(POP_COUNT(0), 1319 CF_CONST(0), 1320 COND(SQ_CF_COND_ACTIVE), 1321 I_COUNT(0), 1322 VALID_PIXEL_MODE(0), 1323 END_OF_PROGRAM(0), 1324 CF_INST(SQ_CF_INST_RETURN), 1325 WHOLE_QUAD_MODE(0), 1326 BARRIER(1)); 1327 /* 32/33 */ 1328 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1329 INST_MOD(0), 1330 FETCH_WHOLE_QUAD(0), 1331 RESOURCE_ID(0), 1332 SRC_GPR(0), 1333 SRC_REL(ABSOLUTE), 1334 ALT_CONST(0), 1335 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 1336 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 1337 shader[i++] = TEX_DWORD1(DST_GPR(1), 1338 DST_REL(ABSOLUTE), 1339 DST_SEL_X(SQ_SEL_X), 1340 DST_SEL_Y(SQ_SEL_Y), 1341 DST_SEL_Z(SQ_SEL_Z), 1342 DST_SEL_W(SQ_SEL_1), 1343 LOD_BIAS(0), 1344 COORD_TYPE_X(TEX_NORMALIZED), 1345 COORD_TYPE_Y(TEX_NORMALIZED), 1346 COORD_TYPE_Z(TEX_NORMALIZED), 1347 COORD_TYPE_W(TEX_NORMALIZED)); 1348 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1349 OFFSET_Y(0), 1350 OFFSET_Z(0), 1351 SAMPLER_ID(0), 1352 SRC_SEL_X(SQ_SEL_X), 1353 SRC_SEL_Y(SQ_SEL_Y), 1354 SRC_SEL_Z(SQ_SEL_0), 1355 SRC_SEL_W(SQ_SEL_1)); 1356 shader[i++] = TEX_DWORD_PAD; 1357 1358 return i; 1359} 1360

/* comp vs --------------------------------------- */

/*
 * Helpers private to evergreen_comp_vs(); all three are #undef'd right
 * after the function.  They are macros rather than functions so that every
 * field is substituted textually and the emitted words stay exactly what
 * the hand-expanded form produced.
 */

/*
 * ENDIAN_SWAP field shared by every vertex fetch below.  It is selected
 * here, outside any macro invocation, because a preprocessing directive
 * inside the argument list of a function-like macro is undefined behaviour
 * in ISO C (C11 6.10.3p11) and only works as a compiler extension.
 * NOTE(review): this assumes VTX_DWORD2 is a function-like macro; its
 * definition is not visible from this file.
 */
#if X_BYTE_ORDER == X_BIG_ENDIAN
#define COMP_VS_ENDIAN_SWAP ENDIAN_SWAP(SQ_ENDIAN_8IN32)
#else
#define COMP_VS_ENDIAN_SWAP ENDIAN_SWAP(SQ_ENDIAN_NONE)
#endif

/*
 * First word of an ALU slot: SRC0 is channel gpr_elem of GPR number gpr,
 * SRC1 is channel kconst_elem of kcache-0 constant number kconst.  Both
 * sources are ABSOLUTE and un-negated, predication is off.
 */
#define COMP_VS_ALU_DW0(gpr, gpr_elem, kconst, kconst_elem, index_mode, last) \
    ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + (gpr)), \
               SRC0_REL(ABSOLUTE), \
               SRC0_ELEM(gpr_elem), \
               SRC0_NEG(0), \
               SRC1_SEL(ALU_SRC_KCACHE0_BASE + (kconst)), \
               SRC1_REL(ABSOLUTE), \
               SRC1_ELEM(kconst_elem), \
               SRC1_NEG(0), \
               INDEX_MODE(index_mode), \
               PRED_SEL(SQ_PRED_SEL_OFF), \
               LAST(last))

/*
 * Second word of a two-operand ALU slot: opcode op, destination channel
 * dst_elem of GPR number dst_gpr, written only when write_mask is 1.
 * No abs/omod/clamp modifiers, no predicate or execute-mask update.
 */
#define COMP_VS_ALU_DW1(op, dst_gpr, dst_elem, write_mask) \
    ALU_DWORD1_OP2(SRC0_ABS(0), \
                   SRC1_ABS(0), \
                   UPDATE_EXECUTE_MASK(0), \
                   UPDATE_PRED(0), \
                   WRITE_MASK(write_mask), \
                   OMOD(SQ_ALU_OMOD_OFF), \
                   ALU_INST(op), \
                   BANK_SWIZZLE(SQ_ALU_VEC_012), \
                   DST_GPR(dst_gpr), \
                   DST_REL(ABSOLUTE), \
                   DST_ELEM(dst_elem), \
                   CLAMP(0))

/*
 * evergreen_comp_vs - write the composite vertex shader into a buffer.
 *
 * ChipSet: not referenced by this function.
 * shader:  destination array; 108 32-bit words are stored starting at
 *          shader[0].  No bounds checking is done, so the caller must
 *          provide at least that much room.
 *
 * Returns the number of 32-bit words written.
 *
 * The numbers in the comments below count 64-bit (two-word) slots, and the
 * ADDR() fields are consistent with that unit: ADDR(14)/ADDR(34) land on
 * the ALU slots labelled 14 and 34, ADDR(44)/ADDR(50) on the fetch slots
 * labelled 44 and 50.  Because those addresses are hard-coded, slots must
 * not be added, removed or reordered without fixing every ADDR().
 *
 * Layout:
 *    0- 2  entry: CALL 3 if COND_BOOL, CALL 9 if COND_NOT_BOOL, end NOP
 *    3- 8  "mask" subroutine      (fetch, ALU, 3 exports, RETURN)
 *    9-13  "non-mask" subroutine  (fetch, ALU, 2 exports, RETURN)
 *   14-33  ALU slots for the mask subroutine
 *   34-43  ALU slots for the non-mask subroutine
 *   44-49  vertex fetches for the mask subroutine
 *   50-53  vertex fetches for the non-mask subroutine
 */
int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
{
    int i = 0;

    /* 0 - call the mask subroutine (slot 3) under COND_BOOL */
    shader[i++] = CF_DWORD0(ADDR(3),
                            JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
    shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0),
                            COND(SQ_CF_COND_BOOL), I_COUNT(0),
                            VALID_PIXEL_MODE(0), END_OF_PROGRAM(0),
                            CF_INST(SQ_CF_INST_CALL),
                            WHOLE_QUAD_MODE(0), BARRIER(0));
    /* 1 - call the non-mask subroutine (slot 9) under COND_NOT_BOOL */
    shader[i++] = CF_DWORD0(ADDR(9),
                            JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
    shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0),
                            COND(SQ_CF_COND_NOT_BOOL), I_COUNT(0),
                            VALID_PIXEL_MODE(0), END_OF_PROGRAM(0),
                            CF_INST(SQ_CF_INST_CALL),
                            WHOLE_QUAD_MODE(0), BARRIER(0));
    /* 2 - NOP carrying END_OF_PROGRAM(1) */
    shader[i++] = CF_DWORD0(ADDR(0),
                            JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
    shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0),
                            COND(SQ_CF_COND_ACTIVE), I_COUNT(0),
                            VALID_PIXEL_MODE(0), END_OF_PROGRAM(1),
                            CF_INST(SQ_CF_INST_NOP),
                            WHOLE_QUAD_MODE(0), BARRIER(1));

    /* 3 - mask sub: vertex-fetch clause at slot 44, I_COUNT(3) */
    shader[i++] = CF_DWORD0(ADDR(44),
                            JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
    shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0),
                            COND(SQ_CF_COND_ACTIVE), I_COUNT(3),
                            VALID_PIXEL_MODE(0), END_OF_PROGRAM(0),
                            CF_INST(SQ_CF_INST_VC),
                            WHOLE_QUAD_MODE(0), BARRIER(1));
    /* 4 - ALU clause at slot 14, I_COUNT(20) (slots 14..33) */
    shader[i++] = CF_ALU_DWORD0(ADDR(14),
                                KCACHE_BANK0(0), KCACHE_BANK1(0),
                                KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
                                KCACHE_ADDR0(0), KCACHE_ADDR1(0),
                                I_COUNT(20), ALT_CONST(0),
                                CF_INST(SQ_CF_INST_ALU),
                                WHOLE_QUAD_MODE(0), BARRIER(1));
    /* 5 - dst: position export from GPR2, swizzle (x, y, 0, 1) */
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
                                          TYPE(SQ_EXPORT_POS),
                                          RW_GPR(2), RW_REL(ABSOLUTE),
                                          INDEX_GPR(0), ELEM_SIZE(0));
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
                                               SRC_SEL_Y(SQ_SEL_Y),
                                               SRC_SEL_Z(SQ_SEL_0),
                                               SRC_SEL_W(SQ_SEL_1),
                                               BURST_COUNT(1),
                                               VALID_PIXEL_MODE(0),
                                               END_OF_PROGRAM(0),
                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
                                               MARK(0), BARRIER(1));
    /* 6 - src: parameter 0 export from GPR1 (EXPORT, not yet DONE) */
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
                                          TYPE(SQ_EXPORT_PARAM),
                                          RW_GPR(1), RW_REL(ABSOLUTE),
                                          INDEX_GPR(0), ELEM_SIZE(0));
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
                                               SRC_SEL_Y(SQ_SEL_Y),
                                               SRC_SEL_Z(SQ_SEL_0),
                                               SRC_SEL_W(SQ_SEL_1),
                                               BURST_COUNT(1),
                                               VALID_PIXEL_MODE(0),
                                               END_OF_PROGRAM(0),
                                               CF_INST(SQ_CF_INST_EXPORT),
                                               MARK(0), BARRIER(0));
    /*
     * 7 - mask: parameter 1 export from GPR0 (EXPORT_DONE)
     * NOTE(review): this invocation passes WHOLE_QUAD_MODE(0) in the
     * position where the sibling exports pass MARK(0).  Kept exactly as
     * found; confirm the two wrappers expand identically before
     * normalizing it.
     */
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
                                          TYPE(SQ_EXPORT_PARAM),
                                          RW_GPR(0), RW_REL(ABSOLUTE),
                                          INDEX_GPR(0), ELEM_SIZE(0));
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
                                               SRC_SEL_Y(SQ_SEL_Y),
                                               SRC_SEL_Z(SQ_SEL_0),
                                               SRC_SEL_W(SQ_SEL_1),
                                               BURST_COUNT(1),
                                               VALID_PIXEL_MODE(0),
                                               END_OF_PROGRAM(0),
                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
                                               WHOLE_QUAD_MODE(0), BARRIER(0));
    /* 8 - return from the mask subroutine */
    shader[i++] = CF_DWORD0(ADDR(0),
                            JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
    shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0),
                            COND(SQ_CF_COND_ACTIVE), I_COUNT(0),
                            VALID_PIXEL_MODE(0), END_OF_PROGRAM(0),
                            CF_INST(SQ_CF_INST_RETURN),
                            WHOLE_QUAD_MODE(0), BARRIER(1));

    /* 9 - non-mask sub: vertex-fetch clause at slot 50, I_COUNT(2) */
    shader[i++] = CF_DWORD0(ADDR(50),
                            JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
    shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0),
                            COND(SQ_CF_COND_ACTIVE), I_COUNT(2),
                            VALID_PIXEL_MODE(0), END_OF_PROGRAM(0),
                            CF_INST(SQ_CF_INST_VC),
                            WHOLE_QUAD_MODE(0), BARRIER(1));
    /* 10 - ALU clause at slot 34, I_COUNT(10) (slots 34..43) */
    shader[i++] = CF_ALU_DWORD0(ADDR(34),
                                KCACHE_BANK0(0), KCACHE_BANK1(0),
                                KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
                                KCACHE_ADDR0(0), KCACHE_ADDR1(0),
                                I_COUNT(10), ALT_CONST(0),
                                CF_INST(SQ_CF_INST_ALU),
                                WHOLE_QUAD_MODE(0), BARRIER(1));
    /* 11 - dst: position export from GPR1, swizzle (x, y, 0, 1) */
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
                                          TYPE(SQ_EXPORT_POS),
                                          RW_GPR(1), RW_REL(ABSOLUTE),
                                          INDEX_GPR(0), ELEM_SIZE(0));
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
                                               SRC_SEL_Y(SQ_SEL_Y),
                                               SRC_SEL_Z(SQ_SEL_0),
                                               SRC_SEL_W(SQ_SEL_1),
                                               BURST_COUNT(0),
                                               VALID_PIXEL_MODE(0),
                                               END_OF_PROGRAM(0),
                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
                                               MARK(0), BARRIER(1));
    /* 12 - src: parameter 0 export from GPR0 (EXPORT_DONE) */
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
                                          TYPE(SQ_EXPORT_PARAM),
                                          RW_GPR(0), RW_REL(ABSOLUTE),
                                          INDEX_GPR(0), ELEM_SIZE(0));
    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
                                               SRC_SEL_Y(SQ_SEL_Y),
                                               SRC_SEL_Z(SQ_SEL_0),
                                               SRC_SEL_W(SQ_SEL_1),
                                               BURST_COUNT(0),
                                               VALID_PIXEL_MODE(0),
                                               END_OF_PROGRAM(0),
                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
                                               MARK(0), BARRIER(0));
    /* 13 - return from the non-mask subroutine */
    shader[i++] = CF_DWORD0(ADDR(0),
                            JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
    shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0),
                            COND(SQ_CF_COND_ACTIVE), I_COUNT(0),
                            VALID_PIXEL_MODE(0), END_OF_PROGRAM(0),
                            CF_INST(SQ_CF_INST_RETURN),
                            WHOLE_QUAD_MODE(0), BARRIER(1));

    /*
     * ALU slots.  Every DOT4 below is a run of four slots, one per channel
     * X/Y/Z/W, with LAST(1) on the W slot and WRITE_MASK(1) on exactly one
     * slot, so a single channel of the destination GPR is written per run.
     */

    /* 14-17 srcX DOT4 - mask: GPR1 with const 0, writes GPR3.x */
    shader[i++] = COMP_VS_ALU_DW0(1, ELEM_X, 0, ELEM_X, SQ_INDEX_LOOP, 0);
    shader[i++] = COMP_VS_ALU_DW1(SQ_OP2_INST_DOT4, 3, ELEM_X, 1);
    shader[i++] = COMP_VS_ALU_DW0(1, ELEM_Y, 0, ELEM_Y, SQ_INDEX_LOOP, 0);
    shader[i++] = COMP_VS_ALU_DW1(SQ_OP2_INST_DOT4, 3, ELEM_Y, 0);
    shader[i++] = COMP_VS_ALU_DW0(1, ELEM_Z, 0, ELEM_Z, SQ_INDEX_LOOP, 0);
    shader[i++] = COMP_VS_ALU_DW1(SQ_OP2_INST_DOT4, 3, ELEM_Z, 0);
    shader[i++] = COMP_VS_ALU_DW0(1, ELEM_W, 0, ELEM_W, SQ_INDEX_LOOP, 1);
    shader[i++] = COMP_VS_ALU_DW1(SQ_OP2_INST_DOT4, 3, ELEM_W, 0);

    /* 18-21 srcY DOT4 - mask: GPR1 with const 1, writes GPR3.y */
    shader[i++] = COMP_VS_ALU_DW0(1, ELEM_X, 1, ELEM_X, SQ_INDEX_LOOP, 0);
    shader[i++] = COMP_VS_ALU_DW1(SQ_OP2_INST_DOT4, 3, ELEM_X, 0);
    shader[i++] = COMP_VS_ALU_DW0(1, ELEM_Y, 1, ELEM_Y, SQ_INDEX_LOOP, 0);
    shader[i++] = COMP_VS_ALU_DW1(SQ_OP2_INST_DOT4, 3, ELEM_Y, 1);
    shader[i++] = COMP_VS_ALU_DW0(1, ELEM_Z, 1, ELEM_Z, SQ_INDEX_LOOP, 0);
    shader[i++] = COMP_VS_ALU_DW1(SQ_OP2_INST_DOT4, 3, ELEM_Z, 0);
    shader[i++] = COMP_VS_ALU_DW0(1, ELEM_W, 1, ELEM_W, SQ_INDEX_LOOP, 1);
    shader[i++] = COMP_VS_ALU_DW1(SQ_OP2_INST_DOT4, 3, ELEM_W, 0);

    /* 22-25 maskX DOT4 - mask: GPR0 with const 2, writes GPR4.x */
    shader[i++] = COMP_VS_ALU_DW0(0, ELEM_X, 2, ELEM_X, SQ_INDEX_LOOP, 0);
    shader[i++] = COMP_VS_ALU_DW1(SQ_OP2_INST_DOT4, 4, ELEM_X, 1);
    shader[i++] = COMP_VS_ALU_DW0(0, ELEM_Y, 2, ELEM_Y, SQ_INDEX_LOOP, 0);
    shader[i++] = COMP_VS_ALU_DW1(SQ_OP2_INST_DOT4, 4, ELEM_Y, 0);
    shader[i++] = COMP_VS_ALU_DW0(0, ELEM_Z, 2, ELEM_Z, SQ_INDEX_LOOP, 0);
    shader[i++] = COMP_VS_ALU_DW1(SQ_OP2_INST_DOT4, 4, ELEM_Z, 0);
    shader[i++] = COMP_VS_ALU_DW0(0, ELEM_W, 2, ELEM_W, SQ_INDEX_LOOP, 1);
    shader[i++] = COMP_VS_ALU_DW1(SQ_OP2_INST_DOT4, 4, ELEM_W, 0);

    /* 26-29 maskY DOT4 - mask: GPR0 with const 3, writes GPR4.y */
    shader[i++] = COMP_VS_ALU_DW0(0, ELEM_X, 3, ELEM_X, SQ_INDEX_LOOP, 0);
    shader[i++] = COMP_VS_ALU_DW1(SQ_OP2_INST_DOT4, 4, ELEM_X, 0);
    shader[i++] = COMP_VS_ALU_DW0(0, ELEM_Y, 3, ELEM_Y, SQ_INDEX_LOOP, 0);
    shader[i++] = COMP_VS_ALU_DW1(SQ_OP2_INST_DOT4, 4, ELEM_Y, 1);
    shader[i++] = COMP_VS_ALU_DW0(0, ELEM_Z, 3, ELEM_Z, SQ_INDEX_LOOP, 0);
    shader[i++] = COMP_VS_ALU_DW1(SQ_OP2_INST_DOT4, 4, ELEM_Z, 0);
    shader[i++] = COMP_VS_ALU_DW0(0, ELEM_W, 3, ELEM_W, SQ_INDEX_LOOP, 1);
    shader[i++] = COMP_VS_ALU_DW1(SQ_OP2_INST_DOT4, 4, ELEM_W, 0);

    /*
     * 30-33: the "/ w" and "/ h" steps are encoded as MUL by the .w
     * channel of the matching constant.
     * NOTE(review): presumably that channel is loaded with the reciprocal
     * size; confirm against the code that uploads the constants.
     */
    /* 30 srcX / w: GPR1.x = GPR3.x * const0.w */
    shader[i++] = COMP_VS_ALU_DW0(3, ELEM_X, 0, ELEM_W, SQ_INDEX_AR_X, 1);
    shader[i++] = COMP_VS_ALU_DW1(SQ_OP2_INST_MUL, 1, ELEM_X, 1);
    /* 31 srcY / h: GPR1.y = GPR3.y * const1.w */
    shader[i++] = COMP_VS_ALU_DW0(3, ELEM_Y, 1, ELEM_W, SQ_INDEX_AR_X, 1);
    shader[i++] = COMP_VS_ALU_DW1(SQ_OP2_INST_MUL, 1, ELEM_Y, 1);
    /* 32 maskX / w: GPR0.x = GPR4.x * const2.w */
    shader[i++] = COMP_VS_ALU_DW0(4, ELEM_X, 2, ELEM_W, SQ_INDEX_AR_X, 1);
    shader[i++] = COMP_VS_ALU_DW1(SQ_OP2_INST_MUL, 0, ELEM_X, 1);
    /* 33 maskY / h: GPR0.y = GPR4.y * const3.w */
    shader[i++] = COMP_VS_ALU_DW0(4, ELEM_Y, 3, ELEM_W, SQ_INDEX_AR_X, 1);
    shader[i++] = COMP_VS_ALU_DW1(SQ_OP2_INST_MUL, 0, ELEM_Y, 1);

    /* 34-37 srcX DOT4 - non-mask: GPR0 with const 0, writes GPR2.x */
    shader[i++] = COMP_VS_ALU_DW0(0, ELEM_X, 0, ELEM_X, SQ_INDEX_LOOP, 0);
    shader[i++] = COMP_VS_ALU_DW1(SQ_OP2_INST_DOT4, 2, ELEM_X, 1);
    shader[i++] = COMP_VS_ALU_DW0(0, ELEM_Y, 0, ELEM_Y, SQ_INDEX_LOOP, 0);
    shader[i++] = COMP_VS_ALU_DW1(SQ_OP2_INST_DOT4, 2, ELEM_Y, 0);
    shader[i++] = COMP_VS_ALU_DW0(0, ELEM_Z, 0, ELEM_Z, SQ_INDEX_LOOP, 0);
    shader[i++] = COMP_VS_ALU_DW1(SQ_OP2_INST_DOT4, 2, ELEM_Z, 0);
    shader[i++] = COMP_VS_ALU_DW0(0, ELEM_W, 0, ELEM_W, SQ_INDEX_LOOP, 1);
    shader[i++] = COMP_VS_ALU_DW1(SQ_OP2_INST_DOT4, 2, ELEM_W, 0);

    /* 38-41 srcY DOT4 - non-mask: GPR0 with const 1, writes GPR2.y */
    shader[i++] = COMP_VS_ALU_DW0(0, ELEM_X, 1, ELEM_X, SQ_INDEX_LOOP, 0);
    shader[i++] = COMP_VS_ALU_DW1(SQ_OP2_INST_DOT4, 2, ELEM_X, 0);
    shader[i++] = COMP_VS_ALU_DW0(0, ELEM_Y, 1, ELEM_Y, SQ_INDEX_LOOP, 0);
    shader[i++] = COMP_VS_ALU_DW1(SQ_OP2_INST_DOT4, 2, ELEM_Y, 1);
    shader[i++] = COMP_VS_ALU_DW0(0, ELEM_Z, 1, ELEM_Z, SQ_INDEX_LOOP, 0);
    shader[i++] = COMP_VS_ALU_DW1(SQ_OP2_INST_DOT4, 2, ELEM_Z, 0);
    shader[i++] = COMP_VS_ALU_DW0(0, ELEM_W, 1, ELEM_W, SQ_INDEX_LOOP, 1);
    shader[i++] = COMP_VS_ALU_DW1(SQ_OP2_INST_DOT4, 2, ELEM_W, 0);

    /* 42 srcX / w: GPR0.x = GPR2.x * const0.w */
    shader[i++] = COMP_VS_ALU_DW0(2, ELEM_X, 0, ELEM_W, SQ_INDEX_AR_X, 1);
    shader[i++] = COMP_VS_ALU_DW1(SQ_OP2_INST_MUL, 0, ELEM_X, 1);
    /* 43 srcY / h: GPR0.y = GPR2.y * const1.w */
    shader[i++] = COMP_VS_ALU_DW0(2, ELEM_Y, 1, ELEM_W, SQ_INDEX_AR_X, 1);
    shader[i++] = COMP_VS_ALU_DW1(SQ_OP2_INST_MUL, 0, ELEM_Y, 1);

    /*
     * Vertex fetches.  Each one is four words (two slots): DWORD0, DWORD1,
     * DWORD2 and a pad word.  All read buffer 0 indexed by GPR0.x as
     * 32_32_FLOAT; they differ in byte offset, destination GPR, the
     * constants filled into z/w, and the mega-fetch fields.
     */

    /* mask vfetch - 44/45 - dst: offset 0 -> GPR2 = (x, y, 0, 1) */
    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
                             FETCH_WHOLE_QUAD(0), BUFFER_ID(0),
                             SRC_GPR(0), SRC_REL(ABSOLUTE),
                             SRC_SEL_X(SQ_SEL_X),
                             MEGA_FETCH_COUNT(24));
    shader[i++] = VTX_DWORD1_GPR(DST_GPR(2), DST_REL(0),
                                 DST_SEL_X(SQ_SEL_X), DST_SEL_Y(SQ_SEL_Y),
                                 DST_SEL_Z(SQ_SEL_0), DST_SEL_W(SQ_SEL_1),
                                 USE_CONST_FIELDS(0),
                                 DATA_FORMAT(FMT_32_32_FLOAT),
                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
    shader[i++] = VTX_DWORD2(OFFSET(0),
                             COMP_VS_ENDIAN_SWAP,
                             CONST_BUF_NO_STRIDE(0), MEGA_FETCH(1),
                             ALT_CONST(0),
                             BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
    shader[i++] = VTX_DWORD_PAD;
    /* 46/47 - src: offset 8 -> GPR1 = (x, y, 1, 0) */
    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
                             FETCH_WHOLE_QUAD(0), BUFFER_ID(0),
                             SRC_GPR(0), SRC_REL(ABSOLUTE),
                             SRC_SEL_X(SQ_SEL_X),
                             MEGA_FETCH_COUNT(8));
    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), DST_REL(0),
                                 DST_SEL_X(SQ_SEL_X), DST_SEL_Y(SQ_SEL_Y),
                                 DST_SEL_Z(SQ_SEL_1), DST_SEL_W(SQ_SEL_0),
                                 USE_CONST_FIELDS(0),
                                 DATA_FORMAT(FMT_32_32_FLOAT),
                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
    shader[i++] = VTX_DWORD2(OFFSET(8),
                             COMP_VS_ENDIAN_SWAP,
                             CONST_BUF_NO_STRIDE(0), MEGA_FETCH(0),
                             ALT_CONST(0),
                             BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
    shader[i++] = VTX_DWORD_PAD;
    /* 48/49 - mask: offset 16 -> GPR0 = (x, y, 1, 0) */
    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
                             FETCH_WHOLE_QUAD(0), BUFFER_ID(0),
                             SRC_GPR(0), SRC_REL(ABSOLUTE),
                             SRC_SEL_X(SQ_SEL_X),
                             MEGA_FETCH_COUNT(8));
    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), DST_REL(0),
                                 DST_SEL_X(SQ_SEL_X), DST_SEL_Y(SQ_SEL_Y),
                                 DST_SEL_Z(SQ_SEL_1), DST_SEL_W(SQ_SEL_0),
                                 USE_CONST_FIELDS(0),
                                 DATA_FORMAT(FMT_32_32_FLOAT),
                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
    shader[i++] = VTX_DWORD2(OFFSET(16),
                             COMP_VS_ENDIAN_SWAP,
                             CONST_BUF_NO_STRIDE(0), MEGA_FETCH(0),
                             ALT_CONST(0),
                             BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
    shader[i++] = VTX_DWORD_PAD;

    /* no mask vfetch - 50/51 - dst: offset 0 -> GPR1 = (x, y, 0, 1) */
    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
                             FETCH_WHOLE_QUAD(0), BUFFER_ID(0),
                             SRC_GPR(0), SRC_REL(ABSOLUTE),
                             SRC_SEL_X(SQ_SEL_X),
                             MEGA_FETCH_COUNT(16));
    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), DST_REL(0),
                                 DST_SEL_X(SQ_SEL_X), DST_SEL_Y(SQ_SEL_Y),
                                 DST_SEL_Z(SQ_SEL_0), DST_SEL_W(SQ_SEL_1),
                                 USE_CONST_FIELDS(0),
                                 DATA_FORMAT(FMT_32_32_FLOAT),
                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
    shader[i++] = VTX_DWORD2(OFFSET(0),
                             COMP_VS_ENDIAN_SWAP,
                             CONST_BUF_NO_STRIDE(0), MEGA_FETCH(1),
                             ALT_CONST(0),
                             BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
    shader[i++] = VTX_DWORD_PAD;
    /* 52/53 - src: offset 8 -> GPR0 = (x, y, 1, 0) */
    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
                             FETCH_WHOLE_QUAD(0), BUFFER_ID(0),
                             SRC_GPR(0), SRC_REL(ABSOLUTE),
                             SRC_SEL_X(SQ_SEL_X),
                             MEGA_FETCH_COUNT(8));
    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), DST_REL(0),
                                 DST_SEL_X(SQ_SEL_X), DST_SEL_Y(SQ_SEL_Y),
                                 DST_SEL_Z(SQ_SEL_1), DST_SEL_W(SQ_SEL_0),
                                 USE_CONST_FIELDS(0),
                                 DATA_FORMAT(FMT_32_32_FLOAT),
                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
    shader[i++] = VTX_DWORD2(OFFSET(8),
                             COMP_VS_ENDIAN_SWAP,
                             CONST_BUF_NO_STRIDE(0), MEGA_FETCH(0),
                             ALT_CONST(0),
                             BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
    shader[i++] = VTX_DWORD_PAD;

    return i;
}

#undef COMP_VS_ALU_DW1
#undef COMP_VS_ALU_DW0
#undef COMP_VS_ENDIAN_SWAP

2475 2476/* comp ps --------------------------------------- */ 2477int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) 2478{ 2479 int i = 0; 2480 2481 /* 0 */ 2482 shader[i++] = CF_DWORD0(ADDR(3), 2483 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2484 shader[i++] = CF_DWORD1(POP_COUNT(0), 2485 CF_CONST(0), 2486 COND(SQ_CF_COND_BOOL), 2487 I_COUNT(0), 2488 VALID_PIXEL_MODE(0), 2489 END_OF_PROGRAM(0), 2490 CF_INST(SQ_CF_INST_CALL), 2491 WHOLE_QUAD_MODE(0), 2492 BARRIER(0)); 2493 /* 1 */ 2494 shader[i++] = CF_DWORD0(ADDR(8), 2495 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2496 shader[i++] = CF_DWORD1(POP_COUNT(0), 2497 CF_CONST(0), 2498 COND(SQ_CF_COND_NOT_BOOL), 2499 I_COUNT(0), 2500 VALID_PIXEL_MODE(0), 2501 END_OF_PROGRAM(0), 2502 CF_INST(SQ_CF_INST_CALL), 2503 WHOLE_QUAD_MODE(0), 2504 BARRIER(0)); 2505 /* 2 */ 2506 shader[i++] = CF_DWORD0(ADDR(0), 2507 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2508 shader[i++] = CF_DWORD1(POP_COUNT(0), 2509 CF_CONST(0), 2510 COND(SQ_CF_COND_ACTIVE), 2511 I_COUNT(0), 2512 VALID_PIXEL_MODE(0), 2513 END_OF_PROGRAM(1), 2514 CF_INST(SQ_CF_INST_NOP), 2515 WHOLE_QUAD_MODE(0), 2516 BARRIER(1)); 2517 2518 /* 3 - mask sub */ 2519 shader[i++] = CF_ALU_DWORD0(ADDR(12), 2520 KCACHE_BANK0(0), 2521 KCACHE_BANK1(0), 2522 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 2523 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 2524 KCACHE_ADDR0(0), 2525 KCACHE_ADDR1(0), 2526 I_COUNT(8), 2527 ALT_CONST(0), 2528 CF_INST(SQ_CF_INST_ALU), 2529 WHOLE_QUAD_MODE(0), 2530 BARRIER(1)); 2531 2532 /* 4 */ 2533 shader[i++] =
CF_DWORD0(ADDR(28), 2534 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2535 shader[i++] = CF_DWORD1(POP_COUNT(0), 2536 CF_CONST(0), 2537 COND(SQ_CF_COND_ACTIVE), 2538 I_COUNT(2), 2539 VALID_PIXEL_MODE(0), 2540 END_OF_PROGRAM(0), 2541 CF_INST(SQ_CF_INST_TC), 2542 WHOLE_QUAD_MODE(0), 2543 BARRIER(1)); 2544 2545 /* 5 */ 2546 shader[i++] = CF_ALU_DWORD0(ADDR(20), 2547 KCACHE_BANK0(0), 2548 KCACHE_BANK1(0), 2549 KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); 2550 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 2551 KCACHE_ADDR0(0), 2552 KCACHE_ADDR1(0), 2553 I_COUNT(4), 2554 ALT_CONST(0), 2555 CF_INST(SQ_CF_INST_ALU), 2556 WHOLE_QUAD_MODE(0), 2557 BARRIER(1)); 2558 2559 /* 6 */ 2560 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 2561 TYPE(SQ_EXPORT_PIXEL), 2562 RW_GPR(2), 2563 RW_REL(ABSOLUTE), 2564 INDEX_GPR(0), 2565 ELEM_SIZE(1)); 2566 2567 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 2568 SRC_SEL_Y(SQ_SEL_Y), 2569 SRC_SEL_Z(SQ_SEL_Z), 2570 SRC_SEL_W(SQ_SEL_W), 2571 BURST_COUNT(1), 2572 VALID_PIXEL_MODE(0), 2573 END_OF_PROGRAM(0), 2574 CF_INST(SQ_CF_INST_EXPORT_DONE), 2575 MARK(0), 2576 BARRIER(1)); 2577 /* 7 */ 2578 shader[i++] = CF_DWORD0(ADDR(0), 2579 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2580 shader[i++] = CF_DWORD1(POP_COUNT(0), 2581 CF_CONST(0), 2582 COND(SQ_CF_COND_ACTIVE), 2583 I_COUNT(0), 2584 VALID_PIXEL_MODE(0), 2585 END_OF_PROGRAM(0), 2586 CF_INST(SQ_CF_INST_RETURN), 2587 WHOLE_QUAD_MODE(0), 2588 BARRIER(1)); 2589 2590 /* 8 - non-mask sub */ 2591 shader[i++] = CF_ALU_DWORD0(ADDR(24), 2592 KCACHE_BANK0(0), 2593 KCACHE_BANK1(0), 2594 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 2595 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 2596 KCACHE_ADDR0(0), 2597 KCACHE_ADDR1(0), 2598 I_COUNT(4), 2599 ALT_CONST(0), 2600 CF_INST(SQ_CF_INST_ALU), 2601 WHOLE_QUAD_MODE(0), 2602 BARRIER(1)); 2603 /* 9 */ 2604 shader[i++] = CF_DWORD0(ADDR(32), 2605 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2606 shader[i++] = 
CF_DWORD1(POP_COUNT(0), 2607 CF_CONST(0), 2608 COND(SQ_CF_COND_ACTIVE), 2609 I_COUNT(1), 2610 VALID_PIXEL_MODE(0), 2611 END_OF_PROGRAM(0), 2612 CF_INST(SQ_CF_INST_TC), 2613 WHOLE_QUAD_MODE(0), 2614 BARRIER(1)); 2615 2616 /* 10 */ 2617 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 2618 TYPE(SQ_EXPORT_PIXEL), 2619 RW_GPR(0), 2620 RW_REL(ABSOLUTE), 2621 INDEX_GPR(0), 2622 ELEM_SIZE(1)); 2623 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 2624 SRC_SEL_Y(SQ_SEL_Y), 2625 SRC_SEL_Z(SQ_SEL_Z), 2626 SRC_SEL_W(SQ_SEL_W), 2627 BURST_COUNT(1), 2628 VALID_PIXEL_MODE(0), 2629 END_OF_PROGRAM(0), 2630 CF_INST(SQ_CF_INST_EXPORT_DONE), 2631 MARK(0), 2632 BARRIER(1)); 2633 2634 /* 11 */ 2635 shader[i++] = CF_DWORD0(ADDR(0), 2636 JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); 2637 shader[i++] = CF_DWORD1(POP_COUNT(0), 2638 CF_CONST(0), 2639 COND(SQ_CF_COND_ACTIVE), 2640 I_COUNT(0), 2641 VALID_PIXEL_MODE(0), 2642 END_OF_PROGRAM(0), 2643 CF_INST(SQ_CF_INST_RETURN), 2644 WHOLE_QUAD_MODE(0), 2645 BARRIER(1)); 2646 2647 /* 12 interpolate src tex coords - mask */ 2648 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2649 SRC0_REL(ABSOLUTE), 2650 SRC0_ELEM(ELEM_Y), 2651 SRC0_NEG(0), 2652 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 2653 SRC1_REL(ABSOLUTE), 2654 SRC1_ELEM(ELEM_X), 2655 SRC1_NEG(0), 2656 INDEX_MODE(SQ_INDEX_AR_X), 2657 PRED_SEL(SQ_PRED_SEL_OFF), 2658 LAST(0)); 2659 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2660 SRC1_ABS(0), 2661 UPDATE_EXECUTE_MASK(0), 2662 UPDATE_PRED(0), 2663 WRITE_MASK(1), 2664 OMOD(SQ_ALU_OMOD_OFF), 2665 ALU_INST(SQ_OP2_INST_INTERP_XY), 2666 BANK_SWIZZLE(SQ_ALU_VEC_210), 2667 DST_GPR(1), 2668 DST_REL(ABSOLUTE), 2669 DST_ELEM(ELEM_X), 2670 CLAMP(0)); 2671 /* 13 */ 2672 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2673 SRC0_REL(ABSOLUTE), 2674 SRC0_ELEM(ELEM_X), 2675 SRC0_NEG(0), 2676 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 2677 SRC1_REL(ABSOLUTE), 2678 SRC1_ELEM(ELEM_X), 2679 SRC1_NEG(0), 2680 INDEX_MODE(SQ_INDEX_AR_X), 
2681 PRED_SEL(SQ_PRED_SEL_OFF), 2682 LAST(0)); 2683 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2684 SRC1_ABS(0), 2685 UPDATE_EXECUTE_MASK(0), 2686 UPDATE_PRED(0), 2687 WRITE_MASK(1), 2688 OMOD(SQ_ALU_OMOD_OFF), 2689 ALU_INST(SQ_OP2_INST_INTERP_XY), 2690 BANK_SWIZZLE(SQ_ALU_VEC_210), 2691 DST_GPR(1), 2692 DST_REL(ABSOLUTE), 2693 DST_ELEM(ELEM_Y), 2694 CLAMP(0)); 2695 /* 14 */ 2696 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2697 SRC0_REL(ABSOLUTE), 2698 SRC0_ELEM(ELEM_Y), 2699 SRC0_NEG(0), 2700 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 2701 SRC1_REL(ABSOLUTE), 2702 SRC1_ELEM(ELEM_X), 2703 SRC1_NEG(0), 2704 INDEX_MODE(SQ_INDEX_AR_X), 2705 PRED_SEL(SQ_PRED_SEL_OFF), 2706 LAST(0)); 2707 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2708 SRC1_ABS(0), 2709 UPDATE_EXECUTE_MASK(0), 2710 UPDATE_PRED(0), 2711 WRITE_MASK(0), 2712 OMOD(SQ_ALU_OMOD_OFF), 2713 ALU_INST(SQ_OP2_INST_INTERP_XY), 2714 BANK_SWIZZLE(SQ_ALU_VEC_210), 2715 DST_GPR(1), 2716 DST_REL(ABSOLUTE), 2717 DST_ELEM(ELEM_Z), 2718 CLAMP(0)); 2719 /* 15 */ 2720 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2721 SRC0_REL(ABSOLUTE), 2722 SRC0_ELEM(ELEM_X), 2723 SRC0_NEG(0), 2724 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 2725 SRC1_REL(ABSOLUTE), 2726 SRC1_ELEM(ELEM_X), 2727 SRC1_NEG(0), 2728 INDEX_MODE(SQ_INDEX_AR_X), 2729 PRED_SEL(SQ_PRED_SEL_OFF), 2730 LAST(1)); 2731 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2732 SRC1_ABS(0), 2733 UPDATE_EXECUTE_MASK(0), 2734 UPDATE_PRED(0), 2735 WRITE_MASK(0), 2736 OMOD(SQ_ALU_OMOD_OFF), 2737 ALU_INST(SQ_OP2_INST_INTERP_XY), 2738 BANK_SWIZZLE(SQ_ALU_VEC_210), 2739 DST_GPR(1), 2740 DST_REL(ABSOLUTE), 2741 DST_ELEM(ELEM_W), 2742 CLAMP(0)); 2743 2744 /* 16 interpolate mask tex coords */ 2745 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2746 SRC0_REL(ABSOLUTE), 2747 SRC0_ELEM(ELEM_Y), 2748 SRC0_NEG(0), 2749 SRC1_SEL(ALU_SRC_PARAM_BASE + 1), 2750 SRC1_REL(ABSOLUTE), 2751 SRC1_ELEM(ELEM_X), 2752 SRC1_NEG(0), 2753 INDEX_MODE(SQ_INDEX_AR_X), 2754 PRED_SEL(SQ_PRED_SEL_OFF), 
2755 LAST(0)); 2756 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2757 SRC1_ABS(0), 2758 UPDATE_EXECUTE_MASK(0), 2759 UPDATE_PRED(0), 2760 WRITE_MASK(1), 2761 OMOD(SQ_ALU_OMOD_OFF), 2762 ALU_INST(SQ_OP2_INST_INTERP_XY), 2763 BANK_SWIZZLE(SQ_ALU_VEC_210), 2764 DST_GPR(0), 2765 DST_REL(ABSOLUTE), 2766 DST_ELEM(ELEM_X), 2767 CLAMP(0)); 2768 /* 17 */ 2769 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2770 SRC0_REL(ABSOLUTE), 2771 SRC0_ELEM(ELEM_X), 2772 SRC0_NEG(0), 2773 SRC1_SEL(ALU_SRC_PARAM_BASE + 1), 2774 SRC1_REL(ABSOLUTE), 2775 SRC1_ELEM(ELEM_X), 2776 SRC1_NEG(0), 2777 INDEX_MODE(SQ_INDEX_AR_X), 2778 PRED_SEL(SQ_PRED_SEL_OFF), 2779 LAST(0)); 2780 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2781 SRC1_ABS(0), 2782 UPDATE_EXECUTE_MASK(0), 2783 UPDATE_PRED(0), 2784 WRITE_MASK(1), 2785 OMOD(SQ_ALU_OMOD_OFF), 2786 ALU_INST(SQ_OP2_INST_INTERP_XY), 2787 BANK_SWIZZLE(SQ_ALU_VEC_210), 2788 DST_GPR(0), 2789 DST_REL(ABSOLUTE), 2790 DST_ELEM(ELEM_Y), 2791 CLAMP(0)); 2792 /* 18 */ 2793 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2794 SRC0_REL(ABSOLUTE), 2795 SRC0_ELEM(ELEM_Y), 2796 SRC0_NEG(0), 2797 SRC1_SEL(ALU_SRC_PARAM_BASE + 1), 2798 SRC1_REL(ABSOLUTE), 2799 SRC1_ELEM(ELEM_X), 2800 SRC1_NEG(0), 2801 INDEX_MODE(SQ_INDEX_AR_X), 2802 PRED_SEL(SQ_PRED_SEL_OFF), 2803 LAST(0)); 2804 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2805 SRC1_ABS(0), 2806 UPDATE_EXECUTE_MASK(0), 2807 UPDATE_PRED(0), 2808 WRITE_MASK(0), 2809 OMOD(SQ_ALU_OMOD_OFF), 2810 ALU_INST(SQ_OP2_INST_INTERP_XY), 2811 BANK_SWIZZLE(SQ_ALU_VEC_210), 2812 DST_GPR(0), 2813 DST_REL(ABSOLUTE), 2814 DST_ELEM(ELEM_Z), 2815 CLAMP(0)); 2816 /* 19 */ 2817 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2818 SRC0_REL(ABSOLUTE), 2819 SRC0_ELEM(ELEM_X), 2820 SRC0_NEG(0), 2821 SRC1_SEL(ALU_SRC_PARAM_BASE + 1), 2822 SRC1_REL(ABSOLUTE), 2823 SRC1_ELEM(ELEM_X), 2824 SRC1_NEG(0), 2825 INDEX_MODE(SQ_INDEX_AR_X), 2826 PRED_SEL(SQ_PRED_SEL_OFF), 2827 LAST(1)); 2828 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2829 
SRC1_ABS(0), 2830 UPDATE_EXECUTE_MASK(0), 2831 UPDATE_PRED(0), 2832 WRITE_MASK(0), 2833 OMOD(SQ_ALU_OMOD_OFF), 2834 ALU_INST(SQ_OP2_INST_INTERP_XY), 2835 BANK_SWIZZLE(SQ_ALU_VEC_210), 2836 DST_GPR(0), 2837 DST_REL(ABSOLUTE), 2838 DST_ELEM(ELEM_W), 2839 CLAMP(0)); 2840 2841 /* 20 - alu 0 */ 2842 /* MUL gpr[2].x gpr[0].x gpr[1].x */ 2843 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2844 SRC0_REL(ABSOLUTE), 2845 SRC0_ELEM(ELEM_X), 2846 SRC0_NEG(0), 2847 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 2848 SRC1_REL(ABSOLUTE), 2849 SRC1_ELEM(ELEM_X), 2850 SRC1_NEG(0), 2851 INDEX_MODE(SQ_INDEX_LOOP), 2852 PRED_SEL(SQ_PRED_SEL_OFF), 2853 LAST(0)); 2854 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2855 SRC1_ABS(0), 2856 UPDATE_EXECUTE_MASK(0), 2857 UPDATE_PRED(0), 2858 WRITE_MASK(1), 2859 OMOD(SQ_ALU_OMOD_OFF), 2860 ALU_INST(SQ_OP2_INST_MUL), 2861 BANK_SWIZZLE(SQ_ALU_VEC_012), 2862 DST_GPR(2), 2863 DST_REL(ABSOLUTE), 2864 DST_ELEM(ELEM_X), 2865 CLAMP(1)); 2866 /* 21 - alu 1 */ 2867 /* MUL gpr[2].y gpr[0].y gpr[1].y */ 2868 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2869 SRC0_REL(ABSOLUTE), 2870 SRC0_ELEM(ELEM_Y), 2871 SRC0_NEG(0), 2872 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 2873 SRC1_REL(ABSOLUTE), 2874 SRC1_ELEM(ELEM_Y), 2875 SRC1_NEG(0), 2876 INDEX_MODE(SQ_INDEX_LOOP), 2877 PRED_SEL(SQ_PRED_SEL_OFF), 2878 LAST(0)); 2879 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2880 SRC1_ABS(0), 2881 UPDATE_EXECUTE_MASK(0), 2882 UPDATE_PRED(0), 2883 WRITE_MASK(1), 2884 OMOD(SQ_ALU_OMOD_OFF), 2885 ALU_INST(SQ_OP2_INST_MUL), 2886 BANK_SWIZZLE(SQ_ALU_VEC_012), 2887 DST_GPR(2), 2888 DST_REL(ABSOLUTE), 2889 DST_ELEM(ELEM_Y), 2890 CLAMP(1)); 2891 /* 22 - alu 2 */ 2892 /* MUL gpr[2].z gpr[0].z gpr[1].z */ 2893 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2894 SRC0_REL(ABSOLUTE), 2895 SRC0_ELEM(ELEM_Z), 2896 SRC0_NEG(0), 2897 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 2898 SRC1_REL(ABSOLUTE), 2899 SRC1_ELEM(ELEM_Z), 2900 SRC1_NEG(0), 2901 INDEX_MODE(SQ_INDEX_LOOP), 2902 
PRED_SEL(SQ_PRED_SEL_OFF), 2903 LAST(0)); 2904 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2905 SRC1_ABS(0), 2906 UPDATE_EXECUTE_MASK(0), 2907 UPDATE_PRED(0), 2908 WRITE_MASK(1), 2909 OMOD(SQ_ALU_OMOD_OFF), 2910 ALU_INST(SQ_OP2_INST_MUL), 2911 BANK_SWIZZLE(SQ_ALU_VEC_012), 2912 DST_GPR(2), 2913 DST_REL(ABSOLUTE), 2914 DST_ELEM(ELEM_Z), 2915 CLAMP(1)); 2916 /* 23 - alu 3 */ 2917 /* MUL gpr[2].w gpr[0].w gpr[1].w */ 2918 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2919 SRC0_REL(ABSOLUTE), 2920 SRC0_ELEM(ELEM_W), 2921 SRC0_NEG(0), 2922 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 2923 SRC1_REL(ABSOLUTE), 2924 SRC1_ELEM(ELEM_W), 2925 SRC1_NEG(0), 2926 INDEX_MODE(SQ_INDEX_LOOP), 2927 PRED_SEL(SQ_PRED_SEL_OFF), 2928 LAST(1)); 2929 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2930 SRC1_ABS(0), 2931 UPDATE_EXECUTE_MASK(0), 2932 UPDATE_PRED(0), 2933 WRITE_MASK(1), 2934 OMOD(SQ_ALU_OMOD_OFF), 2935 ALU_INST(SQ_OP2_INST_MUL), 2936 BANK_SWIZZLE(SQ_ALU_VEC_012), 2937 DST_GPR(2), 2938 DST_REL(ABSOLUTE), 2939 DST_ELEM(ELEM_W), 2940 CLAMP(1)); 2941 2942 /* 24 - interpolate tex coords - non-mask */ 2943 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2944 SRC0_REL(ABSOLUTE), 2945 SRC0_ELEM(ELEM_Y), 2946 SRC0_NEG(0), 2947 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 2948 SRC1_REL(ABSOLUTE), 2949 SRC1_ELEM(ELEM_X), 2950 SRC1_NEG(0), 2951 INDEX_MODE(SQ_INDEX_AR_X), 2952 PRED_SEL(SQ_PRED_SEL_OFF), 2953 LAST(0)); 2954 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2955 SRC1_ABS(0), 2956 UPDATE_EXECUTE_MASK(0), 2957 UPDATE_PRED(0), 2958 WRITE_MASK(1), 2959 OMOD(SQ_ALU_OMOD_OFF), 2960 ALU_INST(SQ_OP2_INST_INTERP_XY), 2961 BANK_SWIZZLE(SQ_ALU_VEC_210), 2962 DST_GPR(0), 2963 DST_REL(ABSOLUTE), 2964 DST_ELEM(ELEM_X), 2965 CLAMP(0)); 2966 /* 25 */ 2967 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2968 SRC0_REL(ABSOLUTE), 2969 SRC0_ELEM(ELEM_X), 2970 SRC0_NEG(0), 2971 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 2972 SRC1_REL(ABSOLUTE), 2973 SRC1_ELEM(ELEM_X), 2974 SRC1_NEG(0), 2975 
INDEX_MODE(SQ_INDEX_AR_X), 2976 PRED_SEL(SQ_PRED_SEL_OFF), 2977 LAST(0)); 2978 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 2979 SRC1_ABS(0), 2980 UPDATE_EXECUTE_MASK(0), 2981 UPDATE_PRED(0), 2982 WRITE_MASK(1), 2983 OMOD(SQ_ALU_OMOD_OFF), 2984 ALU_INST(SQ_OP2_INST_INTERP_XY), 2985 BANK_SWIZZLE(SQ_ALU_VEC_210), 2986 DST_GPR(0), 2987 DST_REL(ABSOLUTE), 2988 DST_ELEM(ELEM_Y), 2989 CLAMP(0)); 2990 /* 26 */ 2991 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2992 SRC0_REL(ABSOLUTE), 2993 SRC0_ELEM(ELEM_Y), 2994 SRC0_NEG(0), 2995 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 2996 SRC1_REL(ABSOLUTE), 2997 SRC1_ELEM(ELEM_X), 2998 SRC1_NEG(0), 2999 INDEX_MODE(SQ_INDEX_AR_X), 3000 PRED_SEL(SQ_PRED_SEL_OFF), 3001 LAST(0)); 3002 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 3003 SRC1_ABS(0), 3004 UPDATE_EXECUTE_MASK(0), 3005 UPDATE_PRED(0), 3006 WRITE_MASK(0), 3007 OMOD(SQ_ALU_OMOD_OFF), 3008 ALU_INST(SQ_OP2_INST_INTERP_XY), 3009 BANK_SWIZZLE(SQ_ALU_VEC_210), 3010 DST_GPR(0), 3011 DST_REL(ABSOLUTE), 3012 DST_ELEM(ELEM_Z), 3013 CLAMP(0)); 3014 /* 27 */ 3015 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 3016 SRC0_REL(ABSOLUTE), 3017 SRC0_ELEM(ELEM_X), 3018 SRC0_NEG(0), 3019 SRC1_SEL(ALU_SRC_PARAM_BASE + 0), 3020 SRC1_REL(ABSOLUTE), 3021 SRC1_ELEM(ELEM_X), 3022 SRC1_NEG(0), 3023 INDEX_MODE(SQ_INDEX_AR_X), 3024 PRED_SEL(SQ_PRED_SEL_OFF), 3025 LAST(1)); 3026 shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), 3027 SRC1_ABS(0), 3028 UPDATE_EXECUTE_MASK(0), 3029 UPDATE_PRED(0), 3030 WRITE_MASK(0), 3031 OMOD(SQ_ALU_OMOD_OFF), 3032 ALU_INST(SQ_OP2_INST_INTERP_XY), 3033 BANK_SWIZZLE(SQ_ALU_VEC_210), 3034 DST_GPR(0), 3035 DST_REL(ABSOLUTE), 3036 DST_ELEM(ELEM_W), 3037 CLAMP(0)); 3038 3039 /* 28/29 - src - mask */ 3040 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 3041 INST_MOD(0), 3042 FETCH_WHOLE_QUAD(0), 3043 RESOURCE_ID(0), 3044 SRC_GPR(1), 3045 SRC_REL(ABSOLUTE), 3046 ALT_CONST(0), 3047 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 3048 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 3049 
shader[i++] = TEX_DWORD1(DST_GPR(1), 3050 DST_REL(ABSOLUTE), 3051 DST_SEL_X(SQ_SEL_X), 3052 DST_SEL_Y(SQ_SEL_Y), 3053 DST_SEL_Z(SQ_SEL_Z), 3054 DST_SEL_W(SQ_SEL_W), 3055 LOD_BIAS(0), 3056 COORD_TYPE_X(TEX_NORMALIZED), 3057 COORD_TYPE_Y(TEX_NORMALIZED), 3058 COORD_TYPE_Z(TEX_NORMALIZED), 3059 COORD_TYPE_W(TEX_NORMALIZED)); 3060 shader[i++] = TEX_DWORD2(OFFSET_X(0), 3061 OFFSET_Y(0), 3062 OFFSET_Z(0), 3063 SAMPLER_ID(0), 3064 SRC_SEL_X(SQ_SEL_X), 3065 SRC_SEL_Y(SQ_SEL_Y), 3066 SRC_SEL_Z(SQ_SEL_0), 3067 SRC_SEL_W(SQ_SEL_1)); 3068 shader[i++] = TEX_DWORD_PAD; 3069 /* 30/31 - mask */ 3070 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 3071 INST_MOD(0), 3072 FETCH_WHOLE_QUAD(0), 3073 RESOURCE_ID(1), 3074 SRC_GPR(0), 3075 SRC_REL(ABSOLUTE), 3076 ALT_CONST(0), 3077 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 3078 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 3079 shader[i++] = TEX_DWORD1(DST_GPR(0), 3080 DST_REL(ABSOLUTE), 3081 DST_SEL_X(SQ_SEL_X), 3082 DST_SEL_Y(SQ_SEL_Y), 3083 DST_SEL_Z(SQ_SEL_Z), 3084 DST_SEL_W(SQ_SEL_W), 3085 LOD_BIAS(0), 3086 COORD_TYPE_X(TEX_NORMALIZED), 3087 COORD_TYPE_Y(TEX_NORMALIZED), 3088 COORD_TYPE_Z(TEX_NORMALIZED), 3089 COORD_TYPE_W(TEX_NORMALIZED)); 3090 shader[i++] = TEX_DWORD2(OFFSET_X(0), 3091 OFFSET_Y(0), 3092 OFFSET_Z(0), 3093 SAMPLER_ID(1), 3094 SRC_SEL_X(SQ_SEL_X), 3095 SRC_SEL_Y(SQ_SEL_Y), 3096 SRC_SEL_Z(SQ_SEL_0), 3097 SRC_SEL_W(SQ_SEL_1)); 3098 shader[i++] = TEX_DWORD_PAD; 3099 3100 /* 32/33 - src - non-mask */ 3101 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 3102 INST_MOD(0), 3103 FETCH_WHOLE_QUAD(0), 3104 RESOURCE_ID(0), 3105 SRC_GPR(0), 3106 SRC_REL(ABSOLUTE), 3107 ALT_CONST(0), 3108 RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), 3109 SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); 3110 shader[i++] = TEX_DWORD1(DST_GPR(0), 3111 DST_REL(ABSOLUTE), 3112 DST_SEL_X(SQ_SEL_X), 3113 DST_SEL_Y(SQ_SEL_Y), 3114 DST_SEL_Z(SQ_SEL_Z), 3115 DST_SEL_W(SQ_SEL_W), 3116 LOD_BIAS(0), 3117 COORD_TYPE_X(TEX_NORMALIZED), 3118 COORD_TYPE_Y(TEX_NORMALIZED), 
3119 COORD_TYPE_Z(TEX_NORMALIZED), 3120 COORD_TYPE_W(TEX_NORMALIZED)); 3121 shader[i++] = TEX_DWORD2(OFFSET_X(0), 3122 OFFSET_Y(0), 3123 OFFSET_Z(0), 3124 SAMPLER_ID(0), 3125 SRC_SEL_X(SQ_SEL_X), 3126 SRC_SEL_Y(SQ_SEL_Y), 3127 SRC_SEL_Z(SQ_SEL_0), 3128 SRC_SEL_W(SQ_SEL_1)); 3129 shader[i++] = TEX_DWORD_PAD; 3130 3131 return i; 3132} 3133 3134#endif 3135