1/* 2 * Copyright 2008 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Author: Alex Deucher <alexander.deucher@amd.com> 24 * 25 */ 26 27#ifdef HAVE_CONFIG_H 28#include "config.h" 29#endif 30 31#include "xf86.h" 32 33#include "radeon.h" 34#include "r600_shader.h" 35#include "r600_reg.h" 36 37/* solid vs --------------------------------------- */ 38int R600_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader) 39{ 40 int i = 0; 41 42 /* 0 */ 43 shader[i++] = CF_DWORD0(ADDR(4)); 44 shader[i++] = CF_DWORD1(POP_COUNT(0), 45 CF_CONST(0), 46 COND(SQ_CF_COND_ACTIVE), 47 I_COUNT(1), 48 CALL_COUNT(0), 49 END_OF_PROGRAM(0), 50 VALID_PIXEL_MODE(0), 51 CF_INST(SQ_CF_INST_VTX), 52 WHOLE_QUAD_MODE(0), 53 BARRIER(1)); 54 /* 1 */ 55 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 56 TYPE(SQ_EXPORT_POS), 57 RW_GPR(1), 58 RW_REL(ABSOLUTE), 59 INDEX_GPR(0), 60 ELEM_SIZE(0)); 61 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 62 SRC_SEL_Y(SQ_SEL_Y), 63 SRC_SEL_Z(SQ_SEL_Z), 64 SRC_SEL_W(SQ_SEL_W), 65 R6xx_ELEM_LOOP(0), 66 BURST_COUNT(1), 67 END_OF_PROGRAM(0), 68 VALID_PIXEL_MODE(0), 69 CF_INST(SQ_CF_INST_EXPORT_DONE), 70 WHOLE_QUAD_MODE(0), 71 BARRIER(1)); 72 /* 2 - always export a param whether it's used or not */ 73 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 74 TYPE(SQ_EXPORT_PARAM), 75 RW_GPR(0), 76 RW_REL(ABSOLUTE), 77 INDEX_GPR(0), 78 ELEM_SIZE(0)); 79 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 80 SRC_SEL_Y(SQ_SEL_Y), 81 SRC_SEL_Z(SQ_SEL_Z), 82 SRC_SEL_W(SQ_SEL_W), 83 R6xx_ELEM_LOOP(0), 84 BURST_COUNT(0), 85 END_OF_PROGRAM(1), 86 VALID_PIXEL_MODE(0), 87 CF_INST(SQ_CF_INST_EXPORT_DONE), 88 WHOLE_QUAD_MODE(0), 89 BARRIER(0)); 90 /* 3 - padding */ 91 shader[i++] = 0x00000000; 92 shader[i++] = 0x00000000; 93 /* 4/5 */ 94 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 95 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 96 FETCH_WHOLE_QUAD(0), 97 BUFFER_ID(0), 98 SRC_GPR(0), 99 SRC_REL(ABSOLUTE), 100 SRC_SEL_X(SQ_SEL_X), 101 MEGA_FETCH_COUNT(8)); 102 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 103 DST_REL(0), 104 DST_SEL_X(SQ_SEL_X), 105 DST_SEL_Y(SQ_SEL_Y), 106 DST_SEL_Z(SQ_SEL_0), 107 DST_SEL_W(SQ_SEL_1), 108 USE_CONST_FIELDS(0), 109 DATA_FORMAT(FMT_32_32_FLOAT), 110 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 111 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 112 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 113 shader[i++] = VTX_DWORD2(OFFSET(0), 114#if X_BYTE_ORDER == X_BIG_ENDIAN 115 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 116#else 117 ENDIAN_SWAP(SQ_ENDIAN_NONE), 118#endif 119 CONST_BUF_NO_STRIDE(0), 120 MEGA_FETCH(1)); 121 shader[i++] = VTX_DWORD_PAD; 122 123 return i; 124} 125 126/* solid ps --------------------------------------- */ 127int R600_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader) 128{ 129 int i = 0; 130 131 /* 0 */ 132 shader[i++] = CF_ALU_DWORD0(ADDR(2), 133 KCACHE_BANK0(0), 134 KCACHE_BANK1(0), 135 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 136 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 137 KCACHE_ADDR0(0), 138 KCACHE_ADDR1(0), 139 I_COUNT(4), 140 USES_WATERFALL(0), 141 CF_INST(SQ_CF_INST_ALU), 142 WHOLE_QUAD_MODE(0), 143 BARRIER(1)); 144 /* 1 */ 145 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 146 TYPE(SQ_EXPORT_PIXEL), 147 RW_GPR(0), 148 RW_REL(ABSOLUTE), 149 INDEX_GPR(0), 150 ELEM_SIZE(1)); 151 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 152 SRC_SEL_Y(SQ_SEL_Y), 153 SRC_SEL_Z(SQ_SEL_Z), 154 SRC_SEL_W(SQ_SEL_W), 155 R6xx_ELEM_LOOP(0), 156 BURST_COUNT(1), 157 END_OF_PROGRAM(1), 158 VALID_PIXEL_MODE(0), 159 CF_INST(SQ_CF_INST_EXPORT_DONE), 160 WHOLE_QUAD_MODE(0), 161 BARRIER(1)); 162 163 /* 2 */ 164 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), 165 SRC0_REL(ABSOLUTE), 166 SRC0_ELEM(ELEM_X), 167 SRC0_NEG(0), 168 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 169 SRC1_REL(ABSOLUTE), 170 SRC1_ELEM(ELEM_X), 171 SRC1_NEG(0), 172 INDEX_MODE(SQ_INDEX_AR_X), 173 PRED_SEL(SQ_PRED_SEL_OFF), 174 LAST(0)); 175 shader[i++] = ALU_DWORD1_OP2(ChipSet, 176 SRC0_ABS(0), 177 SRC1_ABS(0), 178 UPDATE_EXECUTE_MASK(0), 179 UPDATE_PRED(0), 180 WRITE_MASK(1), 181 FOG_MERGE(0), 182 OMOD(SQ_ALU_OMOD_OFF), 183 ALU_INST(SQ_OP2_INST_MOV), 184 BANK_SWIZZLE(SQ_ALU_VEC_012), 185 DST_GPR(0), 186 DST_REL(ABSOLUTE), 187 DST_ELEM(ELEM_X), 188 CLAMP(1)); 189 /* 3 */ 190 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), 191 SRC0_REL(ABSOLUTE), 192 SRC0_ELEM(ELEM_Y), 193 SRC0_NEG(0), 194 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 195 SRC1_REL(ABSOLUTE), 196 SRC1_ELEM(ELEM_Y), 197 SRC1_NEG(0), 198 INDEX_MODE(SQ_INDEX_AR_X), 199 PRED_SEL(SQ_PRED_SEL_OFF), 200 LAST(0)); 201 shader[i++] = ALU_DWORD1_OP2(ChipSet, 202 SRC0_ABS(0), 203 SRC1_ABS(0), 204 UPDATE_EXECUTE_MASK(0), 205 UPDATE_PRED(0), 206 WRITE_MASK(1), 207 FOG_MERGE(0), 208 OMOD(SQ_ALU_OMOD_OFF), 209 ALU_INST(SQ_OP2_INST_MOV), 210 BANK_SWIZZLE(SQ_ALU_VEC_012), 211 DST_GPR(0), 212 DST_REL(ABSOLUTE), 213 DST_ELEM(ELEM_Y), 214 CLAMP(1)); 215 /* 4 */ 216 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), 217 SRC0_REL(ABSOLUTE), 218 SRC0_ELEM(ELEM_Z), 219 SRC0_NEG(0), 220 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 221 SRC1_REL(ABSOLUTE), 222 SRC1_ELEM(ELEM_Z), 223 SRC1_NEG(0), 224 INDEX_MODE(SQ_INDEX_AR_X), 225 PRED_SEL(SQ_PRED_SEL_OFF), 226 LAST(0)); 227 shader[i++] = ALU_DWORD1_OP2(ChipSet, 228 SRC0_ABS(0), 229 SRC1_ABS(0), 230 UPDATE_EXECUTE_MASK(0), 231 UPDATE_PRED(0), 232 WRITE_MASK(1), 233 FOG_MERGE(0), 234 OMOD(SQ_ALU_OMOD_OFF), 235 ALU_INST(SQ_OP2_INST_MOV), 236 BANK_SWIZZLE(SQ_ALU_VEC_012), 237 DST_GPR(0), 238 DST_REL(ABSOLUTE), 239 DST_ELEM(ELEM_Z), 240 CLAMP(1)); 241 /* 5 */ 242 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), 243 SRC0_REL(ABSOLUTE), 244 SRC0_ELEM(ELEM_W), 245 SRC0_NEG(0), 246 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 247 SRC1_REL(ABSOLUTE), 248 SRC1_ELEM(ELEM_W), 249 SRC1_NEG(0), 250 INDEX_MODE(SQ_INDEX_AR_X), 251 PRED_SEL(SQ_PRED_SEL_OFF), 252 LAST(1)); 253 shader[i++] = ALU_DWORD1_OP2(ChipSet, 254 SRC0_ABS(0), 255 SRC1_ABS(0), 256 UPDATE_EXECUTE_MASK(0), 257 UPDATE_PRED(0), 258 WRITE_MASK(1), 259 FOG_MERGE(0), 260 OMOD(SQ_ALU_OMOD_OFF), 261 ALU_INST(SQ_OP2_INST_MOV), 262 BANK_SWIZZLE(SQ_ALU_VEC_012), 263 DST_GPR(0), 264 DST_REL(ABSOLUTE), 265 DST_ELEM(ELEM_W), 266 CLAMP(1)); 267 268 return i; 269} 270 271/* copy vs --------------------------------------- */ 272int R600_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader) 273{ 274 int i = 0; 275 276 /* 0 */ 277 shader[i++] = CF_DWORD0(ADDR(4)); 278 shader[i++] = CF_DWORD1(POP_COUNT(0), 279 CF_CONST(0), 280 COND(SQ_CF_COND_ACTIVE), 281 I_COUNT(2), 282 CALL_COUNT(0), 283 END_OF_PROGRAM(0), 284 VALID_PIXEL_MODE(0), 285 CF_INST(SQ_CF_INST_VTX), 286 WHOLE_QUAD_MODE(0), 287 BARRIER(1)); 288 /* 1 */ 289 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 290 TYPE(SQ_EXPORT_POS), 291 RW_GPR(1), 292 RW_REL(ABSOLUTE), 293 INDEX_GPR(0), 294 ELEM_SIZE(0)); 295 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 296 SRC_SEL_Y(SQ_SEL_Y), 297 SRC_SEL_Z(SQ_SEL_Z), 298 SRC_SEL_W(SQ_SEL_W), 299 R6xx_ELEM_LOOP(0), 300 BURST_COUNT(0), 301 END_OF_PROGRAM(0), 302 VALID_PIXEL_MODE(0), 303 CF_INST(SQ_CF_INST_EXPORT_DONE), 304 WHOLE_QUAD_MODE(0), 305 BARRIER(1)); 306 /* 2 */ 307 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 308 TYPE(SQ_EXPORT_PARAM), 309 RW_GPR(0), 310 RW_REL(ABSOLUTE), 311 INDEX_GPR(0), 312 ELEM_SIZE(0)); 313 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 314 SRC_SEL_Y(SQ_SEL_Y), 315 SRC_SEL_Z(SQ_SEL_Z), 316 SRC_SEL_W(SQ_SEL_W), 317 R6xx_ELEM_LOOP(0), 318 BURST_COUNT(0), 319 END_OF_PROGRAM(1), 320 VALID_PIXEL_MODE(0), 321 CF_INST(SQ_CF_INST_EXPORT_DONE), 322 WHOLE_QUAD_MODE(0), 323 BARRIER(0)); 324 /* 3 */ 325 shader[i++] = 0x00000000; 326 shader[i++] = 0x00000000; 327 /* 4/5 */ 328 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 329 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 330 FETCH_WHOLE_QUAD(0), 331 BUFFER_ID(0), 332 SRC_GPR(0), 333 SRC_REL(ABSOLUTE), 334 SRC_SEL_X(SQ_SEL_X), 335 MEGA_FETCH_COUNT(16)); 336 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 337 DST_REL(0), 338 DST_SEL_X(SQ_SEL_X), 339 DST_SEL_Y(SQ_SEL_Y), 340 DST_SEL_Z(SQ_SEL_0), 341 DST_SEL_W(SQ_SEL_1), 342 USE_CONST_FIELDS(0), 343 DATA_FORMAT(FMT_32_32_FLOAT), 344 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 345 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 346 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 347 shader[i++] = VTX_DWORD2(OFFSET(0), 348#if X_BYTE_ORDER == X_BIG_ENDIAN 349 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 350#else 351 ENDIAN_SWAP(SQ_ENDIAN_NONE), 352#endif 353 CONST_BUF_NO_STRIDE(0), 354 MEGA_FETCH(1)); 355 shader[i++] = VTX_DWORD_PAD; 356 /* 6/7 */ 357 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 358 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 359 FETCH_WHOLE_QUAD(0), 360 BUFFER_ID(0), 361 SRC_GPR(0), 362 SRC_REL(ABSOLUTE), 363 SRC_SEL_X(SQ_SEL_X), 364 MEGA_FETCH_COUNT(8)); 365 shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), 366 DST_REL(0), 367 DST_SEL_X(SQ_SEL_X), 368 DST_SEL_Y(SQ_SEL_Y), 369 DST_SEL_Z(SQ_SEL_0), 370 DST_SEL_W(SQ_SEL_1), 371 USE_CONST_FIELDS(0), 372 DATA_FORMAT(FMT_32_32_FLOAT), 373 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 374 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 375 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 376 shader[i++] = VTX_DWORD2(OFFSET(8), 377#if X_BYTE_ORDER == X_BIG_ENDIAN 378 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 379#else 380 ENDIAN_SWAP(SQ_ENDIAN_NONE), 381#endif 382 CONST_BUF_NO_STRIDE(0), 383 MEGA_FETCH(0)); 384 shader[i++] = VTX_DWORD_PAD; 385 386 return i; 387} 388 389/* copy ps --------------------------------------- */ 390int R600_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader) 391{ 392 int i=0; 393 394 /* CF INST 0 */ 395 shader[i++] = CF_DWORD0(ADDR(2)); 396 shader[i++] = CF_DWORD1(POP_COUNT(0), 397 CF_CONST(0), 398 COND(SQ_CF_COND_ACTIVE), 399 I_COUNT(1), 400 CALL_COUNT(0), 401 END_OF_PROGRAM(0), 402 VALID_PIXEL_MODE(0), 403 CF_INST(SQ_CF_INST_TEX), 404 WHOLE_QUAD_MODE(0), 405 BARRIER(1)); 406 /* CF INST 1 */ 407 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 408 TYPE(SQ_EXPORT_PIXEL), 409 RW_GPR(0), 410 RW_REL(ABSOLUTE), 411 INDEX_GPR(0), 412 ELEM_SIZE(1)); 413 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 414 SRC_SEL_Y(SQ_SEL_Y), 415 SRC_SEL_Z(SQ_SEL_Z), 416 SRC_SEL_W(SQ_SEL_W), 417 R6xx_ELEM_LOOP(0), 418 BURST_COUNT(1), 419 END_OF_PROGRAM(1), 420 VALID_PIXEL_MODE(0), 421 CF_INST(SQ_CF_INST_EXPORT_DONE), 422 WHOLE_QUAD_MODE(0), 423 BARRIER(1)); 424 /* TEX INST 0 */ 425 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 426 BC_FRAC_MODE(0), 427 FETCH_WHOLE_QUAD(0), 428 RESOURCE_ID(0), 429 SRC_GPR(0), 430 SRC_REL(ABSOLUTE), 431 R7xx_ALT_CONST(0)); 432 shader[i++] = TEX_DWORD1(DST_GPR(0), 433 DST_REL(ABSOLUTE), 434 DST_SEL_X(SQ_SEL_X), /* R */ 435 DST_SEL_Y(SQ_SEL_Y), /* G */ 436 DST_SEL_Z(SQ_SEL_Z), /* B */ 437 DST_SEL_W(SQ_SEL_W), /* A */ 438 LOD_BIAS(0), 439 COORD_TYPE_X(TEX_UNNORMALIZED), 440 COORD_TYPE_Y(TEX_UNNORMALIZED), 441 COORD_TYPE_Z(TEX_UNNORMALIZED), 442 COORD_TYPE_W(TEX_UNNORMALIZED)); 443 shader[i++] = TEX_DWORD2(OFFSET_X(0), 444 OFFSET_Y(0), 445 OFFSET_Z(0), 446 SAMPLER_ID(0), 447 SRC_SEL_X(SQ_SEL_X), 448 SRC_SEL_Y(SQ_SEL_Y), 449 SRC_SEL_Z(SQ_SEL_0), 450 SRC_SEL_W(SQ_SEL_1)); 451 shader[i++] = TEX_DWORD_PAD; 452 453 return i; 454} 455 456/* 457 * ; xv vertex shader 458 * 00 VTX: ADDR(4) CNT(2) 459 * 0 VFETCH R1.xy01, R0.x, fc0 MEGA(16) FORMAT(32_32_FLOAT) 460 * FORMAT_COMP(SIGNED) 461 * 1 VFETCH R0.xy01, R0.x, fc0 MINI(8) OFFSET(8) FORMAT(32_32_FLOAT) 462 * FORMAT_COMP(SIGNED) 463 * 01 EXP_DONE: POS0, R1 464 * 02 EXP_DONE: PARAM0, R0 NO_BARRIER 465 * END_OF_PROGRAM 466 */ 467int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader) 468{ 469 int i = 0; 470 471 /* 0 */ 472 shader[i++] = CF_DWORD0(ADDR(6)); 473 shader[i++] = CF_DWORD1(POP_COUNT(0), 474 CF_CONST(0), 475 COND(SQ_CF_COND_ACTIVE), 476 I_COUNT(2), 477 CALL_COUNT(0), 478 END_OF_PROGRAM(0), 479 VALID_PIXEL_MODE(0), 480 CF_INST(SQ_CF_INST_VTX), 481 WHOLE_QUAD_MODE(0), 482 BARRIER(1)); 483 484 /* 1 - ALU */ 485 shader[i++] = CF_ALU_DWORD0(ADDR(4), 486 KCACHE_BANK0(0), 487 KCACHE_BANK1(0), 488 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 489 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 490 KCACHE_ADDR0(0), 491 KCACHE_ADDR1(0), 492 I_COUNT(2), 493 USES_WATERFALL(0), 494 CF_INST(SQ_CF_INST_ALU), 495 WHOLE_QUAD_MODE(0), 496 BARRIER(1)); 497 498 /* 2 */ 499 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 500 TYPE(SQ_EXPORT_POS), 501 RW_GPR(1), 502 RW_REL(ABSOLUTE), 503 INDEX_GPR(0), 504 ELEM_SIZE(3)); 505 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 506 SRC_SEL_Y(SQ_SEL_Y), 507 SRC_SEL_Z(SQ_SEL_Z), 508 SRC_SEL_W(SQ_SEL_W), 509 R6xx_ELEM_LOOP(0), 510 BURST_COUNT(1), 511 END_OF_PROGRAM(0), 512 VALID_PIXEL_MODE(0), 513 CF_INST(SQ_CF_INST_EXPORT_DONE), 514 WHOLE_QUAD_MODE(0), 515 BARRIER(1)); 516 /* 3 */ 517 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 518 TYPE(SQ_EXPORT_PARAM), 519 RW_GPR(0), 520 RW_REL(ABSOLUTE), 521 INDEX_GPR(0), 522 ELEM_SIZE(3)); 523 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 524 SRC_SEL_Y(SQ_SEL_Y), 525 SRC_SEL_Z(SQ_SEL_Z), 526 SRC_SEL_W(SQ_SEL_W), 527 R6xx_ELEM_LOOP(0), 528 BURST_COUNT(1), 529 END_OF_PROGRAM(1), 530 VALID_PIXEL_MODE(0), 531 CF_INST(SQ_CF_INST_EXPORT_DONE), 532 WHOLE_QUAD_MODE(0), 533 BARRIER(0)); 534 535 536 /* 4 texX / w */ 537 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 538 SRC0_REL(ABSOLUTE), 539 SRC0_ELEM(ELEM_X), 540 SRC0_NEG(0), 541 SRC1_SEL(ALU_SRC_CFILE_BASE + 0), 542 SRC1_REL(ABSOLUTE), 543 SRC1_ELEM(ELEM_X), 544 SRC1_NEG(0), 545 INDEX_MODE(SQ_INDEX_AR_X), 546 PRED_SEL(SQ_PRED_SEL_OFF), 547 LAST(0)); 548 shader[i++] = ALU_DWORD1_OP2(ChipSet, 549 SRC0_ABS(0), 550 SRC1_ABS(0), 551 UPDATE_EXECUTE_MASK(0), 552 UPDATE_PRED(0), 553 WRITE_MASK(1), 554 FOG_MERGE(0), 555 OMOD(SQ_ALU_OMOD_OFF), 556 ALU_INST(SQ_OP2_INST_MUL), 557 BANK_SWIZZLE(SQ_ALU_VEC_012), 558 DST_GPR(0), 559 DST_REL(ABSOLUTE), 560 DST_ELEM(ELEM_X), 561 CLAMP(0)); 562 563 /* 5 texY / h */ 564 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 565 SRC0_REL(ABSOLUTE), 566 SRC0_ELEM(ELEM_Y), 567 SRC0_NEG(0), 568 SRC1_SEL(ALU_SRC_CFILE_BASE + 0), 569 SRC1_REL(ABSOLUTE), 570 SRC1_ELEM(ELEM_Y), 571 SRC1_NEG(0), 572 INDEX_MODE(SQ_INDEX_AR_X), 573 PRED_SEL(SQ_PRED_SEL_OFF), 574 LAST(1)); 575 shader[i++] = ALU_DWORD1_OP2(ChipSet, 576 SRC0_ABS(0), 577 SRC1_ABS(0), 578 UPDATE_EXECUTE_MASK(0), 579 UPDATE_PRED(0), 580 WRITE_MASK(1), 581 FOG_MERGE(0), 582 OMOD(SQ_ALU_OMOD_OFF), 583 ALU_INST(SQ_OP2_INST_MUL), 584 BANK_SWIZZLE(SQ_ALU_VEC_012), 585 DST_GPR(0), 586 DST_REL(ABSOLUTE), 587 DST_ELEM(ELEM_Y), 588 CLAMP(0)); 589 590 /* 6/7 */ 591 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 592 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 593 FETCH_WHOLE_QUAD(0), 594 BUFFER_ID(0), 595 SRC_GPR(0), 596 SRC_REL(ABSOLUTE), 597 SRC_SEL_X(SQ_SEL_X), 598 MEGA_FETCH_COUNT(16)); 599 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 600 DST_REL(ABSOLUTE), 601 DST_SEL_X(SQ_SEL_X), 602 DST_SEL_Y(SQ_SEL_Y), 603 DST_SEL_Z(SQ_SEL_0), 604 DST_SEL_W(SQ_SEL_1), 605 USE_CONST_FIELDS(0), 606 DATA_FORMAT(FMT_32_32_FLOAT), 607 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 608 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 609 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 610 shader[i++] = VTX_DWORD2(OFFSET(0), 611#if X_BYTE_ORDER == X_BIG_ENDIAN 612 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 613#else 614 ENDIAN_SWAP(SQ_ENDIAN_NONE), 615#endif 616 CONST_BUF_NO_STRIDE(0), 617 MEGA_FETCH(1)); 618 shader[i++] = VTX_DWORD_PAD; 619 /* 8/9 */ 620 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 621 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 622 FETCH_WHOLE_QUAD(0), 623 BUFFER_ID(0), 624 SRC_GPR(0), 625 SRC_REL(ABSOLUTE), 626 SRC_SEL_X(SQ_SEL_X), 627 MEGA_FETCH_COUNT(8)); 628 shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), 629 DST_REL(ABSOLUTE), 630 DST_SEL_X(SQ_SEL_X), 631 DST_SEL_Y(SQ_SEL_Y), 632 DST_SEL_Z(SQ_SEL_0), 633 DST_SEL_W(SQ_SEL_1), 634 USE_CONST_FIELDS(0), 635 DATA_FORMAT(FMT_32_32_FLOAT), 636 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 637 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 638 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 639 shader[i++] = VTX_DWORD2(OFFSET(8), 640#if X_BYTE_ORDER == X_BIG_ENDIAN 641 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 642#else 643 ENDIAN_SWAP(SQ_ENDIAN_NONE), 644#endif 645 CONST_BUF_NO_STRIDE(0), 646 MEGA_FETCH(0)); 647 shader[i++] = VTX_DWORD_PAD; 648 649 return i; 650} 651 652int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) 653{ 654 int i = 0; 655 656 /* 0 */ 657 shader[i++] = CF_DWORD0(ADDR(16)); 658 shader[i++] = CF_DWORD1(POP_COUNT(0), 659 CF_CONST(0), 660 COND(SQ_CF_COND_BOOL), 661 I_COUNT(0), 662 CALL_COUNT(0), 663 END_OF_PROGRAM(0), 664 VALID_PIXEL_MODE(0), 665 CF_INST(SQ_CF_INST_CALL), 666 WHOLE_QUAD_MODE(0), 667 BARRIER(0)); 668 /* 1 */ 669 shader[i++] = CF_DWORD0(ADDR(24)); 670 shader[i++] = CF_DWORD1(POP_COUNT(0), 671 CF_CONST(0), 672 COND(SQ_CF_COND_NOT_BOOL), 673 I_COUNT(0), 674 CALL_COUNT(0), 675 END_OF_PROGRAM(0), 676 VALID_PIXEL_MODE(0), 677 CF_INST(SQ_CF_INST_CALL), 678 WHOLE_QUAD_MODE(0), 679 BARRIER(0)); 680 /* 2 */ 681 shader[i++] = CF_ALU_DWORD0(ADDR(4), 682 KCACHE_BANK0(0), 683 KCACHE_BANK1(0), 684 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 685 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 686 KCACHE_ADDR0(0), 687 KCACHE_ADDR1(0), 688 I_COUNT(12), 689 USES_WATERFALL(0), 690 CF_INST(SQ_CF_INST_ALU), 691 WHOLE_QUAD_MODE(0), 692 BARRIER(1)); 693 /* 3 */ 694 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 695 TYPE(SQ_EXPORT_PIXEL), 696 RW_GPR(2), 697 RW_REL(ABSOLUTE), 698 INDEX_GPR(0), 699 ELEM_SIZE(3)); 700 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 701 SRC_SEL_Y(SQ_SEL_Y), 702 SRC_SEL_Z(SQ_SEL_Z), 703 SRC_SEL_W(SQ_SEL_W), 704 R6xx_ELEM_LOOP(0), 705 BURST_COUNT(1), 706 END_OF_PROGRAM(1), 707 VALID_PIXEL_MODE(0), 708 CF_INST(SQ_CF_INST_EXPORT_DONE), 709 WHOLE_QUAD_MODE(0), 710 BARRIER(1)); 711 /* 4,5,6,7 */ 712 /* r2.x = MAD(c0.w, r1.x, c0.x) */ 713 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), 714 SRC0_REL(ABSOLUTE), 715 SRC0_ELEM(ELEM_W), 716 SRC0_NEG(0), 717 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 718 SRC1_REL(ABSOLUTE), 719 SRC1_ELEM(ELEM_X), 720 SRC1_NEG(0), 721 INDEX_MODE(SQ_INDEX_LOOP), 722 PRED_SEL(SQ_PRED_SEL_OFF), 723 LAST(0)); 724 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0), 725 SRC2_REL(ABSOLUTE), 726 SRC2_ELEM(ELEM_X), 727 SRC2_NEG(0), 728 ALU_INST(SQ_OP3_INST_MULADD), 729 BANK_SWIZZLE(SQ_ALU_VEC_012), 730 DST_GPR(2), 731 DST_REL(ABSOLUTE), 732 DST_ELEM(ELEM_X), 733 CLAMP(0)); 734 /* r2.y = MAD(c0.w, r1.x, c0.y) */ 735 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), 736 SRC0_REL(ABSOLUTE), 737 SRC0_ELEM(ELEM_W), 738 SRC0_NEG(0), 739 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 740 SRC1_REL(ABSOLUTE), 741 SRC1_ELEM(ELEM_X), 742 SRC1_NEG(0), 743 INDEX_MODE(SQ_INDEX_LOOP), 744 PRED_SEL(SQ_PRED_SEL_OFF), 745 LAST(0)); 746 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0), 747 SRC2_REL(ABSOLUTE), 748 SRC2_ELEM(ELEM_Y), 749 SRC2_NEG(0), 750 ALU_INST(SQ_OP3_INST_MULADD), 751 BANK_SWIZZLE(SQ_ALU_VEC_012), 752 DST_GPR(2), 753 DST_REL(ABSOLUTE), 754 DST_ELEM(ELEM_Y), 755 CLAMP(0)); 756 /* r2.z = MAD(c0.w, r1.x, c0.z) */ 757 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), 758 SRC0_REL(ABSOLUTE), 759 SRC0_ELEM(ELEM_W), 760 SRC0_NEG(0), 761 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 762 SRC1_REL(ABSOLUTE), 763 SRC1_ELEM(ELEM_X), 764 SRC1_NEG(0), 765 INDEX_MODE(SQ_INDEX_LOOP), 766 PRED_SEL(SQ_PRED_SEL_OFF), 767 LAST(0)); 768 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0), 769 SRC2_REL(ABSOLUTE), 770 SRC2_ELEM(ELEM_Z), 771 SRC2_NEG(0), 772 ALU_INST(SQ_OP3_INST_MULADD), 773 BANK_SWIZZLE(SQ_ALU_VEC_012), 774 DST_GPR(2), 775 DST_REL(ABSOLUTE), 776 DST_ELEM(ELEM_Z), 777 CLAMP(0)); 778 /* r2.w = MAD(0, 0, 1) */ 779 shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), 780 SRC0_REL(ABSOLUTE), 781 SRC0_ELEM(ELEM_X), 782 SRC0_NEG(0), 783 SRC1_SEL(SQ_ALU_SRC_0), 784 SRC1_REL(ABSOLUTE), 785 SRC1_ELEM(ELEM_X), 786 SRC1_NEG(0), 787 INDEX_MODE(SQ_INDEX_LOOP), 788 PRED_SEL(SQ_PRED_SEL_OFF), 789 LAST(1)); 790 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), 791 SRC2_REL(ABSOLUTE), 792 SRC2_ELEM(ELEM_X), 793 SRC2_NEG(0), 794 ALU_INST(SQ_OP3_INST_MULADD), 795 BANK_SWIZZLE(SQ_ALU_VEC_012), 796 DST_GPR(2), 797 DST_REL(ABSOLUTE), 798 DST_ELEM(ELEM_W), 799 CLAMP(0)); 800 801 /* 8,9,10,11 */ 802 /* r2.x = MAD(c1.x, r1.y, pv.x) */ 803 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1), 804 SRC0_REL(ABSOLUTE), 805 SRC0_ELEM(ELEM_X), 806 SRC0_NEG(0), 807 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 808 SRC1_REL(ABSOLUTE), 809 SRC1_ELEM(ELEM_Y), 810 SRC1_NEG(0), 811 INDEX_MODE(SQ_INDEX_LOOP), 812 PRED_SEL(SQ_PRED_SEL_OFF), 813 LAST(0)); 814 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 815 SRC2_REL(ABSOLUTE), 816 SRC2_ELEM(ELEM_X), 817 SRC2_NEG(0), 818 ALU_INST(SQ_OP3_INST_MULADD), 819 BANK_SWIZZLE(SQ_ALU_VEC_012), 820 DST_GPR(2), 821 DST_REL(ABSOLUTE), 822 DST_ELEM(ELEM_X), 823 CLAMP(0)); 824 /* r2.y = MAD(c1.y, r1.y, pv.y) */ 825 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1), 826 SRC0_REL(ABSOLUTE), 827 SRC0_ELEM(ELEM_Y), 828 SRC0_NEG(0), 829 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 830 SRC1_REL(ABSOLUTE), 831 SRC1_ELEM(ELEM_Y), 832 SRC1_NEG(0), 833 INDEX_MODE(SQ_INDEX_LOOP), 834 PRED_SEL(SQ_PRED_SEL_OFF), 835 LAST(0)); 836 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 837 SRC2_REL(ABSOLUTE), 838 SRC2_ELEM(ELEM_Y), 839 SRC2_NEG(0), 840 ALU_INST(SQ_OP3_INST_MULADD), 841 BANK_SWIZZLE(SQ_ALU_VEC_012), 842 DST_GPR(2), 843 DST_REL(ABSOLUTE), 844 DST_ELEM(ELEM_Y), 845 CLAMP(0)); 846 /* r2.z = MAD(c1.z, r1.y, pv.z) */ 847 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1), 848 SRC0_REL(ABSOLUTE), 849 SRC0_ELEM(ELEM_Z), 850 SRC0_NEG(0), 851 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 852 SRC1_REL(ABSOLUTE), 853 SRC1_ELEM(ELEM_Y), 854 SRC1_NEG(0), 855 INDEX_MODE(SQ_INDEX_LOOP), 856 PRED_SEL(SQ_PRED_SEL_OFF), 857 LAST(0)); 858 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 859 SRC2_REL(ABSOLUTE), 860 SRC2_ELEM(ELEM_Z), 861 SRC2_NEG(0), 862 ALU_INST(SQ_OP3_INST_MULADD), 863 BANK_SWIZZLE(SQ_ALU_VEC_012), 864 DST_GPR(2), 865 DST_REL(ABSOLUTE), 866 DST_ELEM(ELEM_Z), 867 CLAMP(0)); 868 /* r2.w = MAD(0, 0, 1) */ 869 shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), 870 SRC0_REL(ABSOLUTE), 871 SRC0_ELEM(ELEM_X), 872 SRC0_NEG(0), 873 SRC1_SEL(SQ_ALU_SRC_0), 874 SRC1_REL(ABSOLUTE), 875 SRC1_ELEM(ELEM_X), 876 SRC1_NEG(0), 877 INDEX_MODE(SQ_INDEX_LOOP), 878 PRED_SEL(SQ_PRED_SEL_OFF), 879 LAST(1)); 880 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), 881 SRC2_REL(ABSOLUTE), 882 SRC2_ELEM(ELEM_W), 883 SRC2_NEG(0), 884 ALU_INST(SQ_OP3_INST_MULADD), 885 BANK_SWIZZLE(SQ_ALU_VEC_012), 886 DST_GPR(2), 887 DST_REL(ABSOLUTE), 888 DST_ELEM(ELEM_W), 889 CLAMP(0)); 890 /* 12,13,14,15 */ 891 /* r2.x = MAD(c2.x, r1.z, pv.x) */ 892 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2), 893 SRC0_REL(ABSOLUTE), 894 SRC0_ELEM(ELEM_X), 895 SRC0_NEG(0), 896 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 897 SRC1_REL(ABSOLUTE), 898 SRC1_ELEM(ELEM_Z), 899 SRC1_NEG(0), 900 INDEX_MODE(SQ_INDEX_LOOP), 901 PRED_SEL(SQ_PRED_SEL_OFF), 902 LAST(0)); 903 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 904 SRC2_REL(ABSOLUTE), 905 SRC2_ELEM(ELEM_X), 906 SRC2_NEG(0), 907 ALU_INST(SQ_OP3_INST_MULADD), 908 BANK_SWIZZLE(SQ_ALU_VEC_012), 909 DST_GPR(2), 910 DST_REL(ABSOLUTE), 911 DST_ELEM(ELEM_X), 912 CLAMP(1)); 913 /* r2.y = MAD(c2.y, r1.z, pv.y) */ 914 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2), 915 SRC0_REL(ABSOLUTE), 916 SRC0_ELEM(ELEM_Y), 917 SRC0_NEG(0), 918 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 919 SRC1_REL(ABSOLUTE), 920 SRC1_ELEM(ELEM_Z), 921 SRC1_NEG(0), 922 INDEX_MODE(SQ_INDEX_LOOP), 923 PRED_SEL(SQ_PRED_SEL_OFF), 924 LAST(0)); 925 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 926 SRC2_REL(ABSOLUTE), 927 SRC2_ELEM(ELEM_Y), 928 SRC2_NEG(0), 929 ALU_INST(SQ_OP3_INST_MULADD), 930 BANK_SWIZZLE(SQ_ALU_VEC_012), 931 DST_GPR(2), 932 DST_REL(ABSOLUTE), 933 DST_ELEM(ELEM_Y), 934 CLAMP(1)); 935 /* r2.z = MAD(c2.z, r1.z, pv.z) */ 936 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2), 937 SRC0_REL(ABSOLUTE), 938 SRC0_ELEM(ELEM_Z), 939 SRC0_NEG(0), 940 SRC1_SEL(ALU_SRC_GPR_BASE + 1), 941 SRC1_REL(ABSOLUTE), 942 SRC1_ELEM(ELEM_Z), 943 SRC1_NEG(0), 944 INDEX_MODE(SQ_INDEX_LOOP), 945 PRED_SEL(SQ_PRED_SEL_OFF), 946 LAST(0)); 947 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 948 SRC2_REL(ABSOLUTE), 949 SRC2_ELEM(ELEM_Z), 950 SRC2_NEG(0), 951 ALU_INST(SQ_OP3_INST_MULADD), 952 BANK_SWIZZLE(SQ_ALU_VEC_012), 953 DST_GPR(2), 954 DST_REL(ABSOLUTE), 955 DST_ELEM(ELEM_Z), 956 CLAMP(1)); 957 /* r2.w = MAD(0, 0, 1) */ 958 shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), 959 SRC0_REL(ABSOLUTE), 960 SRC0_ELEM(ELEM_X), 961 SRC0_NEG(0), 962 SRC1_SEL(SQ_ALU_SRC_0), 963 SRC1_REL(ABSOLUTE), 964 SRC1_ELEM(ELEM_X), 965 SRC1_NEG(0), 966 INDEX_MODE(SQ_INDEX_LOOP), 967 PRED_SEL(SQ_PRED_SEL_OFF), 968 LAST(1)); 969 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), 970 SRC2_REL(ABSOLUTE), 971 SRC2_ELEM(ELEM_X), 972 SRC2_NEG(0), 973 ALU_INST(SQ_OP3_INST_MULADD), 974 BANK_SWIZZLE(SQ_ALU_VEC_012), 975 DST_GPR(2), 976 DST_REL(ABSOLUTE), 977 DST_ELEM(ELEM_W), 978 CLAMP(1)); 979 980 /* 16 */ 981 shader[i++] = CF_DWORD0(ADDR(18)); 982 shader[i++] = CF_DWORD1(POP_COUNT(0), 983 CF_CONST(0), 984 COND(SQ_CF_COND_ACTIVE), 985 I_COUNT(3), 986 CALL_COUNT(0), 987 END_OF_PROGRAM(0), 988 VALID_PIXEL_MODE(0), 989 CF_INST(SQ_CF_INST_TEX), 990 WHOLE_QUAD_MODE(0), 991 BARRIER(1)); 992 /* 17 */ 993 shader[i++] = CF_DWORD0(ADDR(0)); 994 shader[i++] = CF_DWORD1(POP_COUNT(0), 995 CF_CONST(0), 996 COND(SQ_CF_COND_ACTIVE), 997 I_COUNT(0), 998 CALL_COUNT(0), 999 END_OF_PROGRAM(0), 1000 VALID_PIXEL_MODE(0), 1001 CF_INST(SQ_CF_INST_RETURN), 1002 WHOLE_QUAD_MODE(0), 1003 BARRIER(1)); 1004 /* 18/19 */ 1005 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1006 BC_FRAC_MODE(0), 1007 FETCH_WHOLE_QUAD(0), 1008 RESOURCE_ID(0), 1009 SRC_GPR(0), 1010 SRC_REL(ABSOLUTE), 1011 R7xx_ALT_CONST(0)); 1012 shader[i++] = TEX_DWORD1(DST_GPR(1), 1013 DST_REL(ABSOLUTE), 1014 DST_SEL_X(SQ_SEL_X), 1015 DST_SEL_Y(SQ_SEL_MASK), 1016 DST_SEL_Z(SQ_SEL_MASK), 1017 DST_SEL_W(SQ_SEL_1), 1018 LOD_BIAS(0), 1019 COORD_TYPE_X(TEX_NORMALIZED), 1020 COORD_TYPE_Y(TEX_NORMALIZED), 1021 COORD_TYPE_Z(TEX_NORMALIZED), 1022 COORD_TYPE_W(TEX_NORMALIZED)); 1023 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1024 OFFSET_Y(0), 1025 OFFSET_Z(0), 1026 SAMPLER_ID(0), 1027 SRC_SEL_X(SQ_SEL_X), 1028 SRC_SEL_Y(SQ_SEL_Y), 1029 SRC_SEL_Z(SQ_SEL_0), 1030 SRC_SEL_W(SQ_SEL_1)); 1031 shader[i++] = TEX_DWORD_PAD; 1032 /* 20/21 */ 1033 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1034 BC_FRAC_MODE(0), 1035 FETCH_WHOLE_QUAD(0), 1036 RESOURCE_ID(1), 1037 SRC_GPR(0), 1038 SRC_REL(ABSOLUTE), 1039 R7xx_ALT_CONST(0)); 1040 shader[i++] = TEX_DWORD1(DST_GPR(1), 1041 DST_REL(ABSOLUTE), 1042 DST_SEL_X(SQ_SEL_MASK), 1043 DST_SEL_Y(SQ_SEL_MASK), 1044 DST_SEL_Z(SQ_SEL_X), 1045 DST_SEL_W(SQ_SEL_MASK), 1046 LOD_BIAS(0), 1047 COORD_TYPE_X(TEX_NORMALIZED), 1048 COORD_TYPE_Y(TEX_NORMALIZED), 1049 COORD_TYPE_Z(TEX_NORMALIZED), 1050 COORD_TYPE_W(TEX_NORMALIZED)); 1051 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1052 OFFSET_Y(0), 1053 OFFSET_Z(0), 1054 SAMPLER_ID(1), 1055 SRC_SEL_X(SQ_SEL_X), 1056 SRC_SEL_Y(SQ_SEL_Y), 1057 SRC_SEL_Z(SQ_SEL_0), 1058 SRC_SEL_W(SQ_SEL_1)); 1059 shader[i++] = TEX_DWORD_PAD; 1060 /* 22/23 */ 1061 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1062 BC_FRAC_MODE(0), 1063 FETCH_WHOLE_QUAD(0), 1064 RESOURCE_ID(2), 1065 SRC_GPR(0), 1066 SRC_REL(ABSOLUTE), 1067 R7xx_ALT_CONST(0)); 1068 shader[i++] = TEX_DWORD1(DST_GPR(1), 1069 DST_REL(ABSOLUTE), 1070 DST_SEL_X(SQ_SEL_MASK), 1071 DST_SEL_Y(SQ_SEL_X), 1072 DST_SEL_Z(SQ_SEL_MASK), 1073 DST_SEL_W(SQ_SEL_MASK), 1074 LOD_BIAS(0), 1075 COORD_TYPE_X(TEX_NORMALIZED), 1076 COORD_TYPE_Y(TEX_NORMALIZED), 1077 COORD_TYPE_Z(TEX_NORMALIZED), 1078 COORD_TYPE_W(TEX_NORMALIZED)); 1079 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1080 OFFSET_Y(0), 1081 OFFSET_Z(0), 1082 SAMPLER_ID(2), 1083 SRC_SEL_X(SQ_SEL_X), 1084 SRC_SEL_Y(SQ_SEL_Y), 1085 SRC_SEL_Z(SQ_SEL_0), 1086 SRC_SEL_W(SQ_SEL_1)); 1087 shader[i++] = TEX_DWORD_PAD; 1088 /* 24 */ 1089 shader[i++] = CF_DWORD0(ADDR(26)); 1090 shader[i++] = CF_DWORD1(POP_COUNT(0), 1091 CF_CONST(0), 1092 COND(SQ_CF_COND_ACTIVE), 1093 I_COUNT(1), 1094 CALL_COUNT(0), 1095 END_OF_PROGRAM(0), 1096 VALID_PIXEL_MODE(0), 1097 CF_INST(SQ_CF_INST_TEX), 1098 WHOLE_QUAD_MODE(0), 1099 BARRIER(1)); 1100 /* 25 */ 1101 shader[i++] = CF_DWORD0(ADDR(0)); 1102 shader[i++] = CF_DWORD1(POP_COUNT(0), 1103 CF_CONST(0), 1104 COND(SQ_CF_COND_ACTIVE), 1105 I_COUNT(0), 1106 CALL_COUNT(0), 1107 END_OF_PROGRAM(0), 1108 VALID_PIXEL_MODE(0), 1109 CF_INST(SQ_CF_INST_RETURN), 1110 WHOLE_QUAD_MODE(0), 1111 BARRIER(1)); 1112 /* 26/27 */ 1113 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1114 BC_FRAC_MODE(0), 1115 FETCH_WHOLE_QUAD(0), 1116 RESOURCE_ID(0), 1117 SRC_GPR(0), 1118 SRC_REL(ABSOLUTE), 1119 R7xx_ALT_CONST(0)); 1120 shader[i++] = TEX_DWORD1(DST_GPR(1), 1121 DST_REL(ABSOLUTE), 1122 DST_SEL_X(SQ_SEL_X), 1123 DST_SEL_Y(SQ_SEL_Y), 1124 DST_SEL_Z(SQ_SEL_Z), 1125 DST_SEL_W(SQ_SEL_1), 1126 LOD_BIAS(0), 1127 COORD_TYPE_X(TEX_NORMALIZED), 1128 COORD_TYPE_Y(TEX_NORMALIZED), 1129 COORD_TYPE_Z(TEX_NORMALIZED), 1130 COORD_TYPE_W(TEX_NORMALIZED)); 1131 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1132 OFFSET_Y(0), 1133 OFFSET_Z(0), 1134 SAMPLER_ID(0), 1135 SRC_SEL_X(SQ_SEL_X), 1136 SRC_SEL_Y(SQ_SEL_Y), 1137 SRC_SEL_Z(SQ_SEL_0), 1138 SRC_SEL_W(SQ_SEL_1)); 1139 shader[i++] = TEX_DWORD_PAD; 1140 1141 return i; 1142} 1143 1144/* comp vs --------------------------------------- */ 1145int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) 1146{ 1147 int i = 0; 1148 1149 /* 0 */ 1150 shader[i++] = CF_DWORD0(ADDR(3)); 1151 shader[i++] = CF_DWORD1(POP_COUNT(0), 1152 CF_CONST(0), 1153 COND(SQ_CF_COND_BOOL), 1154 I_COUNT(0), 1155 CALL_COUNT(0), 1156 END_OF_PROGRAM(0), 1157 VALID_PIXEL_MODE(0), 1158 CF_INST(SQ_CF_INST_CALL), 1159 WHOLE_QUAD_MODE(0), 1160 BARRIER(0)); 1161 /* 1 */ 1162 shader[i++] = CF_DWORD0(ADDR(9)); 1163 shader[i++] = CF_DWORD1(POP_COUNT(0), 1164 CF_CONST(0), 1165 COND(SQ_CF_COND_NOT_BOOL), 1166 I_COUNT(0), 1167 CALL_COUNT(0), 1168 END_OF_PROGRAM(0), 1169 VALID_PIXEL_MODE(0), 1170 CF_INST(SQ_CF_INST_CALL), 1171 WHOLE_QUAD_MODE(0), 1172 BARRIER(0)); 1173 /* 2 */ 1174 shader[i++] = CF_DWORD0(ADDR(0)); 1175 shader[i++] = CF_DWORD1(POP_COUNT(0), 1176 CF_CONST(0), 1177 COND(SQ_CF_COND_ACTIVE), 1178 I_COUNT(0), 1179 CALL_COUNT(0), 1180 END_OF_PROGRAM(1), 1181 VALID_PIXEL_MODE(0), 1182 CF_INST(SQ_CF_INST_NOP), 1183 WHOLE_QUAD_MODE(0), 1184 BARRIER(1)); 1185 /* 3 - mask sub */ 1186 shader[i++] = CF_DWORD0(ADDR(44)); 1187 shader[i++] = CF_DWORD1(POP_COUNT(0), 1188 CF_CONST(0), 1189 COND(SQ_CF_COND_ACTIVE), 1190 I_COUNT(3), 1191 CALL_COUNT(0), 1192 END_OF_PROGRAM(0), 1193 VALID_PIXEL_MODE(0), 1194 CF_INST(SQ_CF_INST_VTX), 1195 WHOLE_QUAD_MODE(0), 1196 BARRIER(1)); 1197 1198 /* 4 - ALU */ 1199 shader[i++] = CF_ALU_DWORD0(ADDR(14), 1200 KCACHE_BANK0(0), 1201 KCACHE_BANK1(0), 1202 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 1203 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 1204 KCACHE_ADDR0(0), 1205 KCACHE_ADDR1(0), 1206 I_COUNT(20), 1207 USES_WATERFALL(0), 1208 CF_INST(SQ_CF_INST_ALU), 1209 WHOLE_QUAD_MODE(0), 1210 BARRIER(1)); 1211 1212 /* 5 - dst */ 1213 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 1214 TYPE(SQ_EXPORT_POS), 1215 RW_GPR(2), 1216 RW_REL(ABSOLUTE), 1217 INDEX_GPR(0), 1218 ELEM_SIZE(0)); 1219 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1220 SRC_SEL_Y(SQ_SEL_Y), 1221 SRC_SEL_Z(SQ_SEL_0), 1222 SRC_SEL_W(SQ_SEL_1), 1223 R6xx_ELEM_LOOP(0), 1224 BURST_COUNT(1), 1225 END_OF_PROGRAM(0), 1226 VALID_PIXEL_MODE(0), 1227 CF_INST(SQ_CF_INST_EXPORT_DONE), 1228 WHOLE_QUAD_MODE(0), 1229 BARRIER(1)); 1230 /* 6 - src */ 1231 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 1232 TYPE(SQ_EXPORT_PARAM), 1233 RW_GPR(1), 1234 RW_REL(ABSOLUTE), 1235 INDEX_GPR(0), 1236 ELEM_SIZE(0)); 1237 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1238 SRC_SEL_Y(SQ_SEL_Y), 1239 SRC_SEL_Z(SQ_SEL_0), 1240 SRC_SEL_W(SQ_SEL_1), 1241 R6xx_ELEM_LOOP(0), 1242 BURST_COUNT(1), 1243 END_OF_PROGRAM(0), 1244 VALID_PIXEL_MODE(0), 1245 CF_INST(SQ_CF_INST_EXPORT), 1246 WHOLE_QUAD_MODE(0), 1247 BARRIER(0)); 1248 /* 7 - mask */ 1249 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1), 1250 TYPE(SQ_EXPORT_PARAM), 1251 RW_GPR(0), 1252 RW_REL(ABSOLUTE), 1253 INDEX_GPR(0), 1254 ELEM_SIZE(0)); 1255 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1256 SRC_SEL_Y(SQ_SEL_Y), 1257 SRC_SEL_Z(SQ_SEL_0), 1258 SRC_SEL_W(SQ_SEL_1), 1259 R6xx_ELEM_LOOP(0), 1260 BURST_COUNT(1), 1261 END_OF_PROGRAM(0), 1262 VALID_PIXEL_MODE(0), 1263 CF_INST(SQ_CF_INST_EXPORT_DONE), 1264 WHOLE_QUAD_MODE(0), 1265 BARRIER(0)); 1266 /* 8 */ 1267 shader[i++] = CF_DWORD0(ADDR(0)); 1268 shader[i++] = CF_DWORD1(POP_COUNT(0), 1269 CF_CONST(0), 1270 COND(SQ_CF_COND_ACTIVE), 1271 I_COUNT(0), 1272 CALL_COUNT(0), 1273 END_OF_PROGRAM(0), 1274 VALID_PIXEL_MODE(0), 1275 CF_INST(SQ_CF_INST_RETURN), 1276 WHOLE_QUAD_MODE(0), 1277 BARRIER(1)); 1278 /* 9 - non-mask sub */ 1279 shader[i++] = CF_DWORD0(ADDR(50)); 1280 shader[i++] = CF_DWORD1(POP_COUNT(0), 1281 CF_CONST(0), 1282 COND(SQ_CF_COND_ACTIVE), 1283 I_COUNT(2), 1284 CALL_COUNT(0), 1285 END_OF_PROGRAM(0), 1286 VALID_PIXEL_MODE(0), 1287 CF_INST(SQ_CF_INST_VTX), 1288 WHOLE_QUAD_MODE(0), 1289 BARRIER(1)); 1290 1291 /* 10 - ALU */ 1292 shader[i++] = CF_ALU_DWORD0(ADDR(34), 1293 KCACHE_BANK0(0), 1294 KCACHE_BANK1(0), 1295 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 1296 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 1297 KCACHE_ADDR0(0), 1298 KCACHE_ADDR1(0), 1299 I_COUNT(10), 1300 USES_WATERFALL(0), 1301 CF_INST(SQ_CF_INST_ALU), 1302 WHOLE_QUAD_MODE(0), 1303 BARRIER(1)); 1304 1305 /* 11 - dst */ 1306 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 1307 TYPE(SQ_EXPORT_POS), 1308 RW_GPR(1), 1309 RW_REL(ABSOLUTE), 1310 INDEX_GPR(0), 1311 ELEM_SIZE(0)); 1312 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1313 SRC_SEL_Y(SQ_SEL_Y), 1314 SRC_SEL_Z(SQ_SEL_0), 1315 SRC_SEL_W(SQ_SEL_1), 1316 R6xx_ELEM_LOOP(0), 1317 BURST_COUNT(0), 1318 END_OF_PROGRAM(0), 1319 VALID_PIXEL_MODE(0), 1320 CF_INST(SQ_CF_INST_EXPORT_DONE), 1321 WHOLE_QUAD_MODE(0), 1322 BARRIER(1)); 1323 /* 12 - src */ 1324 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 1325 TYPE(SQ_EXPORT_PARAM), 1326 RW_GPR(0), 1327 RW_REL(ABSOLUTE), 1328 INDEX_GPR(0), 1329 ELEM_SIZE(0)); 1330 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1331 SRC_SEL_Y(SQ_SEL_Y), 1332 SRC_SEL_Z(SQ_SEL_0), 1333 SRC_SEL_W(SQ_SEL_1), 1334 R6xx_ELEM_LOOP(0), 1335 BURST_COUNT(0), 1336 END_OF_PROGRAM(0), 1337 VALID_PIXEL_MODE(0), 1338 CF_INST(SQ_CF_INST_EXPORT_DONE), 1339 WHOLE_QUAD_MODE(0), 1340 BARRIER(0)); 1341 /* 13 */ 1342 shader[i++] = CF_DWORD0(ADDR(0)); 1343 shader[i++] = CF_DWORD1(POP_COUNT(0), 1344 CF_CONST(0), 1345 COND(SQ_CF_COND_ACTIVE), 1346 I_COUNT(0), 1347 CALL_COUNT(0), 1348 END_OF_PROGRAM(0), 1349 VALID_PIXEL_MODE(0), 1350 CF_INST(SQ_CF_INST_RETURN), 1351 WHOLE_QUAD_MODE(0), 1352 BARRIER(1)); 1353 1354 1355 /* 14 srcX.x DOT4 - mask */ 1356 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1357 SRC0_REL(ABSOLUTE), 1358 SRC0_ELEM(ELEM_X), 1359 SRC0_NEG(0), 1360 SRC1_SEL(ALU_SRC_CFILE_BASE + 0), 1361 SRC1_REL(ABSOLUTE), 1362 SRC1_ELEM(ELEM_X), 1363 SRC1_NEG(0), 1364 INDEX_MODE(SQ_INDEX_LOOP), 1365 PRED_SEL(SQ_PRED_SEL_OFF), 1366 LAST(0)); 1367 shader[i++] = ALU_DWORD1_OP2(ChipSet, 1368 SRC0_ABS(0), 1369 SRC1_ABS(0), 1370 UPDATE_EXECUTE_MASK(0), 1371 UPDATE_PRED(0), 1372 WRITE_MASK(1), 1373 FOG_MERGE(0), 1374 OMOD(SQ_ALU_OMOD_OFF), 1375 ALU_INST(SQ_OP2_INST_DOT4), 1376 BANK_SWIZZLE(SQ_ALU_VEC_012), 1377 DST_GPR(3), 1378 DST_REL(ABSOLUTE), 1379 DST_ELEM(ELEM_X), 1380 CLAMP(0)); 1381 1382 /* 15 srcX.y DOT4 - mask */ 1383 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1384 SRC0_REL(ABSOLUTE), 1385 SRC0_ELEM(ELEM_Y), 1386 SRC0_NEG(0), 1387 SRC1_SEL(ALU_SRC_CFILE_BASE + 0), 1388 SRC1_REL(ABSOLUTE), 1389 SRC1_ELEM(ELEM_Y), 1390 SRC1_NEG(0), 1391 INDEX_MODE(SQ_INDEX_LOOP), 1392 PRED_SEL(SQ_PRED_SEL_OFF), 1393 LAST(0)); 1394 shader[i++] = ALU_DWORD1_OP2(ChipSet, 1395 SRC0_ABS(0), 1396 SRC1_ABS(0), 1397 UPDATE_EXECUTE_MASK(0), 1398 UPDATE_PRED(0), 1399 WRITE_MASK(0), 1400 FOG_MERGE(0), 1401 OMOD(SQ_ALU_OMOD_OFF), 1402 ALU_INST(SQ_OP2_INST_DOT4), 1403 BANK_SWIZZLE(SQ_ALU_VEC_012), 1404 DST_GPR(3), 1405 DST_REL(ABSOLUTE), 1406 DST_ELEM(ELEM_Y), 1407 CLAMP(0)); 1408 1409 /* 16 srcX.z DOT4 - mask */ 1410 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1411 SRC0_REL(ABSOLUTE), 1412 SRC0_ELEM(ELEM_Z), 1413 SRC0_NEG(0), 1414 SRC1_SEL(ALU_SRC_CFILE_BASE + 0), 1415 SRC1_REL(ABSOLUTE), 1416 SRC1_ELEM(ELEM_Z), 1417 SRC1_NEG(0), 1418 INDEX_MODE(SQ_INDEX_LOOP), 1419 PRED_SEL(SQ_PRED_SEL_OFF), 1420 LAST(0)); 1421 shader[i++] = ALU_DWORD1_OP2(ChipSet, 1422 SRC0_ABS(0), 1423 SRC1_ABS(0), 1424 UPDATE_EXECUTE_MASK(0), 1425 UPDATE_PRED(0), 1426 WRITE_MASK(0), 1427 FOG_MERGE(0), 1428 OMOD(SQ_ALU_OMOD_OFF), 1429 ALU_INST(SQ_OP2_INST_DOT4), 1430 BANK_SWIZZLE(SQ_ALU_VEC_012), 1431 DST_GPR(3), 1432 DST_REL(ABSOLUTE), 1433 DST_ELEM(ELEM_Z), 1434 CLAMP(0)); 1435 1436 /* 17 srcX.w DOT4 - mask */ 1437 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1438 SRC0_REL(ABSOLUTE), 1439 SRC0_ELEM(ELEM_W), 1440 SRC0_NEG(0), 1441 SRC1_SEL(ALU_SRC_CFILE_BASE + 0), 1442 SRC1_REL(ABSOLUTE), 1443 SRC1_ELEM(ELEM_W), 1444 SRC1_NEG(0), 1445 INDEX_MODE(SQ_INDEX_LOOP), 1446 PRED_SEL(SQ_PRED_SEL_OFF), 1447 LAST(1)); 1448 shader[i++] = ALU_DWORD1_OP2(ChipSet, 1449 SRC0_ABS(0), 1450 SRC1_ABS(0), 1451 UPDATE_EXECUTE_MASK(0), 1452 UPDATE_PRED(0), 1453 WRITE_MASK(0), 1454 FOG_MERGE(0), 1455 OMOD(SQ_ALU_OMOD_OFF), 1456 ALU_INST(SQ_OP2_INST_DOT4), 1457 BANK_SWIZZLE(SQ_ALU_VEC_012), 1458 DST_GPR(3), 1459 DST_REL(ABSOLUTE), 1460 DST_ELEM(ELEM_W), 1461 CLAMP(0)); 1462 1463 /* 18 srcY.x DOT4 - mask */ 1464 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1465 SRC0_REL(ABSOLUTE), 1466 SRC0_ELEM(ELEM_X), 1467 SRC0_NEG(0), 1468 SRC1_SEL(ALU_SRC_CFILE_BASE + 1), 1469 SRC1_REL(ABSOLUTE), 1470 SRC1_ELEM(ELEM_X), 1471 SRC1_NEG(0), 1472 INDEX_MODE(SQ_INDEX_LOOP), 1473 PRED_SEL(SQ_PRED_SEL_OFF), 1474 LAST(0)); 1475 shader[i++] = ALU_DWORD1_OP2(ChipSet, 1476 SRC0_ABS(0), 1477 SRC1_ABS(0), 1478 UPDATE_EXECUTE_MASK(0), 1479 UPDATE_PRED(0), 1480 WRITE_MASK(0), 1481 FOG_MERGE(0), 1482 OMOD(SQ_ALU_OMOD_OFF), 1483 ALU_INST(SQ_OP2_INST_DOT4), 1484 BANK_SWIZZLE(SQ_ALU_VEC_012), 1485 DST_GPR(3), 1486 DST_REL(ABSOLUTE), 1487 DST_ELEM(ELEM_X), 1488 CLAMP(0)); 1489 1490 /* 19 srcY.y DOT4 - mask */ 1491 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1492 SRC0_REL(ABSOLUTE), 1493 SRC0_ELEM(ELEM_Y), 1494 SRC0_NEG(0), 1495 SRC1_SEL(ALU_SRC_CFILE_BASE + 1), 1496 SRC1_REL(ABSOLUTE), 1497 SRC1_ELEM(ELEM_Y), 1498 SRC1_NEG(0), 1499 INDEX_MODE(SQ_INDEX_LOOP), 1500 PRED_SEL(SQ_PRED_SEL_OFF), 1501 LAST(0)); 1502 shader[i++] = ALU_DWORD1_OP2(ChipSet, 1503 SRC0_ABS(0), 1504 SRC1_ABS(0), 1505 UPDATE_EXECUTE_MASK(0), 1506 UPDATE_PRED(0), 1507 WRITE_MASK(1), 1508 FOG_MERGE(0), 1509 OMOD(SQ_ALU_OMOD_OFF), 1510 ALU_INST(SQ_OP2_INST_DOT4), 1511 BANK_SWIZZLE(SQ_ALU_VEC_012), 1512 DST_GPR(3), 1513 DST_REL(ABSOLUTE), 1514 DST_ELEM(ELEM_Y), 1515 CLAMP(0)); 1516 1517 /* 20 srcY.z DOT4 - mask */ 1518 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1519 SRC0_REL(ABSOLUTE), 1520 SRC0_ELEM(ELEM_Z), 1521 SRC0_NEG(0), 1522 SRC1_SEL(ALU_SRC_CFILE_BASE + 1), 1523 SRC1_REL(ABSOLUTE), 1524 SRC1_ELEM(ELEM_Z), 1525 SRC1_NEG(0), 1526 INDEX_MODE(SQ_INDEX_LOOP), 1527 PRED_SEL(SQ_PRED_SEL_OFF), 1528 LAST(0)); 1529 shader[i++] = ALU_DWORD1_OP2(ChipSet, 1530 SRC0_ABS(0), 1531 SRC1_ABS(0), 1532 UPDATE_EXECUTE_MASK(0), 1533 UPDATE_PRED(0), 1534 WRITE_MASK(0), 1535 FOG_MERGE(0), 1536 OMOD(SQ_ALU_OMOD_OFF), 1537 ALU_INST(SQ_OP2_INST_DOT4), 1538 BANK_SWIZZLE(SQ_ALU_VEC_012), 1539 DST_GPR(3), 1540 DST_REL(ABSOLUTE), 1541 DST_ELEM(ELEM_Z), 1542 CLAMP(0)); 1543 1544 /* 21 srcY.w DOT4 - mask */ 1545 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 1546 SRC0_REL(ABSOLUTE), 1547 SRC0_ELEM(ELEM_W), 1548 SRC0_NEG(0), 1549 SRC1_SEL(ALU_SRC_CFILE_BASE + 1), 1550 SRC1_REL(ABSOLUTE), 1551 SRC1_ELEM(ELEM_W), 1552 SRC1_NEG(0), 1553 INDEX_MODE(SQ_INDEX_LOOP), 1554 PRED_SEL(SQ_PRED_SEL_OFF), 1555 LAST(1)); 1556 shader[i++] = ALU_DWORD1_OP2(ChipSet, 1557 SRC0_ABS(0), 1558 SRC1_ABS(0), 1559 UPDATE_EXECUTE_MASK(0), 1560 UPDATE_PRED(0), 1561 WRITE_MASK(0), 1562 FOG_MERGE(0), 1563 OMOD(SQ_ALU_OMOD_OFF), 1564 ALU_INST(SQ_OP2_INST_DOT4), 1565 BANK_SWIZZLE(SQ_ALU_VEC_012), 1566 DST_GPR(3), 1567 DST_REL(ABSOLUTE), 1568 DST_ELEM(ELEM_W), 1569 CLAMP(0)); 1570 1571 /* 22 maskX.x DOT4 - mask */ 1572 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1573 SRC0_REL(ABSOLUTE), 1574 SRC0_ELEM(ELEM_X), 1575 SRC0_NEG(0), 1576 SRC1_SEL(ALU_SRC_CFILE_BASE + 2), 1577 SRC1_REL(ABSOLUTE), 1578 SRC1_ELEM(ELEM_X), 1579 SRC1_NEG(0), 1580 INDEX_MODE(SQ_INDEX_LOOP), 1581 PRED_SEL(SQ_PRED_SEL_OFF), 1582 LAST(0)); 1583 shader[i++] = ALU_DWORD1_OP2(ChipSet, 1584 SRC0_ABS(0), 1585 SRC1_ABS(0), 1586 UPDATE_EXECUTE_MASK(0), 1587 UPDATE_PRED(0), 1588 WRITE_MASK(1), 1589 FOG_MERGE(0), 1590 OMOD(SQ_ALU_OMOD_OFF), 1591 ALU_INST(SQ_OP2_INST_DOT4), 1592 BANK_SWIZZLE(SQ_ALU_VEC_012), 1593 DST_GPR(4), 1594 DST_REL(ABSOLUTE), 1595 DST_ELEM(ELEM_X), 1596 CLAMP(0)); 1597 1598 /* 23 maskX.y DOT4 - mask */ 1599 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1600 SRC0_REL(ABSOLUTE), 1601 SRC0_ELEM(ELEM_Y), 1602 SRC0_NEG(0), 1603 SRC1_SEL(ALU_SRC_CFILE_BASE + 2), 1604 SRC1_REL(ABSOLUTE), 1605 SRC1_ELEM(ELEM_Y), 1606 SRC1_NEG(0), 1607 INDEX_MODE(SQ_INDEX_LOOP), 1608 PRED_SEL(SQ_PRED_SEL_OFF), 1609 LAST(0)); 1610 shader[i++] = ALU_DWORD1_OP2(ChipSet, 1611 SRC0_ABS(0), 1612 SRC1_ABS(0), 1613 UPDATE_EXECUTE_MASK(0), 1614 UPDATE_PRED(0), 1615 WRITE_MASK(0), 1616 FOG_MERGE(0), 1617 OMOD(SQ_ALU_OMOD_OFF), 1618 ALU_INST(SQ_OP2_INST_DOT4), 1619 BANK_SWIZZLE(SQ_ALU_VEC_012), 1620 DST_GPR(4), 1621 DST_REL(ABSOLUTE), 1622 DST_ELEM(ELEM_Y), 1623 CLAMP(0)); 1624 1625 /* 24 maskX.z DOT4 - mask */ 1626 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1627 SRC0_REL(ABSOLUTE), 1628 SRC0_ELEM(ELEM_Z), 1629 SRC0_NEG(0), 1630 SRC1_SEL(ALU_SRC_CFILE_BASE + 2), 1631 SRC1_REL(ABSOLUTE), 1632 SRC1_ELEM(ELEM_Z), 1633 SRC1_NEG(0), 1634 INDEX_MODE(SQ_INDEX_LOOP), 1635 PRED_SEL(SQ_PRED_SEL_OFF), 1636 LAST(0)); 1637 shader[i++] = ALU_DWORD1_OP2(ChipSet, 1638 SRC0_ABS(0), 1639 SRC1_ABS(0), 1640 UPDATE_EXECUTE_MASK(0), 1641 UPDATE_PRED(0), 1642 WRITE_MASK(0), 1643 FOG_MERGE(0), 1644 OMOD(SQ_ALU_OMOD_OFF), 1645 ALU_INST(SQ_OP2_INST_DOT4), 1646 BANK_SWIZZLE(SQ_ALU_VEC_012), 1647 DST_GPR(4), 1648 DST_REL(ABSOLUTE), 1649 DST_ELEM(ELEM_Z), 1650 CLAMP(0)); 1651 1652 /* 25 maskX.w DOT4 - mask */ 1653 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1654 SRC0_REL(ABSOLUTE), 1655 SRC0_ELEM(ELEM_W), 1656 SRC0_NEG(0), 1657 SRC1_SEL(ALU_SRC_CFILE_BASE + 2), 1658 SRC1_REL(ABSOLUTE), 1659 SRC1_ELEM(ELEM_W), 1660 SRC1_NEG(0), 1661 INDEX_MODE(SQ_INDEX_LOOP), 1662 PRED_SEL(SQ_PRED_SEL_OFF), 1663 LAST(1)); 1664 shader[i++] = ALU_DWORD1_OP2(ChipSet, 1665 SRC0_ABS(0), 1666 SRC1_ABS(0), 1667 UPDATE_EXECUTE_MASK(0), 1668 UPDATE_PRED(0), 1669 WRITE_MASK(0), 1670 FOG_MERGE(0), 1671 OMOD(SQ_ALU_OMOD_OFF), 1672 ALU_INST(SQ_OP2_INST_DOT4), 1673 BANK_SWIZZLE(SQ_ALU_VEC_012), 1674 DST_GPR(4), 1675 DST_REL(ABSOLUTE), 1676 DST_ELEM(ELEM_W), 1677 CLAMP(0)); 1678 1679 /* 26 maskY.x DOT4 - mask */ 1680 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1681 SRC0_REL(ABSOLUTE), 1682 SRC0_ELEM(ELEM_X), 1683 SRC0_NEG(0), 1684 SRC1_SEL(ALU_SRC_CFILE_BASE + 3), 1685 SRC1_REL(ABSOLUTE), 1686 SRC1_ELEM(ELEM_X), 1687 SRC1_NEG(0), 1688 INDEX_MODE(SQ_INDEX_LOOP), 1689 PRED_SEL(SQ_PRED_SEL_OFF), 1690 LAST(0)); 1691 shader[i++] = ALU_DWORD1_OP2(ChipSet, 1692 SRC0_ABS(0), 1693 SRC1_ABS(0), 1694 UPDATE_EXECUTE_MASK(0), 1695 UPDATE_PRED(0), 1696 WRITE_MASK(0), 1697 FOG_MERGE(0), 1698 OMOD(SQ_ALU_OMOD_OFF), 1699 ALU_INST(SQ_OP2_INST_DOT4), 1700 BANK_SWIZZLE(SQ_ALU_VEC_012), 1701 DST_GPR(4), 1702 DST_REL(ABSOLUTE), 1703 DST_ELEM(ELEM_X), 1704 CLAMP(0)); 1705 1706 /* 27 maskY.y DOT4 - mask */ 1707 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1708 SRC0_REL(ABSOLUTE), 1709 SRC0_ELEM(ELEM_Y), 1710 SRC0_NEG(0), 1711 SRC1_SEL(ALU_SRC_CFILE_BASE + 3), 1712 SRC1_REL(ABSOLUTE), 1713 SRC1_ELEM(ELEM_Y), 1714 SRC1_NEG(0), 1715 INDEX_MODE(SQ_INDEX_LOOP), 1716 PRED_SEL(SQ_PRED_SEL_OFF), 1717 LAST(0)); 1718 shader[i++] = ALU_DWORD1_OP2(ChipSet, 1719 SRC0_ABS(0), 1720 SRC1_ABS(0), 1721 UPDATE_EXECUTE_MASK(0), 1722 UPDATE_PRED(0), 1723 WRITE_MASK(1), 1724 FOG_MERGE(0), 1725 OMOD(SQ_ALU_OMOD_OFF), 1726 ALU_INST(SQ_OP2_INST_DOT4), 1727 BANK_SWIZZLE(SQ_ALU_VEC_012), 1728 DST_GPR(4), 1729 DST_REL(ABSOLUTE), 1730 DST_ELEM(ELEM_Y), 1731 CLAMP(0)); 1732 1733 /* 28 maskY.z DOT4 - mask */ 1734 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1735 SRC0_REL(ABSOLUTE), 1736 SRC0_ELEM(ELEM_Z), 1737 SRC0_NEG(0), 1738 SRC1_SEL(ALU_SRC_CFILE_BASE + 3), 1739 SRC1_REL(ABSOLUTE), 1740 SRC1_ELEM(ELEM_Z), 1741 SRC1_NEG(0), 1742 INDEX_MODE(SQ_INDEX_LOOP), 1743 PRED_SEL(SQ_PRED_SEL_OFF), 1744 LAST(0)); 1745 shader[i++] = ALU_DWORD1_OP2(ChipSet, 1746 SRC0_ABS(0), 1747 SRC1_ABS(0), 1748 UPDATE_EXECUTE_MASK(0), 1749 UPDATE_PRED(0), 1750 WRITE_MASK(0), 1751 FOG_MERGE(0), 1752 OMOD(SQ_ALU_OMOD_OFF), 1753 ALU_INST(SQ_OP2_INST_DOT4), 1754 BANK_SWIZZLE(SQ_ALU_VEC_012), 1755 DST_GPR(4), 1756 DST_REL(ABSOLUTE), 1757 DST_ELEM(ELEM_Z), 1758 CLAMP(0)); 1759 1760 /* 29 maskY.w DOT4 - mask */ 1761 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1762 SRC0_REL(ABSOLUTE), 1763 SRC0_ELEM(ELEM_W), 1764 SRC0_NEG(0), 1765 SRC1_SEL(ALU_SRC_CFILE_BASE + 3), 1766 SRC1_REL(ABSOLUTE), 1767 SRC1_ELEM(ELEM_W), 1768 SRC1_NEG(0), 1769 INDEX_MODE(SQ_INDEX_LOOP), 1770 PRED_SEL(SQ_PRED_SEL_OFF), 1771 LAST(1)); 1772 shader[i++] = ALU_DWORD1_OP2(ChipSet, 1773 SRC0_ABS(0), 1774 SRC1_ABS(0), 1775 UPDATE_EXECUTE_MASK(0), 1776 UPDATE_PRED(0), 1777 WRITE_MASK(0), 1778 FOG_MERGE(0), 1779 OMOD(SQ_ALU_OMOD_OFF), 1780 ALU_INST(SQ_OP2_INST_DOT4), 1781 BANK_SWIZZLE(SQ_ALU_VEC_012), 1782 DST_GPR(4), 1783 DST_REL(ABSOLUTE), 1784 DST_ELEM(ELEM_W), 1785 CLAMP(0)); 1786 1787 /* 30 srcX / w */ 1788 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3), 1789 SRC0_REL(ABSOLUTE), 1790 SRC0_ELEM(ELEM_X), 1791 SRC0_NEG(0), 1792 SRC1_SEL(ALU_SRC_CFILE_BASE + 0), 1793 SRC1_REL(ABSOLUTE), 1794 SRC1_ELEM(ELEM_W), 1795 SRC1_NEG(0), 1796 INDEX_MODE(SQ_INDEX_AR_X), 1797 PRED_SEL(SQ_PRED_SEL_OFF), 1798 LAST(1)); 1799 shader[i++] = ALU_DWORD1_OP2(ChipSet, 1800 SRC0_ABS(0), 1801 SRC1_ABS(0), 1802 UPDATE_EXECUTE_MASK(0), 1803 UPDATE_PRED(0), 1804 WRITE_MASK(1), 1805 FOG_MERGE(0), 1806 OMOD(SQ_ALU_OMOD_OFF), 1807 ALU_INST(SQ_OP2_INST_MUL), 1808 BANK_SWIZZLE(SQ_ALU_VEC_012), 1809 DST_GPR(1), 1810 DST_REL(ABSOLUTE), 1811 DST_ELEM(ELEM_X), 1812 CLAMP(0)); 1813 1814 /* 31 srcY / h */ 1815 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3), 1816 SRC0_REL(ABSOLUTE), 1817 SRC0_ELEM(ELEM_Y), 1818 SRC0_NEG(0), 1819 SRC1_SEL(ALU_SRC_CFILE_BASE + 1), 1820 SRC1_REL(ABSOLUTE), 1821 SRC1_ELEM(ELEM_W), 1822 SRC1_NEG(0), 1823 INDEX_MODE(SQ_INDEX_AR_X), 1824 PRED_SEL(SQ_PRED_SEL_OFF), 1825 LAST(1)); 1826 shader[i++] = ALU_DWORD1_OP2(ChipSet, 1827 SRC0_ABS(0), 1828 SRC1_ABS(0), 1829 UPDATE_EXECUTE_MASK(0), 1830 UPDATE_PRED(0), 1831 WRITE_MASK(1), 1832 FOG_MERGE(0), 1833 OMOD(SQ_ALU_OMOD_OFF), 1834 ALU_INST(SQ_OP2_INST_MUL), 1835 BANK_SWIZZLE(SQ_ALU_VEC_012), 1836 DST_GPR(1), 1837 DST_REL(ABSOLUTE), 1838 DST_ELEM(ELEM_Y), 1839 CLAMP(0)); 1840 1841 /* 32 maskX / w */ 1842 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4), 1843 SRC0_REL(ABSOLUTE), 1844 SRC0_ELEM(ELEM_X), 1845 SRC0_NEG(0), 1846 SRC1_SEL(ALU_SRC_CFILE_BASE + 2), 1847 SRC1_REL(ABSOLUTE), 1848 SRC1_ELEM(ELEM_W), 1849 SRC1_NEG(0), 1850 INDEX_MODE(SQ_INDEX_AR_X), 1851 PRED_SEL(SQ_PRED_SEL_OFF), 1852 LAST(1)); 1853 shader[i++] = ALU_DWORD1_OP2(ChipSet, 1854 SRC0_ABS(0), 1855 SRC1_ABS(0), 1856 UPDATE_EXECUTE_MASK(0), 1857 UPDATE_PRED(0), 1858 WRITE_MASK(1), 1859 FOG_MERGE(0), 1860 OMOD(SQ_ALU_OMOD_OFF), 1861 ALU_INST(SQ_OP2_INST_MUL), 1862 BANK_SWIZZLE(SQ_ALU_VEC_012), 1863 DST_GPR(0), 1864 DST_REL(ABSOLUTE), 1865 DST_ELEM(ELEM_X), 1866 CLAMP(0)); 1867 1868 /* 33 maskY / h */ 1869 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4), 1870 SRC0_REL(ABSOLUTE), 1871 SRC0_ELEM(ELEM_Y), 1872 SRC0_NEG(0), 1873 SRC1_SEL(ALU_SRC_CFILE_BASE + 3), 1874 SRC1_REL(ABSOLUTE), 1875 SRC1_ELEM(ELEM_W), 1876 SRC1_NEG(0), 1877 INDEX_MODE(SQ_INDEX_AR_X), 1878 PRED_SEL(SQ_PRED_SEL_OFF), 1879 LAST(1)); 1880 shader[i++] = ALU_DWORD1_OP2(ChipSet, 1881 SRC0_ABS(0), 1882 SRC1_ABS(0), 1883 UPDATE_EXECUTE_MASK(0), 1884 UPDATE_PRED(0), 1885 WRITE_MASK(1), 1886 FOG_MERGE(0), 1887 OMOD(SQ_ALU_OMOD_OFF), 1888 ALU_INST(SQ_OP2_INST_MUL), 1889 BANK_SWIZZLE(SQ_ALU_VEC_012), 1890 DST_GPR(0), 1891 DST_REL(ABSOLUTE), 1892 DST_ELEM(ELEM_Y), 1893 CLAMP(0)); 1894 1895 /* 34 srcX.x DOT4 - non-mask */ 1896 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1897 SRC0_REL(ABSOLUTE), 1898 SRC0_ELEM(ELEM_X), 1899 SRC0_NEG(0), 1900 SRC1_SEL(ALU_SRC_CFILE_BASE + 0), 1901 SRC1_REL(ABSOLUTE), 1902 SRC1_ELEM(ELEM_X), 1903 SRC1_NEG(0), 1904 INDEX_MODE(SQ_INDEX_LOOP), 1905 PRED_SEL(SQ_PRED_SEL_OFF), 1906 LAST(0)); 1907 shader[i++] = ALU_DWORD1_OP2(ChipSet, 1908 SRC0_ABS(0), 1909 SRC1_ABS(0), 1910 UPDATE_EXECUTE_MASK(0), 1911 UPDATE_PRED(0), 1912 WRITE_MASK(1), 1913 FOG_MERGE(0), 1914 OMOD(SQ_ALU_OMOD_OFF), 1915 ALU_INST(SQ_OP2_INST_DOT4), 1916 BANK_SWIZZLE(SQ_ALU_VEC_012), 1917 DST_GPR(2), 1918 DST_REL(ABSOLUTE), 1919 DST_ELEM(ELEM_X), 1920 CLAMP(0)); 1921 1922 /* 35 srcX.y DOT4 - non-mask */ 1923 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1924 SRC0_REL(ABSOLUTE), 1925 SRC0_ELEM(ELEM_Y), 1926 SRC0_NEG(0), 1927 SRC1_SEL(ALU_SRC_CFILE_BASE + 0), 1928 SRC1_REL(ABSOLUTE), 1929 SRC1_ELEM(ELEM_Y), 1930 SRC1_NEG(0), 1931 INDEX_MODE(SQ_INDEX_LOOP), 1932 PRED_SEL(SQ_PRED_SEL_OFF), 1933 LAST(0)); 1934 shader[i++] = ALU_DWORD1_OP2(ChipSet, 1935 SRC0_ABS(0), 1936 SRC1_ABS(0), 1937 UPDATE_EXECUTE_MASK(0), 1938 UPDATE_PRED(0), 1939 WRITE_MASK(0), 1940 FOG_MERGE(0), 1941 OMOD(SQ_ALU_OMOD_OFF), 1942 ALU_INST(SQ_OP2_INST_DOT4), 1943 BANK_SWIZZLE(SQ_ALU_VEC_012), 1944 DST_GPR(2), 1945 DST_REL(ABSOLUTE), 1946 DST_ELEM(ELEM_Y), 1947 CLAMP(0)); 1948 1949 /* 36 srcX.z DOT4 - non-mask */ 1950 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1951 SRC0_REL(ABSOLUTE), 1952 SRC0_ELEM(ELEM_Z), 1953 SRC0_NEG(0), 1954 SRC1_SEL(ALU_SRC_CFILE_BASE + 0), 1955 SRC1_REL(ABSOLUTE), 1956 SRC1_ELEM(ELEM_Z), 1957 SRC1_NEG(0), 1958 INDEX_MODE(SQ_INDEX_LOOP), 1959 PRED_SEL(SQ_PRED_SEL_OFF), 1960 LAST(0)); 1961 shader[i++] = ALU_DWORD1_OP2(ChipSet, 1962 SRC0_ABS(0), 1963 SRC1_ABS(0), 1964 UPDATE_EXECUTE_MASK(0), 1965 UPDATE_PRED(0), 1966 WRITE_MASK(0), 1967 FOG_MERGE(0), 1968 OMOD(SQ_ALU_OMOD_OFF), 1969 ALU_INST(SQ_OP2_INST_DOT4), 1970 BANK_SWIZZLE(SQ_ALU_VEC_012), 1971 DST_GPR(2), 1972 DST_REL(ABSOLUTE), 1973 DST_ELEM(ELEM_Z), 1974 CLAMP(0)); 1975 1976 /* 37 srcX.w DOT4 - non-mask */ 1977 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 1978 SRC0_REL(ABSOLUTE), 1979 SRC0_ELEM(ELEM_W), 1980 SRC0_NEG(0), 1981 SRC1_SEL(ALU_SRC_CFILE_BASE + 0), 1982 SRC1_REL(ABSOLUTE), 1983 SRC1_ELEM(ELEM_W), 1984 SRC1_NEG(0), 1985 INDEX_MODE(SQ_INDEX_LOOP), 1986 PRED_SEL(SQ_PRED_SEL_OFF), 1987 LAST(1)); 1988 shader[i++] = ALU_DWORD1_OP2(ChipSet, 1989 SRC0_ABS(0), 1990 SRC1_ABS(0), 1991 UPDATE_EXECUTE_MASK(0), 1992 UPDATE_PRED(0), 1993 WRITE_MASK(0), 1994 FOG_MERGE(0), 1995 OMOD(SQ_ALU_OMOD_OFF), 1996 ALU_INST(SQ_OP2_INST_DOT4), 1997 BANK_SWIZZLE(SQ_ALU_VEC_012), 1998 DST_GPR(2), 1999 DST_REL(ABSOLUTE), 2000 DST_ELEM(ELEM_W), 2001 CLAMP(0)); 2002 2003 /* 38 srcY.x DOT4 - non-mask */ 2004 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2005 SRC0_REL(ABSOLUTE), 2006 SRC0_ELEM(ELEM_X), 2007 SRC0_NEG(0), 2008 SRC1_SEL(ALU_SRC_CFILE_BASE + 1), 2009 SRC1_REL(ABSOLUTE), 2010 SRC1_ELEM(ELEM_X), 2011 SRC1_NEG(0), 2012 INDEX_MODE(SQ_INDEX_LOOP), 2013 PRED_SEL(SQ_PRED_SEL_OFF), 2014 LAST(0)); 2015 shader[i++] = ALU_DWORD1_OP2(ChipSet, 2016 SRC0_ABS(0), 2017 SRC1_ABS(0), 2018 UPDATE_EXECUTE_MASK(0), 2019 UPDATE_PRED(0), 2020 WRITE_MASK(0), 2021 FOG_MERGE(0), 2022 OMOD(SQ_ALU_OMOD_OFF), 2023 ALU_INST(SQ_OP2_INST_DOT4), 2024 BANK_SWIZZLE(SQ_ALU_VEC_012), 2025 DST_GPR(2), 2026 DST_REL(ABSOLUTE), 2027 DST_ELEM(ELEM_X), 2028 CLAMP(0)); 2029 2030 /* 39 srcY.y DOT4 - non-mask */ 2031 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2032 SRC0_REL(ABSOLUTE), 2033 SRC0_ELEM(ELEM_Y), 2034 SRC0_NEG(0), 2035 SRC1_SEL(ALU_SRC_CFILE_BASE + 1), 2036 SRC1_REL(ABSOLUTE), 2037 SRC1_ELEM(ELEM_Y), 2038 SRC1_NEG(0), 2039 INDEX_MODE(SQ_INDEX_LOOP), 2040 PRED_SEL(SQ_PRED_SEL_OFF), 2041 LAST(0)); 2042 shader[i++] = ALU_DWORD1_OP2(ChipSet, 2043 SRC0_ABS(0), 2044 SRC1_ABS(0), 2045 UPDATE_EXECUTE_MASK(0), 2046 UPDATE_PRED(0), 2047 WRITE_MASK(1), 2048 FOG_MERGE(0), 2049 OMOD(SQ_ALU_OMOD_OFF), 2050 ALU_INST(SQ_OP2_INST_DOT4), 2051 BANK_SWIZZLE(SQ_ALU_VEC_012), 2052 DST_GPR(2), 2053 DST_REL(ABSOLUTE), 2054 DST_ELEM(ELEM_Y), 2055 CLAMP(0)); 2056 2057 /* 40 srcY.z DOT4 - non-mask */ 2058 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2059 SRC0_REL(ABSOLUTE), 2060 SRC0_ELEM(ELEM_Z), 2061 SRC0_NEG(0), 2062 SRC1_SEL(ALU_SRC_CFILE_BASE + 1), 2063 SRC1_REL(ABSOLUTE), 2064 SRC1_ELEM(ELEM_Z), 2065 SRC1_NEG(0), 2066 INDEX_MODE(SQ_INDEX_LOOP), 2067 PRED_SEL(SQ_PRED_SEL_OFF), 2068 LAST(0)); 2069 shader[i++] = ALU_DWORD1_OP2(ChipSet, 2070 SRC0_ABS(0), 2071 SRC1_ABS(0), 2072 UPDATE_EXECUTE_MASK(0), 2073 UPDATE_PRED(0), 2074 WRITE_MASK(0), 2075 FOG_MERGE(0), 2076 OMOD(SQ_ALU_OMOD_OFF), 2077 ALU_INST(SQ_OP2_INST_DOT4), 2078 BANK_SWIZZLE(SQ_ALU_VEC_012), 2079 DST_GPR(2), 2080 DST_REL(ABSOLUTE), 2081 DST_ELEM(ELEM_Z), 2082 CLAMP(0)); 2083 2084 /* 41 srcY.w DOT4 - non-mask */ 2085 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), 2086 SRC0_REL(ABSOLUTE), 2087 SRC0_ELEM(ELEM_W), 2088 SRC0_NEG(0), 2089 SRC1_SEL(ALU_SRC_CFILE_BASE + 1), 2090 SRC1_REL(ABSOLUTE), 2091 SRC1_ELEM(ELEM_W), 2092 SRC1_NEG(0), 2093 INDEX_MODE(SQ_INDEX_LOOP), 2094 PRED_SEL(SQ_PRED_SEL_OFF), 2095 LAST(1)); 2096 shader[i++] = ALU_DWORD1_OP2(ChipSet, 2097 SRC0_ABS(0), 2098 SRC1_ABS(0), 2099 UPDATE_EXECUTE_MASK(0), 2100 UPDATE_PRED(0), 2101 WRITE_MASK(0), 2102 FOG_MERGE(0), 2103 OMOD(SQ_ALU_OMOD_OFF), 2104 ALU_INST(SQ_OP2_INST_DOT4), 2105 BANK_SWIZZLE(SQ_ALU_VEC_012), 2106 DST_GPR(2), 2107 DST_REL(ABSOLUTE), 2108 DST_ELEM(ELEM_W), 2109 CLAMP(0)); 2110 2111 /* 42 srcX / w */ 2112 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2), 2113 SRC0_REL(ABSOLUTE), 2114 SRC0_ELEM(ELEM_X), 2115 SRC0_NEG(0), 2116 SRC1_SEL(ALU_SRC_CFILE_BASE + 0), 2117 SRC1_REL(ABSOLUTE), 2118 SRC1_ELEM(ELEM_W), 2119 SRC1_NEG(0), 2120 INDEX_MODE(SQ_INDEX_AR_X), 2121 PRED_SEL(SQ_PRED_SEL_OFF), 2122 LAST(1)); 2123 shader[i++] = ALU_DWORD1_OP2(ChipSet, 2124 SRC0_ABS(0), 2125 SRC1_ABS(0), 2126 UPDATE_EXECUTE_MASK(0), 2127 UPDATE_PRED(0), 2128 WRITE_MASK(1), 2129 FOG_MERGE(0), 2130 OMOD(SQ_ALU_OMOD_OFF), 2131 ALU_INST(SQ_OP2_INST_MUL), 2132 BANK_SWIZZLE(SQ_ALU_VEC_012), 2133 DST_GPR(0), 2134 DST_REL(ABSOLUTE), 2135 DST_ELEM(ELEM_X), 2136 CLAMP(0)); 2137 2138 /* 43 srcY / h */ 2139 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2), 2140 SRC0_REL(ABSOLUTE), 2141 SRC0_ELEM(ELEM_Y), 2142 SRC0_NEG(0), 2143 SRC1_SEL(ALU_SRC_CFILE_BASE + 1), 2144 SRC1_REL(ABSOLUTE), 2145 SRC1_ELEM(ELEM_W), 2146 SRC1_NEG(0), 2147 INDEX_MODE(SQ_INDEX_AR_X), 2148 PRED_SEL(SQ_PRED_SEL_OFF), 2149 LAST(1)); 2150 shader[i++] = ALU_DWORD1_OP2(ChipSet, 2151 SRC0_ABS(0), 2152 SRC1_ABS(0), 2153 UPDATE_EXECUTE_MASK(0), 2154 UPDATE_PRED(0), 2155 WRITE_MASK(1), 2156 FOG_MERGE(0), 2157 OMOD(SQ_ALU_OMOD_OFF), 2158 ALU_INST(SQ_OP2_INST_MUL), 2159 BANK_SWIZZLE(SQ_ALU_VEC_012), 2160 DST_GPR(0), 2161 DST_REL(ABSOLUTE), 2162 DST_ELEM(ELEM_Y), 2163 CLAMP(0)); 2164 2165 /* 44/45 - dst - mask */ 2166 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 2167 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 2168 FETCH_WHOLE_QUAD(0), 2169 BUFFER_ID(0), 2170 SRC_GPR(0), 2171 SRC_REL(ABSOLUTE), 2172 SRC_SEL_X(SQ_SEL_X), 2173 MEGA_FETCH_COUNT(24)); 2174 shader[i++] = VTX_DWORD1_GPR(DST_GPR(2), 2175 DST_REL(0), 2176 DST_SEL_X(SQ_SEL_X), 2177 DST_SEL_Y(SQ_SEL_Y), 2178 DST_SEL_Z(SQ_SEL_0), 2179 DST_SEL_W(SQ_SEL_1), 2180 USE_CONST_FIELDS(0), 2181 DATA_FORMAT(FMT_32_32_FLOAT), 2182 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 2183 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 2184 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 2185 shader[i++] = VTX_DWORD2(OFFSET(0), 2186#if X_BYTE_ORDER == X_BIG_ENDIAN 2187 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 2188#else 2189 ENDIAN_SWAP(SQ_ENDIAN_NONE), 2190#endif 2191 CONST_BUF_NO_STRIDE(0), 2192 MEGA_FETCH(1)); 2193 shader[i++] = VTX_DWORD_PAD; 2194 /* 46/47 - src */ 2195 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 2196 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 2197 FETCH_WHOLE_QUAD(0), 2198 BUFFER_ID(0), 2199 SRC_GPR(0), 2200 SRC_REL(ABSOLUTE), 2201 SRC_SEL_X(SQ_SEL_X), 2202 MEGA_FETCH_COUNT(8)); 2203 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 2204 DST_REL(0), 2205 DST_SEL_X(SQ_SEL_X), 2206 DST_SEL_Y(SQ_SEL_Y), 2207 DST_SEL_Z(SQ_SEL_1), 2208 DST_SEL_W(SQ_SEL_0), 2209 USE_CONST_FIELDS(0), 2210 DATA_FORMAT(FMT_32_32_FLOAT), 2211 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 2212 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 2213 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 2214 shader[i++] = VTX_DWORD2(OFFSET(8), 2215#if X_BYTE_ORDER == X_BIG_ENDIAN 2216 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 2217#else 2218 ENDIAN_SWAP(SQ_ENDIAN_NONE), 2219#endif 2220 CONST_BUF_NO_STRIDE(0), 2221 MEGA_FETCH(0)); 2222 shader[i++] = VTX_DWORD_PAD; 2223 /* 48/49 - mask */ 2224 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 2225 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 2226 FETCH_WHOLE_QUAD(0), 2227 BUFFER_ID(0), 2228 SRC_GPR(0), 2229 SRC_REL(ABSOLUTE), 2230 SRC_SEL_X(SQ_SEL_X), 2231 MEGA_FETCH_COUNT(8)); 2232 shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), 2233 DST_REL(0), 2234 DST_SEL_X(SQ_SEL_X), 2235 DST_SEL_Y(SQ_SEL_Y), 2236 DST_SEL_Z(SQ_SEL_1), 2237 DST_SEL_W(SQ_SEL_0), 2238 USE_CONST_FIELDS(0), 2239 DATA_FORMAT(FMT_32_32_FLOAT), 2240 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 2241 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 2242 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 2243 shader[i++] = VTX_DWORD2(OFFSET(16), 2244#if X_BYTE_ORDER == X_BIG_ENDIAN 2245 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 2246#else 2247 ENDIAN_SWAP(SQ_ENDIAN_NONE), 2248#endif 2249 CONST_BUF_NO_STRIDE(0), 2250 MEGA_FETCH(0)); 2251 shader[i++] = VTX_DWORD_PAD; 2252 2253 /* 50/51 - dst - non-mask */ 2254 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 2255 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 2256 FETCH_WHOLE_QUAD(0), 2257 BUFFER_ID(0), 2258 SRC_GPR(0), 2259 SRC_REL(ABSOLUTE), 2260 SRC_SEL_X(SQ_SEL_X), 2261 MEGA_FETCH_COUNT(16)); 2262 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 2263 DST_REL(0), 2264 DST_SEL_X(SQ_SEL_X), 2265 DST_SEL_Y(SQ_SEL_Y), 2266 DST_SEL_Z(SQ_SEL_0), 2267 DST_SEL_W(SQ_SEL_1), 2268 USE_CONST_FIELDS(0), 2269 DATA_FORMAT(FMT_32_32_FLOAT), 2270 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 2271 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 2272 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 2273 shader[i++] = VTX_DWORD2(OFFSET(0), 2274#if X_BYTE_ORDER == X_BIG_ENDIAN 2275 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 2276#else 2277 ENDIAN_SWAP(SQ_ENDIAN_NONE), 2278#endif 2279 CONST_BUF_NO_STRIDE(0), 2280 MEGA_FETCH(1)); 2281 shader[i++] = VTX_DWORD_PAD; 2282 /* 52/53 - src */ 2283 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 2284 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 2285 FETCH_WHOLE_QUAD(0), 2286 BUFFER_ID(0), 2287 SRC_GPR(0), 2288 SRC_REL(ABSOLUTE), 2289 SRC_SEL_X(SQ_SEL_X), 2290 MEGA_FETCH_COUNT(8)); 2291 shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), 2292 DST_REL(0), 2293 DST_SEL_X(SQ_SEL_X), 2294 DST_SEL_Y(SQ_SEL_Y), 2295 DST_SEL_Z(SQ_SEL_1), 2296 DST_SEL_W(SQ_SEL_0), 2297 USE_CONST_FIELDS(0), 2298 DATA_FORMAT(FMT_32_32_FLOAT), 2299 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 2300 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 2301 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 2302 shader[i++] = VTX_DWORD2(OFFSET(8), 2303#if X_BYTE_ORDER == X_BIG_ENDIAN 2304 ENDIAN_SWAP(SQ_ENDIAN_8IN32), 2305#else 2306 ENDIAN_SWAP(SQ_ENDIAN_NONE), 2307#endif 2308 CONST_BUF_NO_STRIDE(0), 2309 MEGA_FETCH(0)); 2310 shader[i++] = VTX_DWORD_PAD; 2311 2312 return i; 2313} 2314 2315/* comp ps --------------------------------------- */ 2316int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) 2317{ 2318 int i = 0; 2319 2320 /* 0 */ 2321 /* call fetch-mask if boolean1 == true */ 2322 shader[i++] = CF_DWORD0(ADDR(10)); 2323 shader[i++] = CF_DWORD1(POP_COUNT(0), 2324 CF_CONST(1), 2325 COND(SQ_CF_COND_BOOL), 2326 I_COUNT(0), 2327 CALL_COUNT(0), 2328 END_OF_PROGRAM(0), 2329 VALID_PIXEL_MODE(0), 2330 CF_INST(SQ_CF_INST_CALL), 2331 WHOLE_QUAD_MODE(0), 2332 BARRIER(0)); 2333 /* 1 */ 2334 /* call read-constant-mask if boolean1 == false */ 2335 shader[i++] = CF_DWORD0(ADDR(12)); 2336 shader[i++] = CF_DWORD1(POP_COUNT(0), 2337 CF_CONST(1), 2338 COND(SQ_CF_COND_NOT_BOOL), 2339 I_COUNT(0), 2340 CALL_COUNT(0), 2341 END_OF_PROGRAM(0), 2342 VALID_PIXEL_MODE(0), 2343 CF_INST(SQ_CF_INST_CALL), 2344 WHOLE_QUAD_MODE(0), 2345 BARRIER(0)); 2346 /* 2 */ 2347 /* call fetch-src if boolean0 == true */ 2348 shader[i++] = CF_DWORD0(ADDR(6)); 2349 shader[i++] = CF_DWORD1(POP_COUNT(0), 2350 CF_CONST(0), 2351 COND(SQ_CF_COND_BOOL), 2352 I_COUNT(0), 2353 CALL_COUNT(0), 2354 END_OF_PROGRAM(0), 2355 VALID_PIXEL_MODE(0), 2356 CF_INST(SQ_CF_INST_CALL), 2357 WHOLE_QUAD_MODE(0), 2358 BARRIER(0)); 2359 2360 /* 3 */ 2361 /* call read-constant-src if boolean0 == false */ 2362 shader[i++] = CF_DWORD0(ADDR(8)); 2363 shader[i++] = CF_DWORD1(POP_COUNT(0), 2364 CF_CONST(0), 2365 COND(SQ_CF_COND_NOT_BOOL), 2366 I_COUNT(0), 2367 CALL_COUNT(0), 2368 END_OF_PROGRAM(0), 2369 VALID_PIXEL_MODE(0), 2370 CF_INST(SQ_CF_INST_CALL), 2371 WHOLE_QUAD_MODE(0), 2372 BARRIER(0)); 2373 2374 /* 4 */ 2375 /* src IN mask (GPR0 := GPR1 .* GPR0) */ 2376 shader[i++] = CF_ALU_DWORD0(ADDR(14), 2377 KCACHE_BANK0(0), 2378 KCACHE_BANK1(0), 2379 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 2380 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 2381 KCACHE_ADDR0(0), 2382 KCACHE_ADDR1(0), 2383 I_COUNT(4), 2384 USES_WATERFALL(0), 2385 CF_INST(SQ_CF_INST_ALU), 2386 WHOLE_QUAD_MODE(0), 2387 BARRIER(1)); 2388 2389 /* 5 */ 2390 /* export pixel data */ 2391 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 2392 TYPE(SQ_EXPORT_PIXEL), 2393 RW_GPR(0), 2394 RW_REL(ABSOLUTE), 2395 INDEX_GPR(0), 2396 ELEM_SIZE(1)); 2397 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 2398 SRC_SEL_Y(SQ_SEL_Y), 2399 SRC_SEL_Z(SQ_SEL_Z), 2400 SRC_SEL_W(SQ_SEL_W), 2401 R6xx_ELEM_LOOP(0), 2402 BURST_COUNT(1), 2403 END_OF_PROGRAM(1), 2404 VALID_PIXEL_MODE(0), 2405 CF_INST(SQ_CF_INST_EXPORT_DONE), 2406 WHOLE_QUAD_MODE(0), 2407 BARRIER(1)); 2408 /* subroutine fetch src */ 2409 /* 6 */ 2410 /* fetch src into GPR0*/ 2411 shader[i++] = CF_DWORD0(ADDR(26)); 2412 shader[i++] = CF_DWORD1(POP_COUNT(0), 2413 CF_CONST(0), 2414 COND(SQ_CF_COND_ACTIVE), 2415 I_COUNT(1), 2416 CALL_COUNT(0), 2417 END_OF_PROGRAM(0), 2418 VALID_PIXEL_MODE(0), 2419 CF_INST(SQ_CF_INST_TEX), 2420 WHOLE_QUAD_MODE(0), 2421 BARRIER(1)); 2422 2423 /* 7 */ 2424 /* return */ 2425 shader[i++] = CF_DWORD0(ADDR(0)); 2426 shader[i++] = CF_DWORD1(POP_COUNT(0), 2427 CF_CONST(0), 2428 COND(SQ_CF_COND_ACTIVE), 2429 I_COUNT(0), 2430 CALL_COUNT(0), 2431 END_OF_PROGRAM(0), 2432 VALID_PIXEL_MODE(0), 2433 CF_INST(SQ_CF_INST_RETURN), 2434 WHOLE_QUAD_MODE(0), 2435 BARRIER(1)); 2436 2437 /* subroutine read-constant-src*/ 2438 /* 8 */ 2439 /* read constants into GPR0 */ 2440 shader[i++] = CF_ALU_DWORD0(ADDR(18), 2441 KCACHE_BANK0(0), 2442 KCACHE_BANK1(0), 2443 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 2444 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 2445 KCACHE_ADDR0(0), 2446 KCACHE_ADDR1(0), 2447 I_COUNT(4), 2448 USES_WATERFALL(0), 2449 CF_INST(SQ_CF_INST_ALU), 2450 WHOLE_QUAD_MODE(0), 2451 BARRIER(1)); 2452 /* 9 */ 2453 /* return */ 2454 shader[i++] = CF_DWORD0(ADDR(0)); 2455 shader[i++] = CF_DWORD1(POP_COUNT(0), 2456 CF_CONST(0), 2457 COND(SQ_CF_COND_ACTIVE), 2458 I_COUNT(0), 2459 CALL_COUNT(0), 2460 END_OF_PROGRAM(0), 2461 VALID_PIXEL_MODE(0), 2462 CF_INST(SQ_CF_INST_RETURN), 2463 WHOLE_QUAD_MODE(0), 2464 BARRIER(1)); 2465 2466 /* subroutine fetch mask */ 2467 /* 10 */ 2468 /* fetch mask into GPR1*/ 2469 shader[i++] = CF_DWORD0(ADDR(28)); 2470 shader[i++] = CF_DWORD1(POP_COUNT(0), 2471 CF_CONST(0), 2472 COND(SQ_CF_COND_ACTIVE), 2473 I_COUNT(1), 2474 CALL_COUNT(0), 2475 END_OF_PROGRAM(0), 2476 VALID_PIXEL_MODE(0), 2477 CF_INST(SQ_CF_INST_TEX), 2478 WHOLE_QUAD_MODE(0), 2479 BARRIER(1)); 2480 2481 /* 11 */ 2482 /* return */ 2483 shader[i++] = CF_DWORD0(ADDR(0)); 2484 shader[i++] = CF_DWORD1(POP_COUNT(0), 2485 CF_CONST(0), 2486 COND(SQ_CF_COND_ACTIVE), 2487 I_COUNT(0), 2488 CALL_COUNT(0), 2489 END_OF_PROGRAM(0), 2490 VALID_PIXEL_MODE(0), 2491 CF_INST(SQ_CF_INST_RETURN), 2492 WHOLE_QUAD_MODE(0), 2493 BARRIER(1)); 2494 2495 /* subroutine read-constant-mask*/ 2496 /* 12 */ 2497 /* read constants into GPR1 */ 2498 shader[i++] = CF_ALU_DWORD0(ADDR(22), 2499 KCACHE_BANK0(0), 2500 KCACHE_BANK1(0), 2501 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 2502 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 2503 KCACHE_ADDR0(0), 2504 KCACHE_ADDR1(0), 2505 I_COUNT(4), 2506 USES_WATERFALL(0), 2507 CF_INST(SQ_CF_INST_ALU), 2508 WHOLE_QUAD_MODE(0), 2509 BARRIER(1)); 2510 /* 13 */ 2511 /* return */ 2512 shader[i++] = CF_DWORD0(ADDR(0)); 2513 shader[i++] = CF_DWORD1(POP_COUNT(0), 2514 CF_CONST(0), 2515 COND(SQ_CF_COND_ACTIVE), 2516 I_COUNT(0), 2517 CALL_COUNT(0), 2518 END_OF_PROGRAM(0), 2519 VALID_PIXEL_MODE(0), 2520 CF_INST(SQ_CF_INST_RETURN), 2521 WHOLE_QUAD_MODE(0), 2522 BARRIER(1)); 2523 /* ALU clauses */ 2524 2525 /* 14 - alu 0 */ 2526 /* MUL gpr[0].x gpr[1].x gpr[0].x */ 2527 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 2528 SRC0_REL(ABSOLUTE), 2529 SRC0_ELEM(ELEM_X), 2530 SRC0_NEG(0), 2531 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 2532 SRC1_REL(ABSOLUTE), 2533 SRC1_ELEM(ELEM_X), 2534 SRC1_NEG(0), 2535 INDEX_MODE(SQ_INDEX_LOOP), 2536 PRED_SEL(SQ_PRED_SEL_OFF), 2537 LAST(0)); 2538 shader[i++] = ALU_DWORD1_OP2(ChipSet, 2539 SRC0_ABS(0), 2540 SRC1_ABS(0), 2541 UPDATE_EXECUTE_MASK(0), 2542 UPDATE_PRED(0), 2543 WRITE_MASK(1), 2544 FOG_MERGE(0), 2545 OMOD(SQ_ALU_OMOD_OFF), 2546 ALU_INST(SQ_OP2_INST_MUL), 2547 BANK_SWIZZLE(SQ_ALU_VEC_012), 2548 DST_GPR(0), 2549 DST_REL(ABSOLUTE), 2550 DST_ELEM(ELEM_X), 2551 CLAMP(1)); 2552 /* 15 - alu 1 */ 2553 /* MUL gpr[0].y gpr[1].y gpr[0].y */ 2554 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 2555 SRC0_REL(ABSOLUTE), 2556 SRC0_ELEM(ELEM_Y), 2557 SRC0_NEG(0), 2558 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 2559 SRC1_REL(ABSOLUTE), 2560 SRC1_ELEM(ELEM_Y), 2561 SRC1_NEG(0), 2562 INDEX_MODE(SQ_INDEX_LOOP), 2563 PRED_SEL(SQ_PRED_SEL_OFF), 2564 LAST(0)); 2565 shader[i++] = ALU_DWORD1_OP2(ChipSet, 2566 SRC0_ABS(0), 2567 SRC1_ABS(0), 2568 UPDATE_EXECUTE_MASK(0), 2569 UPDATE_PRED(0), 2570 WRITE_MASK(1), 2571 FOG_MERGE(0), 2572 OMOD(SQ_ALU_OMOD_OFF), 2573 ALU_INST(SQ_OP2_INST_MUL), 2574 BANK_SWIZZLE(SQ_ALU_VEC_012), 2575 DST_GPR(0), 2576 DST_REL(ABSOLUTE), 2577 DST_ELEM(ELEM_Y), 2578 CLAMP(1)); 2579 /* 16 - alu 2 */ 2580 /* MUL gpr[0].z gpr[1].z gpr[0].z */ 2581 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 2582 SRC0_REL(ABSOLUTE), 2583 SRC0_ELEM(ELEM_Z), 2584 SRC0_NEG(0), 2585 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 2586 SRC1_REL(ABSOLUTE), 2587 SRC1_ELEM(ELEM_Z), 2588 SRC1_NEG(0), 2589 INDEX_MODE(SQ_INDEX_LOOP), 2590 PRED_SEL(SQ_PRED_SEL_OFF), 2591 LAST(0)); 2592 shader[i++] = ALU_DWORD1_OP2(ChipSet, 2593 SRC0_ABS(0), 2594 SRC1_ABS(0), 2595 UPDATE_EXECUTE_MASK(0), 2596 UPDATE_PRED(0), 2597 WRITE_MASK(1), 2598 FOG_MERGE(0), 2599 OMOD(SQ_ALU_OMOD_OFF), 2600 ALU_INST(SQ_OP2_INST_MUL), 2601 BANK_SWIZZLE(SQ_ALU_VEC_012), 2602 DST_GPR(0), 2603 DST_REL(ABSOLUTE), 2604 DST_ELEM(ELEM_Z), 2605 CLAMP(1)); 2606 /* 17 - alu 3 */ 2607 /* MUL gpr[0].w gpr[1].w gpr[0].w */ 2608 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), 2609 SRC0_REL(ABSOLUTE), 2610 SRC0_ELEM(ELEM_W), 2611 SRC0_NEG(0), 2612 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 2613 SRC1_REL(ABSOLUTE), 2614 SRC1_ELEM(ELEM_W), 2615 SRC1_NEG(0), 2616 INDEX_MODE(SQ_INDEX_LOOP), 2617 PRED_SEL(SQ_PRED_SEL_OFF), 2618 LAST(1)); 2619 shader[i++] = ALU_DWORD1_OP2(ChipSet, 2620 SRC0_ABS(0), 2621 SRC1_ABS(0), 2622 UPDATE_EXECUTE_MASK(0), 2623 UPDATE_PRED(0), 2624 WRITE_MASK(1), 2625 FOG_MERGE(0), 2626 OMOD(SQ_ALU_OMOD_OFF), 2627 ALU_INST(SQ_OP2_INST_MUL), 2628 BANK_SWIZZLE(SQ_ALU_VEC_012), 2629 DST_GPR(0), 2630 DST_REL(ABSOLUTE), 2631 DST_ELEM(ELEM_W), 2632 CLAMP(1)); 2633 2634 /* 18 */ 2635 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), 2636 SRC0_REL(ABSOLUTE), 2637 SRC0_ELEM(ELEM_X), 2638 SRC0_NEG(0), 2639 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 2640 SRC1_REL(ABSOLUTE), 2641 SRC1_ELEM(ELEM_X), 2642 SRC1_NEG(0), 2643 INDEX_MODE(SQ_INDEX_AR_X), 2644 PRED_SEL(SQ_PRED_SEL_OFF), 2645 LAST(0)); 2646 shader[i++] = ALU_DWORD1_OP2(ChipSet, 2647 SRC0_ABS(0), 2648 SRC1_ABS(0), 2649 UPDATE_EXECUTE_MASK(0), 2650 UPDATE_PRED(0), 2651 WRITE_MASK(1), 2652 FOG_MERGE(0), 2653 OMOD(SQ_ALU_OMOD_OFF), 2654 ALU_INST(SQ_OP2_INST_MOV), 2655 BANK_SWIZZLE(SQ_ALU_VEC_012), 2656 DST_GPR(0), 2657 DST_REL(ABSOLUTE), 2658 DST_ELEM(ELEM_X), 2659 CLAMP(1)); 2660 /* 19 */ 2661 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), 2662 SRC0_REL(ABSOLUTE), 2663 SRC0_ELEM(ELEM_Y), 2664 SRC0_NEG(0), 2665 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 2666 SRC1_REL(ABSOLUTE), 2667 SRC1_ELEM(ELEM_Y), 2668 SRC1_NEG(0), 2669 INDEX_MODE(SQ_INDEX_AR_X), 2670 PRED_SEL(SQ_PRED_SEL_OFF), 2671 LAST(0)); 2672 shader[i++] = ALU_DWORD1_OP2(ChipSet, 2673 SRC0_ABS(0), 2674 SRC1_ABS(0), 2675 UPDATE_EXECUTE_MASK(0), 2676 UPDATE_PRED(0), 2677 WRITE_MASK(1), 2678 FOG_MERGE(0), 2679 OMOD(SQ_ALU_OMOD_OFF), 2680 ALU_INST(SQ_OP2_INST_MOV), 2681 BANK_SWIZZLE(SQ_ALU_VEC_012), 2682 DST_GPR(0), 2683 DST_REL(ABSOLUTE), 2684 DST_ELEM(ELEM_Y), 2685 CLAMP(1)); 2686 /* 20 */ 2687 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), 2688 SRC0_REL(ABSOLUTE), 2689 SRC0_ELEM(ELEM_Z), 2690 SRC0_NEG(0), 2691 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 2692 SRC1_REL(ABSOLUTE), 2693 SRC1_ELEM(ELEM_Z), 2694 SRC1_NEG(0), 2695 INDEX_MODE(SQ_INDEX_AR_X), 2696 PRED_SEL(SQ_PRED_SEL_OFF), 2697 LAST(0)); 2698 shader[i++] = ALU_DWORD1_OP2(ChipSet, 2699 SRC0_ABS(0), 2700 SRC1_ABS(0), 2701 UPDATE_EXECUTE_MASK(0), 2702 UPDATE_PRED(0), 2703 WRITE_MASK(1), 2704 FOG_MERGE(0), 2705 OMOD(SQ_ALU_OMOD_OFF), 2706 ALU_INST(SQ_OP2_INST_MOV), 2707 BANK_SWIZZLE(SQ_ALU_VEC_012), 2708 DST_GPR(0), 2709 DST_REL(ABSOLUTE), 2710 DST_ELEM(ELEM_Z), 2711 CLAMP(1)); 2712 /* 21 */ 2713 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), 2714 SRC0_REL(ABSOLUTE), 2715 SRC0_ELEM(ELEM_W), 2716 SRC0_NEG(0), 2717 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 2718 SRC1_REL(ABSOLUTE), 2719 SRC1_ELEM(ELEM_W), 2720 SRC1_NEG(0), 2721 INDEX_MODE(SQ_INDEX_AR_X), 2722 PRED_SEL(SQ_PRED_SEL_OFF), 2723 LAST(1)); 2724 shader[i++] = ALU_DWORD1_OP2(ChipSet, 2725 SRC0_ABS(0), 2726 SRC1_ABS(0), 2727 UPDATE_EXECUTE_MASK(0), 2728 UPDATE_PRED(0), 2729 WRITE_MASK(1), 2730 FOG_MERGE(0), 2731 OMOD(SQ_ALU_OMOD_OFF), 2732 ALU_INST(SQ_OP2_INST_MOV), 2733 BANK_SWIZZLE(SQ_ALU_VEC_012), 2734 DST_GPR(0), 2735 DST_REL(ABSOLUTE), 2736 DST_ELEM(ELEM_W), 2737 CLAMP(1)); 2738 2739 /* 22 */ 2740 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1), 2741 SRC0_REL(ABSOLUTE), 2742 SRC0_ELEM(ELEM_X), 2743 SRC0_NEG(0), 2744 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 2745 SRC1_REL(ABSOLUTE), 2746 SRC1_ELEM(ELEM_X), 2747 SRC1_NEG(0), 2748 INDEX_MODE(SQ_INDEX_AR_X), 2749 PRED_SEL(SQ_PRED_SEL_OFF), 2750 LAST(0)); 2751 shader[i++] = ALU_DWORD1_OP2(ChipSet, 2752 SRC0_ABS(0), 2753 SRC1_ABS(0), 2754 UPDATE_EXECUTE_MASK(0), 2755 UPDATE_PRED(0), 2756 WRITE_MASK(1), 2757 FOG_MERGE(0), 2758 OMOD(SQ_ALU_OMOD_OFF), 2759 ALU_INST(SQ_OP2_INST_MOV), 2760 BANK_SWIZZLE(SQ_ALU_VEC_012), 2761 DST_GPR(1), 2762 DST_REL(ABSOLUTE), 2763 DST_ELEM(ELEM_X), 2764 CLAMP(1)); 2765 /* 23 */ 2766 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1), 2767 SRC0_REL(ABSOLUTE), 2768 SRC0_ELEM(ELEM_Y), 2769 SRC0_NEG(0), 2770 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 2771 SRC1_REL(ABSOLUTE), 2772 SRC1_ELEM(ELEM_Y), 2773 SRC1_NEG(0), 2774 INDEX_MODE(SQ_INDEX_AR_X), 2775 PRED_SEL(SQ_PRED_SEL_OFF), 2776 LAST(0)); 2777 shader[i++] = ALU_DWORD1_OP2(ChipSet, 2778 SRC0_ABS(0), 2779 SRC1_ABS(0), 2780 UPDATE_EXECUTE_MASK(0), 2781 UPDATE_PRED(0), 2782 WRITE_MASK(1), 2783 FOG_MERGE(0), 2784 OMOD(SQ_ALU_OMOD_OFF), 2785 ALU_INST(SQ_OP2_INST_MOV), 2786 BANK_SWIZZLE(SQ_ALU_VEC_012), 2787 DST_GPR(1), 2788 DST_REL(ABSOLUTE), 2789 DST_ELEM(ELEM_Y), 2790 CLAMP(1)); 2791 /* 24 */ 2792 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1), 2793 SRC0_REL(ABSOLUTE), 2794 SRC0_ELEM(ELEM_Z), 2795 SRC0_NEG(0), 2796 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 2797 SRC1_REL(ABSOLUTE), 2798 SRC1_ELEM(ELEM_Z), 2799 SRC1_NEG(0), 2800 INDEX_MODE(SQ_INDEX_AR_X), 2801 PRED_SEL(SQ_PRED_SEL_OFF), 2802 LAST(0)); 2803 shader[i++] = ALU_DWORD1_OP2(ChipSet, 2804 SRC0_ABS(0), 2805 SRC1_ABS(0), 2806 UPDATE_EXECUTE_MASK(0), 2807 UPDATE_PRED(0), 2808 WRITE_MASK(1), 2809 FOG_MERGE(0), 2810 OMOD(SQ_ALU_OMOD_OFF), 2811 ALU_INST(SQ_OP2_INST_MOV), 2812 BANK_SWIZZLE(SQ_ALU_VEC_012), 2813 DST_GPR(1), 2814 DST_REL(ABSOLUTE), 2815 DST_ELEM(ELEM_Z), 2816 CLAMP(1)); 2817 /* 25 */ 2818 shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1), 2819 SRC0_REL(ABSOLUTE), 2820 SRC0_ELEM(ELEM_W), 2821 SRC0_NEG(0), 2822 SRC1_SEL(ALU_SRC_GPR_BASE + 0), 2823 SRC1_REL(ABSOLUTE), 2824 SRC1_ELEM(ELEM_W), 2825 SRC1_NEG(0), 2826 INDEX_MODE(SQ_INDEX_AR_X), 2827 PRED_SEL(SQ_PRED_SEL_OFF), 2828 LAST(1)); 2829 shader[i++] = ALU_DWORD1_OP2(ChipSet, 2830 SRC0_ABS(0), 2831 SRC1_ABS(0), 2832 UPDATE_EXECUTE_MASK(0), 2833 UPDATE_PRED(0), 2834 WRITE_MASK(1), 2835 FOG_MERGE(0), 2836 OMOD(SQ_ALU_OMOD_OFF), 2837 ALU_INST(SQ_OP2_INST_MOV), 2838 BANK_SWIZZLE(SQ_ALU_VEC_012), 2839 DST_GPR(1), 2840 DST_REL(ABSOLUTE), 2841 DST_ELEM(ELEM_W), 2842 CLAMP(1)); 2843 2844 /* 26/27 - src */ 2845 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 2846 BC_FRAC_MODE(0), 2847 FETCH_WHOLE_QUAD(0), 2848 RESOURCE_ID(0), 2849 SRC_GPR(0), 2850 SRC_REL(ABSOLUTE), 2851 R7xx_ALT_CONST(0)); 2852 shader[i++] = TEX_DWORD1(DST_GPR(0), 2853 DST_REL(ABSOLUTE), 2854 DST_SEL_X(SQ_SEL_X), 2855 DST_SEL_Y(SQ_SEL_Y), 2856 DST_SEL_Z(SQ_SEL_Z), 2857 DST_SEL_W(SQ_SEL_W), 2858 LOD_BIAS(0), 2859 COORD_TYPE_X(TEX_NORMALIZED), 2860 COORD_TYPE_Y(TEX_NORMALIZED), 2861 COORD_TYPE_Z(TEX_NORMALIZED), 2862 COORD_TYPE_W(TEX_NORMALIZED)); 2863 shader[i++] = TEX_DWORD2(OFFSET_X(0), 2864 OFFSET_Y(0), 2865 OFFSET_Z(0), 2866 SAMPLER_ID(0), 2867 SRC_SEL_X(SQ_SEL_X), 2868 SRC_SEL_Y(SQ_SEL_Y), 2869 SRC_SEL_Z(SQ_SEL_0), 2870 SRC_SEL_W(SQ_SEL_1)); 2871 shader[i++] = TEX_DWORD_PAD; 2872 /* 28/29 - mask */ 2873 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 2874 BC_FRAC_MODE(0), 2875 FETCH_WHOLE_QUAD(0), 2876 RESOURCE_ID(1), 2877 SRC_GPR(1), 2878 SRC_REL(ABSOLUTE), 2879 R7xx_ALT_CONST(0)); 2880 shader[i++] = TEX_DWORD1(DST_GPR(1), 2881 DST_REL(ABSOLUTE), 2882 DST_SEL_X(SQ_SEL_X), 2883 DST_SEL_Y(SQ_SEL_Y), 2884 DST_SEL_Z(SQ_SEL_Z), 2885 DST_SEL_W(SQ_SEL_W), 2886 LOD_BIAS(0), 2887 COORD_TYPE_X(TEX_NORMALIZED), 2888 COORD_TYPE_Y(TEX_NORMALIZED), 2889 COORD_TYPE_Z(TEX_NORMALIZED), 2890 COORD_TYPE_W(TEX_NORMALIZED)); 2891 shader[i++] = TEX_DWORD2(OFFSET_X(0), 2892 OFFSET_Y(0), 2893 OFFSET_Z(0), 2894 SAMPLER_ID(1), 2895 SRC_SEL_X(SQ_SEL_X), 2896 SRC_SEL_Y(SQ_SEL_Y), 2897 SRC_SEL_Z(SQ_SEL_0), 2898 SRC_SEL_W(SQ_SEL_1)); 2899 shader[i++] = TEX_DWORD_PAD; 2900 2901 return i; 2902} 2903