r600_shader.c revision 2f39173d
1/* 2 * Copyright 2008 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Author: Alex Deucher <alexander.deucher@amd.com> 24 * 25 */ 26 27#ifdef HAVE_CONFIG_H 28#include "config.h" 29#endif 30 31#include "xf86.h" 32 33#include "radeon.h" 34#include "r600_shader.h" 35#include "r600_reg.h" 36 37/* solid vs --------------------------------------- */ 38int R600_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader) 39{ 40 int i = 0; 41 42 /* 0 */ 43 shader[i++] = CF_DWORD0(ADDR(4)); 44 shader[i++] = CF_DWORD1(POP_COUNT(0), 45 CF_CONST(0), 46 COND(SQ_CF_COND_ACTIVE), 47 I_COUNT(1), 48 CALL_COUNT(0), 49 END_OF_PROGRAM(0), 50 VALID_PIXEL_MODE(0), 51 CF_INST(SQ_CF_INST_VTX), 52 WHOLE_QUAD_MODE(0), 53 BARRIER(1)); 54 /* 1 */ 55 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 56 TYPE(SQ_EXPORT_POS), 57 RW_GPR(1), 58 RW_REL(ABSOLUTE), 59 INDEX_GPR(0), 60 ELEM_SIZE(0)); 61 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 62 SRC_SEL_Y(SQ_SEL_Y), 63 SRC_SEL_Z(SQ_SEL_Z), 64 SRC_SEL_W(SQ_SEL_W), 65 R6xx_ELEM_LOOP(0), 66 BURST_COUNT(1), 67 END_OF_PROGRAM(0), 68 VALID_PIXEL_MODE(0), 69 CF_INST(SQ_CF_INST_EXPORT_DONE), 70 WHOLE_QUAD_MODE(0), 71 BARRIER(1)); 72 /* 2 - always export a param whether it's used or not */ 73 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 74 TYPE(SQ_EXPORT_PARAM), 75 RW_GPR(0), 76 RW_REL(ABSOLUTE), 77 INDEX_GPR(0), 78 ELEM_SIZE(0)); 79 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 80 SRC_SEL_Y(SQ_SEL_Y), 81 SRC_SEL_Z(SQ_SEL_Z), 82 SRC_SEL_W(SQ_SEL_W), 83 R6xx_ELEM_LOOP(0), 84 BURST_COUNT(0), 85 END_OF_PROGRAM(1), 86 VALID_PIXEL_MODE(0), 87 CF_INST(SQ_CF_INST_EXPORT_DONE), 88 WHOLE_QUAD_MODE(0), 89 BARRIER(0)); 90 /* 3 - padding */ 91 shader[i++] = 0x00000000; 92 shader[i++] = 0x00000000; 93 /* 4/5 */ 94 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 95 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 96 FETCH_WHOLE_QUAD(0), 97 BUFFER_ID(0), 98 SRC_GPR(0), 99 SRC_REL(ABSOLUTE), 100 SRC_SEL_X(SQ_SEL_X), 101 MEGA_FETCH_COUNT(8)); 102 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 103 DST_REL(0), 104 DST_SEL_X(SQ_SEL_X), 105 DST_SEL_Y(SQ_SEL_Y), 106 DST_SEL_Z(SQ_SEL_0), 107 DST_SEL_W(SQ_SEL_1), 108 USE_CONST_FIELDS(0), 109 DATA_FORMAT(FMT_32_32_FLOAT), 110 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 111 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 112 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 113 shader[i++] = VTX_DWORD2(OFFSET(0), 114 ENDIAN_SWAP(ENDIAN_NONE), 115 CONST_BUF_NO_STRIDE(0), 116 MEGA_FETCH(1)); 117 shader[i++] = VTX_DWORD_PAD; 118 119 return i; 120} 121 122/* solid ps --------------------------------------- */ 123int R600_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader) 124{ 125 int i = 0; 126 127 /* 0 */ 128 shader[i++] = CF_ALU_DWORD0(ADDR(2), 129 KCACHE_BANK0(0), 130 KCACHE_BANK1(0), 131 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 132 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 133 KCACHE_ADDR0(0), 134 KCACHE_ADDR1(0), 135 I_COUNT(4), 136 USES_WATERFALL(0), 137 CF_INST(SQ_CF_INST_ALU), 138 WHOLE_QUAD_MODE(0), 139 BARRIER(1)); 140 /* 1 */ 141 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 142 TYPE(SQ_EXPORT_PIXEL), 143 RW_GPR(0), 144 RW_REL(ABSOLUTE), 145 INDEX_GPR(0), 146 ELEM_SIZE(1)); 147 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 148 SRC_SEL_Y(SQ_SEL_Y), 149 SRC_SEL_Z(SQ_SEL_Z), 150 SRC_SEL_W(SQ_SEL_W), 151 R6xx_ELEM_LOOP(0), 152 BURST_COUNT(1), 153 END_OF_PROGRAM(1), 154 VALID_PIXEL_MODE(0), 155 CF_INST(SQ_CF_INST_EXPORT_DONE), 156 WHOLE_QUAD_MODE(0), 157 BARRIER(1)); 158 159 /* 2 */ 160 shader[i++] = ALU_DWORD0(SRC0_SEL(256), 161 SRC0_REL(ABSOLUTE), 162 SRC0_ELEM(ELEM_X), 163 SRC0_NEG(0), 164 SRC1_SEL(0), 165 SRC1_REL(ABSOLUTE), 166 SRC1_ELEM(ELEM_X), 167 SRC1_NEG(0), 168 INDEX_MODE(SQ_INDEX_AR_X), 169 PRED_SEL(SQ_PRED_SEL_OFF), 170 LAST(0)); 171 shader[i++] = ALU_DWORD1_OP2(ChipSet, 172 SRC0_ABS(0), 173 SRC1_ABS(0), 174 UPDATE_EXECUTE_MASK(0), 175 UPDATE_PRED(0), 176 WRITE_MASK(1), 177 FOG_MERGE(0), 178 OMOD(SQ_ALU_OMOD_OFF), 179 ALU_INST(SQ_OP2_INST_MOV), 180 BANK_SWIZZLE(SQ_ALU_VEC_012), 181 DST_GPR(0), 182 DST_REL(ABSOLUTE), 183 DST_ELEM(ELEM_X), 184 CLAMP(1)); 185 /* 3 */ 186 shader[i++] = ALU_DWORD0(SRC0_SEL(256), 187 SRC0_REL(ABSOLUTE), 188 SRC0_ELEM(ELEM_Y), 189 SRC0_NEG(0), 190 SRC1_SEL(0), 191 SRC1_REL(ABSOLUTE), 192 SRC1_ELEM(ELEM_Y), 193 SRC1_NEG(0), 194 INDEX_MODE(SQ_INDEX_AR_X), 195 PRED_SEL(SQ_PRED_SEL_OFF), 196 LAST(0)); 197 shader[i++] = ALU_DWORD1_OP2(ChipSet, 198 SRC0_ABS(0), 199 SRC1_ABS(0), 200 UPDATE_EXECUTE_MASK(0), 201 UPDATE_PRED(0), 202 WRITE_MASK(1), 203 FOG_MERGE(0), 204 OMOD(SQ_ALU_OMOD_OFF), 205 ALU_INST(SQ_OP2_INST_MOV), 206 BANK_SWIZZLE(SQ_ALU_VEC_012), 207 DST_GPR(0), 208 DST_REL(ABSOLUTE), 209 DST_ELEM(ELEM_Y), 210 CLAMP(1)); 211 /* 4 */ 212 shader[i++] = ALU_DWORD0(SRC0_SEL(256), 213 SRC0_REL(ABSOLUTE), 214 SRC0_ELEM(ELEM_Z), 215 SRC0_NEG(0), 216 SRC1_SEL(0), 217 SRC1_REL(ABSOLUTE), 218 SRC1_ELEM(ELEM_Z), 219 SRC1_NEG(0), 220 INDEX_MODE(SQ_INDEX_AR_X), 221 PRED_SEL(SQ_PRED_SEL_OFF), 222 LAST(0)); 223 shader[i++] = ALU_DWORD1_OP2(ChipSet, 224 SRC0_ABS(0), 225 SRC1_ABS(0), 226 UPDATE_EXECUTE_MASK(0), 227 UPDATE_PRED(0), 228 WRITE_MASK(1), 229 FOG_MERGE(0), 230 OMOD(SQ_ALU_OMOD_OFF), 231 ALU_INST(SQ_OP2_INST_MOV), 232 BANK_SWIZZLE(SQ_ALU_VEC_012), 233 DST_GPR(0), 234 DST_REL(ABSOLUTE), 235 DST_ELEM(ELEM_Z), 236 CLAMP(1)); 237 /* 5 */ 238 shader[i++] = ALU_DWORD0(SRC0_SEL(256), 239 SRC0_REL(ABSOLUTE), 240 SRC0_ELEM(ELEM_W), 241 SRC0_NEG(0), 242 SRC1_SEL(0), 243 SRC1_REL(ABSOLUTE), 244 SRC1_ELEM(ELEM_W), 245 SRC1_NEG(0), 246 INDEX_MODE(SQ_INDEX_AR_X), 247 PRED_SEL(SQ_PRED_SEL_OFF), 248 LAST(1)); 249 shader[i++] = ALU_DWORD1_OP2(ChipSet, 250 SRC0_ABS(0), 251 SRC1_ABS(0), 252 UPDATE_EXECUTE_MASK(0), 253 UPDATE_PRED(0), 254 WRITE_MASK(1), 255 FOG_MERGE(0), 256 OMOD(SQ_ALU_OMOD_OFF), 257 ALU_INST(SQ_OP2_INST_MOV), 258 BANK_SWIZZLE(SQ_ALU_VEC_012), 259 DST_GPR(0), 260 DST_REL(ABSOLUTE), 261 DST_ELEM(ELEM_W), 262 CLAMP(1)); 263 264 return i; 265} 266 267/* copy vs --------------------------------------- */ 268int R600_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader) 269{ 270 int i = 0; 271 272 /* 0 */ 273 shader[i++] = CF_DWORD0(ADDR(4)); 274 shader[i++] = CF_DWORD1(POP_COUNT(0), 275 CF_CONST(0), 276 COND(SQ_CF_COND_ACTIVE), 277 I_COUNT(2), 278 CALL_COUNT(0), 279 END_OF_PROGRAM(0), 280 VALID_PIXEL_MODE(0), 281 CF_INST(SQ_CF_INST_VTX), 282 WHOLE_QUAD_MODE(0), 283 BARRIER(1)); 284 /* 1 */ 285 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 286 TYPE(SQ_EXPORT_POS), 287 RW_GPR(1), 288 RW_REL(ABSOLUTE), 289 INDEX_GPR(0), 290 ELEM_SIZE(0)); 291 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 292 SRC_SEL_Y(SQ_SEL_Y), 293 SRC_SEL_Z(SQ_SEL_Z), 294 SRC_SEL_W(SQ_SEL_W), 295 R6xx_ELEM_LOOP(0), 296 BURST_COUNT(0), 297 END_OF_PROGRAM(0), 298 VALID_PIXEL_MODE(0), 299 CF_INST(SQ_CF_INST_EXPORT_DONE), 300 WHOLE_QUAD_MODE(0), 301 BARRIER(1)); 302 /* 2 */ 303 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 304 TYPE(SQ_EXPORT_PARAM), 305 RW_GPR(0), 306 RW_REL(ABSOLUTE), 307 INDEX_GPR(0), 308 ELEM_SIZE(0)); 309 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 310 SRC_SEL_Y(SQ_SEL_Y), 311 SRC_SEL_Z(SQ_SEL_Z), 312 SRC_SEL_W(SQ_SEL_W), 313 R6xx_ELEM_LOOP(0), 314 BURST_COUNT(0), 315 END_OF_PROGRAM(1), 316 VALID_PIXEL_MODE(0), 317 CF_INST(SQ_CF_INST_EXPORT_DONE), 318 WHOLE_QUAD_MODE(0), 319 BARRIER(0)); 320 /* 3 */ 321 shader[i++] = 0x00000000; 322 shader[i++] = 0x00000000; 323 /* 4/5 */ 324 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 325 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 326 FETCH_WHOLE_QUAD(0), 327 BUFFER_ID(0), 328 SRC_GPR(0), 329 SRC_REL(ABSOLUTE), 330 SRC_SEL_X(SQ_SEL_X), 331 MEGA_FETCH_COUNT(16)); 332 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 333 DST_REL(0), 334 DST_SEL_X(SQ_SEL_X), 335 DST_SEL_Y(SQ_SEL_Y), 336 DST_SEL_Z(SQ_SEL_0), 337 DST_SEL_W(SQ_SEL_1), 338 USE_CONST_FIELDS(0), 339 DATA_FORMAT(FMT_32_32_FLOAT), 340 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 341 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 342 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 343 shader[i++] = VTX_DWORD2(OFFSET(0), 344 ENDIAN_SWAP(ENDIAN_NONE), 345 CONST_BUF_NO_STRIDE(0), 346 MEGA_FETCH(1)); 347 shader[i++] = VTX_DWORD_PAD; 348 /* 6/7 */ 349 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 350 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 351 FETCH_WHOLE_QUAD(0), 352 BUFFER_ID(0), 353 SRC_GPR(0), 354 SRC_REL(ABSOLUTE), 355 SRC_SEL_X(SQ_SEL_X), 356 MEGA_FETCH_COUNT(8)); 357 shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), 358 DST_REL(0), 359 DST_SEL_X(SQ_SEL_X), 360 DST_SEL_Y(SQ_SEL_Y), 361 DST_SEL_Z(SQ_SEL_0), 362 DST_SEL_W(SQ_SEL_1), 363 USE_CONST_FIELDS(0), 364 DATA_FORMAT(FMT_32_32_FLOAT), 365 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 366 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 367 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 368 shader[i++] = VTX_DWORD2(OFFSET(8), 369 ENDIAN_SWAP(ENDIAN_NONE), 370 CONST_BUF_NO_STRIDE(0), 371 MEGA_FETCH(0)); 372 shader[i++] = VTX_DWORD_PAD; 373 374 return i; 375} 376 377/* copy ps --------------------------------------- */ 378int R600_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader) 379{ 380 int i=0; 381 382 /* CF INST 0 */ 383 shader[i++] = CF_DWORD0(ADDR(2)); 384 shader[i++] = CF_DWORD1(POP_COUNT(0), 385 CF_CONST(0), 386 COND(SQ_CF_COND_ACTIVE), 387 I_COUNT(1), 388 CALL_COUNT(0), 389 END_OF_PROGRAM(0), 390 VALID_PIXEL_MODE(0), 391 CF_INST(SQ_CF_INST_TEX), 392 WHOLE_QUAD_MODE(0), 393 BARRIER(1)); 394 /* CF INST 1 */ 395 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 396 TYPE(SQ_EXPORT_PIXEL), 397 RW_GPR(0), 398 RW_REL(ABSOLUTE), 399 INDEX_GPR(0), 400 ELEM_SIZE(1)); 401 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 402 SRC_SEL_Y(SQ_SEL_Y), 403 SRC_SEL_Z(SQ_SEL_Z), 404 SRC_SEL_W(SQ_SEL_W), 405 R6xx_ELEM_LOOP(0), 406 BURST_COUNT(1), 407 END_OF_PROGRAM(1), 408 VALID_PIXEL_MODE(0), 409 CF_INST(SQ_CF_INST_EXPORT_DONE), 410 WHOLE_QUAD_MODE(0), 411 BARRIER(1)); 412 /* TEX INST 0 */ 413 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 414 BC_FRAC_MODE(0), 415 FETCH_WHOLE_QUAD(0), 416 RESOURCE_ID(0), 417 SRC_GPR(0), 418 SRC_REL(ABSOLUTE), 419 R7xx_ALT_CONST(0)); 420 shader[i++] = TEX_DWORD1(DST_GPR(0), 421 DST_REL(ABSOLUTE), 422 DST_SEL_X(SQ_SEL_X), /* R */ 423 DST_SEL_Y(SQ_SEL_Y), /* G */ 424 DST_SEL_Z(SQ_SEL_Z), /* B */ 425 DST_SEL_W(SQ_SEL_W), /* A */ 426 LOD_BIAS(0), 427 COORD_TYPE_X(TEX_UNNORMALIZED), 428 COORD_TYPE_Y(TEX_UNNORMALIZED), 429 COORD_TYPE_Z(TEX_UNNORMALIZED), 430 COORD_TYPE_W(TEX_UNNORMALIZED)); 431 shader[i++] = TEX_DWORD2(OFFSET_X(0), 432 OFFSET_Y(0), 433 OFFSET_Z(0), 434 SAMPLER_ID(0), 435 SRC_SEL_X(SQ_SEL_X), 436 SRC_SEL_Y(SQ_SEL_Y), 437 SRC_SEL_Z(SQ_SEL_0), 438 SRC_SEL_W(SQ_SEL_1)); 439 shader[i++] = TEX_DWORD_PAD; 440 441 return i; 442} 443 444/* 445 * ; xv vertex shader 446 * 00 VTX: ADDR(4) CNT(2) 447 * 0 VFETCH R1.xy01, R0.x, fc0 MEGA(16) FORMAT(32_32_FLOAT) 448 * FORMAT_COMP(SIGNED) 449 * 1 VFETCH R0.xy01, R0.x, fc0 MINI(8) OFFSET(8) FORMAT(32_32_FLOAT) 450 * FORMAT_COMP(SIGNED) 451 * 01 EXP_DONE: POS0, R1 452 * 02 EXP_DONE: PARAM0, R0 NO_BARRIER 453 * END_OF_PROGRAM 454 */ 455int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader) 456{ 457 int i = 0; 458 459 /* 0 */ 460 shader[i++] = CF_DWORD0(ADDR(6)); 461 shader[i++] = CF_DWORD1(POP_COUNT(0), 462 CF_CONST(0), 463 COND(SQ_CF_COND_ACTIVE), 464 I_COUNT(2), 465 CALL_COUNT(0), 466 END_OF_PROGRAM(0), 467 VALID_PIXEL_MODE(0), 468 CF_INST(SQ_CF_INST_VTX), 469 WHOLE_QUAD_MODE(0), 470 BARRIER(1)); 471 472 /* 1 - ALU */ 473 shader[i++] = CF_ALU_DWORD0(ADDR(4), 474 KCACHE_BANK0(0), 475 KCACHE_BANK1(0), 476 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 477 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 478 KCACHE_ADDR0(0), 479 KCACHE_ADDR1(0), 480 I_COUNT(2), 481 USES_WATERFALL(0), 482 CF_INST(SQ_CF_INST_ALU), 483 WHOLE_QUAD_MODE(0), 484 BARRIER(1)); 485 486 /* 2 */ 487 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 488 TYPE(SQ_EXPORT_POS), 489 RW_GPR(1), 490 RW_REL(ABSOLUTE), 491 INDEX_GPR(0), 492 ELEM_SIZE(3)); 493 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 494 SRC_SEL_Y(SQ_SEL_Y), 495 SRC_SEL_Z(SQ_SEL_Z), 496 SRC_SEL_W(SQ_SEL_W), 497 R6xx_ELEM_LOOP(0), 498 BURST_COUNT(1), 499 END_OF_PROGRAM(0), 500 VALID_PIXEL_MODE(0), 501 CF_INST(SQ_CF_INST_EXPORT_DONE), 502 WHOLE_QUAD_MODE(0), 503 BARRIER(1)); 504 /* 3 */ 505 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 506 TYPE(SQ_EXPORT_PARAM), 507 RW_GPR(0), 508 RW_REL(ABSOLUTE), 509 INDEX_GPR(0), 510 ELEM_SIZE(3)); 511 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 512 SRC_SEL_Y(SQ_SEL_Y), 513 SRC_SEL_Z(SQ_SEL_Z), 514 SRC_SEL_W(SQ_SEL_W), 515 R6xx_ELEM_LOOP(0), 516 BURST_COUNT(1), 517 END_OF_PROGRAM(1), 518 VALID_PIXEL_MODE(0), 519 CF_INST(SQ_CF_INST_EXPORT_DONE), 520 WHOLE_QUAD_MODE(0), 521 BARRIER(0)); 522 523 524 /* 4 texX / w */ 525 shader[i++] = ALU_DWORD0(SRC0_SEL(0), 526 SRC0_REL(ABSOLUTE), 527 SRC0_ELEM(ELEM_X), 528 SRC0_NEG(0), 529 SRC1_SEL(256), 530 SRC1_REL(ABSOLUTE), 531 SRC1_ELEM(ELEM_X), 532 SRC1_NEG(0), 533 INDEX_MODE(SQ_INDEX_AR_X), 534 PRED_SEL(SQ_PRED_SEL_OFF), 535 LAST(0)); 536 shader[i++] = ALU_DWORD1_OP2(ChipSet, 537 SRC0_ABS(0), 538 SRC1_ABS(0), 539 UPDATE_EXECUTE_MASK(0), 540 UPDATE_PRED(0), 541 WRITE_MASK(1), 542 FOG_MERGE(0), 543 OMOD(SQ_ALU_OMOD_OFF), 544 ALU_INST(SQ_OP2_INST_MUL), 545 BANK_SWIZZLE(SQ_ALU_VEC_012), 546 DST_GPR(0), 547 DST_REL(ABSOLUTE), 548 DST_ELEM(ELEM_X), 549 CLAMP(0)); 550 551 /* 5 texY / h */ 552 shader[i++] = ALU_DWORD0(SRC0_SEL(0), 553 SRC0_REL(ABSOLUTE), 554 SRC0_ELEM(ELEM_Y), 555 SRC0_NEG(0), 556 SRC1_SEL(256), 557 SRC1_REL(ABSOLUTE), 558 SRC1_ELEM(ELEM_Y), 559 SRC1_NEG(0), 560 INDEX_MODE(SQ_INDEX_AR_X), 561 PRED_SEL(SQ_PRED_SEL_OFF), 562 LAST(1)); 563 shader[i++] = ALU_DWORD1_OP2(ChipSet, 564 SRC0_ABS(0), 565 SRC1_ABS(0), 566 UPDATE_EXECUTE_MASK(0), 567 UPDATE_PRED(0), 568 WRITE_MASK(1), 569 FOG_MERGE(0), 570 OMOD(SQ_ALU_OMOD_OFF), 571 ALU_INST(SQ_OP2_INST_MUL), 572 BANK_SWIZZLE(SQ_ALU_VEC_012), 573 DST_GPR(0), 574 DST_REL(ABSOLUTE), 575 DST_ELEM(ELEM_Y), 576 CLAMP(0)); 577 578 /* 6/7 */ 579 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 580 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 581 FETCH_WHOLE_QUAD(0), 582 BUFFER_ID(0), 583 SRC_GPR(0), 584 SRC_REL(ABSOLUTE), 585 SRC_SEL_X(SQ_SEL_X), 586 MEGA_FETCH_COUNT(16)); 587 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 588 DST_REL(ABSOLUTE), 589 DST_SEL_X(SQ_SEL_X), 590 DST_SEL_Y(SQ_SEL_Y), 591 DST_SEL_Z(SQ_SEL_0), 592 DST_SEL_W(SQ_SEL_1), 593 USE_CONST_FIELDS(0), 594 DATA_FORMAT(FMT_32_32_FLOAT), 595 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 596 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 597 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 598 shader[i++] = VTX_DWORD2(OFFSET(0), 599 ENDIAN_SWAP(ENDIAN_NONE), 600 CONST_BUF_NO_STRIDE(0), 601 MEGA_FETCH(1)); 602 shader[i++] = VTX_DWORD_PAD; 603 /* 8/9 */ 604 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 605 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 606 FETCH_WHOLE_QUAD(0), 607 BUFFER_ID(0), 608 SRC_GPR(0), 609 SRC_REL(ABSOLUTE), 610 SRC_SEL_X(SQ_SEL_X), 611 MEGA_FETCH_COUNT(8)); 612 shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), 613 DST_REL(ABSOLUTE), 614 DST_SEL_X(SQ_SEL_X), 615 DST_SEL_Y(SQ_SEL_Y), 616 DST_SEL_Z(SQ_SEL_0), 617 DST_SEL_W(SQ_SEL_1), 618 USE_CONST_FIELDS(0), 619 DATA_FORMAT(FMT_32_32_FLOAT), 620 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 621 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 622 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 623 shader[i++] = VTX_DWORD2(OFFSET(8), 624 ENDIAN_SWAP(ENDIAN_NONE), 625 CONST_BUF_NO_STRIDE(0), 626 MEGA_FETCH(0)); 627 shader[i++] = VTX_DWORD_PAD; 628 629 return i; 630} 631 632int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) 633{ 634 int i = 0; 635 636 /* 0 */ 637 shader[i++] = CF_DWORD0(ADDR(16)); 638 shader[i++] = CF_DWORD1(POP_COUNT(0), 639 CF_CONST(0), 640 COND(SQ_CF_COND_BOOL), 641 I_COUNT(0), 642 CALL_COUNT(0), 643 END_OF_PROGRAM(0), 644 VALID_PIXEL_MODE(0), 645 CF_INST(SQ_CF_INST_CALL), 646 WHOLE_QUAD_MODE(0), 647 BARRIER(0)); 648 /* 1 */ 649 shader[i++] = CF_DWORD0(ADDR(24)); 650 shader[i++] = CF_DWORD1(POP_COUNT(0), 651 CF_CONST(0), 652 COND(SQ_CF_COND_NOT_BOOL), 653 I_COUNT(0), 654 CALL_COUNT(0), 655 END_OF_PROGRAM(0), 656 VALID_PIXEL_MODE(0), 657 CF_INST(SQ_CF_INST_CALL), 658 WHOLE_QUAD_MODE(0), 659 BARRIER(0)); 660 /* 2 */ 661 shader[i++] = CF_ALU_DWORD0(ADDR(4), 662 KCACHE_BANK0(0), 663 KCACHE_BANK1(0), 664 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 665 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 666 KCACHE_ADDR0(0), 667 KCACHE_ADDR1(0), 668 I_COUNT(12), 669 USES_WATERFALL(0), 670 CF_INST(SQ_CF_INST_ALU), 671 WHOLE_QUAD_MODE(0), 672 BARRIER(1)); 673 /* 3 */ 674 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 675 TYPE(SQ_EXPORT_PIXEL), 676 RW_GPR(2), 677 RW_REL(ABSOLUTE), 678 INDEX_GPR(0), 679 ELEM_SIZE(3)); 680 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 681 SRC_SEL_Y(SQ_SEL_Y), 682 SRC_SEL_Z(SQ_SEL_Z), 683 SRC_SEL_W(SQ_SEL_W), 684 R6xx_ELEM_LOOP(0), 685 BURST_COUNT(1), 686 END_OF_PROGRAM(1), 687 VALID_PIXEL_MODE(0), 688 CF_INST(SQ_CF_INST_EXPORT_DONE), 689 WHOLE_QUAD_MODE(0), 690 BARRIER(1)); 691 /* 4,5,6,7 */ 692 /* r2.x = MAD(c0.w, r1.x, c0.x) */ 693 shader[i++] = ALU_DWORD0(SRC0_SEL(256), 694 SRC0_REL(ABSOLUTE), 695 SRC0_ELEM(ELEM_W), 696 SRC0_NEG(0), 697 SRC1_SEL(1), 698 SRC1_REL(ABSOLUTE), 699 SRC1_ELEM(ELEM_X), 700 SRC1_NEG(0), 701 INDEX_MODE(SQ_INDEX_LOOP), 702 PRED_SEL(SQ_PRED_SEL_OFF), 703 LAST(0)); 704 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256), 705 SRC2_REL(ABSOLUTE), 706 SRC2_ELEM(ELEM_X), 707 SRC2_NEG(0), 708 ALU_INST(SQ_OP3_INST_MULADD), 709 BANK_SWIZZLE(SQ_ALU_VEC_012), 710 DST_GPR(2), 711 DST_REL(ABSOLUTE), 712 DST_ELEM(ELEM_X), 713 CLAMP(0)); 714 /* r2.y = MAD(c0.w, r1.x, c0.y) */ 715 shader[i++] = ALU_DWORD0(SRC0_SEL(256), 716 SRC0_REL(ABSOLUTE), 717 SRC0_ELEM(ELEM_W), 718 SRC0_NEG(0), 719 SRC1_SEL(1), 720 SRC1_REL(ABSOLUTE), 721 SRC1_ELEM(ELEM_X), 722 SRC1_NEG(0), 723 INDEX_MODE(SQ_INDEX_LOOP), 724 PRED_SEL(SQ_PRED_SEL_OFF), 725 LAST(0)); 726 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256), 727 SRC2_REL(ABSOLUTE), 728 SRC2_ELEM(ELEM_Y), 729 SRC2_NEG(0), 730 ALU_INST(SQ_OP3_INST_MULADD), 731 BANK_SWIZZLE(SQ_ALU_VEC_012), 732 DST_GPR(2), 733 DST_REL(ABSOLUTE), 734 DST_ELEM(ELEM_Y), 735 CLAMP(0)); 736 /* r2.z = MAD(c0.w, r1.x, c0.z) */ 737 shader[i++] = ALU_DWORD0(SRC0_SEL(256), 738 SRC0_REL(ABSOLUTE), 739 SRC0_ELEM(ELEM_W), 740 SRC0_NEG(0), 741 SRC1_SEL(1), 742 SRC1_REL(ABSOLUTE), 743 SRC1_ELEM(ELEM_X), 744 SRC1_NEG(0), 745 INDEX_MODE(SQ_INDEX_LOOP), 746 PRED_SEL(SQ_PRED_SEL_OFF), 747 LAST(0)); 748 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256), 749 SRC2_REL(ABSOLUTE), 750 SRC2_ELEM(ELEM_Z), 751 SRC2_NEG(0), 752 ALU_INST(SQ_OP3_INST_MULADD), 753 BANK_SWIZZLE(SQ_ALU_VEC_012), 754 DST_GPR(2), 755 DST_REL(ABSOLUTE), 756 DST_ELEM(ELEM_Z), 757 CLAMP(0)); 758 /* r2.w = MAD(0, 0, 1) */ 759 shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), 760 SRC0_REL(ABSOLUTE), 761 SRC0_ELEM(ELEM_X), 762 SRC0_NEG(0), 763 SRC1_SEL(SQ_ALU_SRC_0), 764 SRC1_REL(ABSOLUTE), 765 SRC1_ELEM(ELEM_X), 766 SRC1_NEG(0), 767 INDEX_MODE(SQ_INDEX_LOOP), 768 PRED_SEL(SQ_PRED_SEL_OFF), 769 LAST(1)); 770 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), 771 SRC2_REL(ABSOLUTE), 772 SRC2_ELEM(ELEM_X), 773 SRC2_NEG(0), 774 ALU_INST(SQ_OP3_INST_MULADD), 775 BANK_SWIZZLE(SQ_ALU_VEC_012), 776 DST_GPR(2), 777 DST_REL(ABSOLUTE), 778 DST_ELEM(ELEM_W), 779 CLAMP(0)); 780 781 /* 8,9,10,11 */ 782 /* r2.x = MAD(c1.x, r1.y, pv.x) */ 783 shader[i++] = ALU_DWORD0(SRC0_SEL(257), 784 SRC0_REL(ABSOLUTE), 785 SRC0_ELEM(ELEM_X), 786 SRC0_NEG(0), 787 SRC1_SEL(1), 788 SRC1_REL(ABSOLUTE), 789 SRC1_ELEM(ELEM_Y), 790 SRC1_NEG(0), 791 INDEX_MODE(SQ_INDEX_LOOP), 792 PRED_SEL(SQ_PRED_SEL_OFF), 793 LAST(0)); 794 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 795 SRC2_REL(ABSOLUTE), 796 SRC2_ELEM(ELEM_X), 797 SRC2_NEG(0), 798 ALU_INST(SQ_OP3_INST_MULADD), 799 BANK_SWIZZLE(SQ_ALU_VEC_012), 800 DST_GPR(2), 801 DST_REL(ABSOLUTE), 802 DST_ELEM(ELEM_X), 803 CLAMP(0)); 804 /* r2.y = MAD(c1.y, r1.y, pv.y) */ 805 shader[i++] = ALU_DWORD0(SRC0_SEL(257), 806 SRC0_REL(ABSOLUTE), 807 SRC0_ELEM(ELEM_Y), 808 SRC0_NEG(0), 809 SRC1_SEL(1), 810 SRC1_REL(ABSOLUTE), 811 SRC1_ELEM(ELEM_Y), 812 SRC1_NEG(0), 813 INDEX_MODE(SQ_INDEX_LOOP), 814 PRED_SEL(SQ_PRED_SEL_OFF), 815 LAST(0)); 816 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 817 SRC2_REL(ABSOLUTE), 818 SRC2_ELEM(ELEM_Y), 819 SRC2_NEG(0), 820 ALU_INST(SQ_OP3_INST_MULADD), 821 BANK_SWIZZLE(SQ_ALU_VEC_012), 822 DST_GPR(2), 823 DST_REL(ABSOLUTE), 824 DST_ELEM(ELEM_Y), 825 CLAMP(0)); 826 /* r2.z = MAD(c1.z, r1.y, pv.z) */ 827 shader[i++] = ALU_DWORD0(SRC0_SEL(257), 828 SRC0_REL(ABSOLUTE), 829 SRC0_ELEM(ELEM_Z), 830 SRC0_NEG(0), 831 SRC1_SEL(1), 832 SRC1_REL(ABSOLUTE), 833 SRC1_ELEM(ELEM_Y), 834 SRC1_NEG(0), 835 INDEX_MODE(SQ_INDEX_LOOP), 836 PRED_SEL(SQ_PRED_SEL_OFF), 837 LAST(0)); 838 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 839 SRC2_REL(ABSOLUTE), 840 SRC2_ELEM(ELEM_Z), 841 SRC2_NEG(0), 842 ALU_INST(SQ_OP3_INST_MULADD), 843 BANK_SWIZZLE(SQ_ALU_VEC_012), 844 DST_GPR(2), 845 DST_REL(ABSOLUTE), 846 DST_ELEM(ELEM_Z), 847 CLAMP(0)); 848 /* r2.w = MAD(0, 0, 1) */ 849 shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), 850 SRC0_REL(ABSOLUTE), 851 SRC0_ELEM(ELEM_X), 852 SRC0_NEG(0), 853 SRC1_SEL(SQ_ALU_SRC_0), 854 SRC1_REL(ABSOLUTE), 855 SRC1_ELEM(ELEM_X), 856 SRC1_NEG(0), 857 INDEX_MODE(SQ_INDEX_LOOP), 858 PRED_SEL(SQ_PRED_SEL_OFF), 859 LAST(1)); 860 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), 861 SRC2_REL(ABSOLUTE), 862 SRC2_ELEM(ELEM_W), 863 SRC2_NEG(0), 864 ALU_INST(SQ_OP3_INST_MULADD), 865 BANK_SWIZZLE(SQ_ALU_VEC_012), 866 DST_GPR(2), 867 DST_REL(ABSOLUTE), 868 DST_ELEM(ELEM_W), 869 CLAMP(0)); 870 /* 12,13,14,15 */ 871 /* r2.x = MAD(c2.x, r1.z, pv.x) */ 872 shader[i++] = ALU_DWORD0(SRC0_SEL(258), 873 SRC0_REL(ABSOLUTE), 874 SRC0_ELEM(ELEM_X), 875 SRC0_NEG(0), 876 SRC1_SEL(1), 877 SRC1_REL(ABSOLUTE), 878 SRC1_ELEM(ELEM_Z), 879 SRC1_NEG(0), 880 INDEX_MODE(SQ_INDEX_LOOP), 881 PRED_SEL(SQ_PRED_SEL_OFF), 882 LAST(0)); 883 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 884 SRC2_REL(ABSOLUTE), 885 SRC2_ELEM(ELEM_X), 886 SRC2_NEG(0), 887 ALU_INST(SQ_OP3_INST_MULADD), 888 BANK_SWIZZLE(SQ_ALU_VEC_012), 889 DST_GPR(2), 890 DST_REL(ABSOLUTE), 891 DST_ELEM(ELEM_X), 892 CLAMP(1)); 893 /* r2.y = MAD(c2.y, r1.z, pv.y) */ 894 shader[i++] = ALU_DWORD0(SRC0_SEL(258), 895 SRC0_REL(ABSOLUTE), 896 SRC0_ELEM(ELEM_Y), 897 SRC0_NEG(0), 898 SRC1_SEL(1), 899 SRC1_REL(ABSOLUTE), 900 SRC1_ELEM(ELEM_Z), 901 SRC1_NEG(0), 902 INDEX_MODE(SQ_INDEX_LOOP), 903 PRED_SEL(SQ_PRED_SEL_OFF), 904 LAST(0)); 905 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 906 SRC2_REL(ABSOLUTE), 907 SRC2_ELEM(ELEM_Y), 908 SRC2_NEG(0), 909 ALU_INST(SQ_OP3_INST_MULADD), 910 BANK_SWIZZLE(SQ_ALU_VEC_012), 911 DST_GPR(2), 912 DST_REL(ABSOLUTE), 913 DST_ELEM(ELEM_Y), 914 CLAMP(1)); 915 /* r2.z = MAD(c2.z, r1.z, pv.z) */ 916 shader[i++] = ALU_DWORD0(SRC0_SEL(258), 917 SRC0_REL(ABSOLUTE), 918 SRC0_ELEM(ELEM_Z), 919 SRC0_NEG(0), 920 SRC1_SEL(1), 921 SRC1_REL(ABSOLUTE), 922 SRC1_ELEM(ELEM_Z), 923 SRC1_NEG(0), 924 INDEX_MODE(SQ_INDEX_LOOP), 925 PRED_SEL(SQ_PRED_SEL_OFF), 926 LAST(0)); 927 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), 928 SRC2_REL(ABSOLUTE), 929 SRC2_ELEM(ELEM_Z), 930 SRC2_NEG(0), 931 ALU_INST(SQ_OP3_INST_MULADD), 932 BANK_SWIZZLE(SQ_ALU_VEC_012), 933 DST_GPR(2), 934 DST_REL(ABSOLUTE), 935 DST_ELEM(ELEM_Z), 936 CLAMP(1)); 937 /* r2.w = MAD(0, 0, 1) */ 938 shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), 939 SRC0_REL(ABSOLUTE), 940 SRC0_ELEM(ELEM_X), 941 SRC0_NEG(0), 942 SRC1_SEL(SQ_ALU_SRC_0), 943 SRC1_REL(ABSOLUTE), 944 SRC1_ELEM(ELEM_X), 945 SRC1_NEG(0), 946 INDEX_MODE(SQ_INDEX_LOOP), 947 PRED_SEL(SQ_PRED_SEL_OFF), 948 LAST(1)); 949 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), 950 SRC2_REL(ABSOLUTE), 951 SRC2_ELEM(ELEM_X), 952 SRC2_NEG(0), 953 ALU_INST(SQ_OP3_INST_MULADD), 954 BANK_SWIZZLE(SQ_ALU_VEC_012), 955 DST_GPR(2), 956 DST_REL(ABSOLUTE), 957 DST_ELEM(ELEM_W), 958 CLAMP(1)); 959 960 /* 16 */ 961 shader[i++] = CF_DWORD0(ADDR(18)); 962 shader[i++] = CF_DWORD1(POP_COUNT(0), 963 CF_CONST(0), 964 COND(SQ_CF_COND_ACTIVE), 965 I_COUNT(3), 966 CALL_COUNT(0), 967 END_OF_PROGRAM(0), 968 VALID_PIXEL_MODE(0), 969 CF_INST(SQ_CF_INST_TEX), 970 WHOLE_QUAD_MODE(0), 971 BARRIER(1)); 972 /* 17 */ 973 shader[i++] = CF_DWORD0(ADDR(0)); 974 shader[i++] = CF_DWORD1(POP_COUNT(0), 975 CF_CONST(0), 976 COND(SQ_CF_COND_ACTIVE), 977 I_COUNT(0), 978 CALL_COUNT(0), 979 END_OF_PROGRAM(0), 980 VALID_PIXEL_MODE(0), 981 CF_INST(SQ_CF_INST_RETURN), 982 WHOLE_QUAD_MODE(0), 983 BARRIER(1)); 984 /* 18/19 */ 985 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 986 BC_FRAC_MODE(0), 987 FETCH_WHOLE_QUAD(0), 988 RESOURCE_ID(0), 989 SRC_GPR(0), 990 SRC_REL(ABSOLUTE), 991 R7xx_ALT_CONST(0)); 992 shader[i++] = TEX_DWORD1(DST_GPR(1), 993 DST_REL(ABSOLUTE), 994 DST_SEL_X(SQ_SEL_X), 995 DST_SEL_Y(SQ_SEL_MASK), 996 DST_SEL_Z(SQ_SEL_MASK), 997 DST_SEL_W(SQ_SEL_1), 998 LOD_BIAS(0), 999 COORD_TYPE_X(TEX_NORMALIZED), 1000 COORD_TYPE_Y(TEX_NORMALIZED), 1001 COORD_TYPE_Z(TEX_NORMALIZED), 1002 COORD_TYPE_W(TEX_NORMALIZED)); 1003 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1004 OFFSET_Y(0), 1005 OFFSET_Z(0), 1006 SAMPLER_ID(0), 1007 SRC_SEL_X(SQ_SEL_X), 1008 SRC_SEL_Y(SQ_SEL_Y), 1009 SRC_SEL_Z(SQ_SEL_0), 1010 SRC_SEL_W(SQ_SEL_1)); 1011 shader[i++] = TEX_DWORD_PAD; 1012 /* 20/21 */ 1013 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1014 BC_FRAC_MODE(0), 1015 FETCH_WHOLE_QUAD(0), 1016 RESOURCE_ID(1), 1017 SRC_GPR(0), 1018 SRC_REL(ABSOLUTE), 1019 R7xx_ALT_CONST(0)); 1020 shader[i++] = TEX_DWORD1(DST_GPR(1), 1021 DST_REL(ABSOLUTE), 1022 DST_SEL_X(SQ_SEL_MASK), 1023 DST_SEL_Y(SQ_SEL_MASK), 1024 DST_SEL_Z(SQ_SEL_X), 1025 DST_SEL_W(SQ_SEL_MASK), 1026 LOD_BIAS(0), 1027 COORD_TYPE_X(TEX_NORMALIZED), 1028 COORD_TYPE_Y(TEX_NORMALIZED), 1029 COORD_TYPE_Z(TEX_NORMALIZED), 1030 COORD_TYPE_W(TEX_NORMALIZED)); 1031 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1032 OFFSET_Y(0), 1033 OFFSET_Z(0), 1034 SAMPLER_ID(1), 1035 SRC_SEL_X(SQ_SEL_X), 1036 SRC_SEL_Y(SQ_SEL_Y), 1037 SRC_SEL_Z(SQ_SEL_0), 1038 SRC_SEL_W(SQ_SEL_1)); 1039 shader[i++] = TEX_DWORD_PAD; 1040 /* 22/23 */ 1041 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1042 BC_FRAC_MODE(0), 1043 FETCH_WHOLE_QUAD(0), 1044 RESOURCE_ID(2), 1045 SRC_GPR(0), 1046 SRC_REL(ABSOLUTE), 1047 R7xx_ALT_CONST(0)); 1048 shader[i++] = TEX_DWORD1(DST_GPR(1), 1049 DST_REL(ABSOLUTE), 1050 DST_SEL_X(SQ_SEL_MASK), 1051 DST_SEL_Y(SQ_SEL_X), 1052 DST_SEL_Z(SQ_SEL_MASK), 1053 DST_SEL_W(SQ_SEL_MASK), 1054 LOD_BIAS(0), 1055 COORD_TYPE_X(TEX_NORMALIZED), 1056 COORD_TYPE_Y(TEX_NORMALIZED), 1057 COORD_TYPE_Z(TEX_NORMALIZED), 1058 COORD_TYPE_W(TEX_NORMALIZED)); 1059 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1060 OFFSET_Y(0), 1061 OFFSET_Z(0), 1062 SAMPLER_ID(2), 1063 SRC_SEL_X(SQ_SEL_X), 1064 SRC_SEL_Y(SQ_SEL_Y), 1065 SRC_SEL_Z(SQ_SEL_0), 1066 SRC_SEL_W(SQ_SEL_1)); 1067 shader[i++] = TEX_DWORD_PAD; 1068 /* 24 */ 1069 shader[i++] = CF_DWORD0(ADDR(26)); 1070 shader[i++] = CF_DWORD1(POP_COUNT(0), 1071 CF_CONST(0), 1072 COND(SQ_CF_COND_ACTIVE), 1073 I_COUNT(2), 1074 CALL_COUNT(0), 1075 END_OF_PROGRAM(0), 1076 VALID_PIXEL_MODE(0), 1077 CF_INST(SQ_CF_INST_TEX), 1078 WHOLE_QUAD_MODE(0), 1079 BARRIER(1)); 1080 /* 25 */ 1081 shader[i++] = CF_DWORD0(ADDR(0)); 1082 shader[i++] = CF_DWORD1(POP_COUNT(0), 1083 CF_CONST(0), 1084 COND(SQ_CF_COND_ACTIVE), 1085 I_COUNT(0), 1086 CALL_COUNT(0), 1087 END_OF_PROGRAM(0), 1088 VALID_PIXEL_MODE(0), 1089 CF_INST(SQ_CF_INST_RETURN), 1090 WHOLE_QUAD_MODE(0), 1091 BARRIER(1)); 1092 /* 26/27 */ 1093 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1094 BC_FRAC_MODE(0), 1095 FETCH_WHOLE_QUAD(0), 1096 RESOURCE_ID(0), 1097 SRC_GPR(0), 1098 SRC_REL(ABSOLUTE), 1099 R7xx_ALT_CONST(0)); 1100 shader[i++] = TEX_DWORD1(DST_GPR(1), 1101 DST_REL(ABSOLUTE), 1102 DST_SEL_X(SQ_SEL_X), 1103 DST_SEL_Y(SQ_SEL_MASK), 1104 DST_SEL_Z(SQ_SEL_MASK), 1105 DST_SEL_W(SQ_SEL_1), 1106 LOD_BIAS(0), 1107 COORD_TYPE_X(TEX_NORMALIZED), 1108 COORD_TYPE_Y(TEX_NORMALIZED), 1109 COORD_TYPE_Z(TEX_NORMALIZED), 1110 COORD_TYPE_W(TEX_NORMALIZED)); 1111 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1112 OFFSET_Y(0), 1113 OFFSET_Z(0), 1114 SAMPLER_ID(0), 1115 SRC_SEL_X(SQ_SEL_X), 1116 SRC_SEL_Y(SQ_SEL_Y), 1117 SRC_SEL_Z(SQ_SEL_0), 1118 SRC_SEL_W(SQ_SEL_1)); 1119 shader[i++] = TEX_DWORD_PAD; 1120 /* 28/29 */ 1121 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1122 BC_FRAC_MODE(0), 1123 FETCH_WHOLE_QUAD(0), 1124 RESOURCE_ID(1), 1125 SRC_GPR(0), 1126 SRC_REL(ABSOLUTE), 1127 R7xx_ALT_CONST(0)); 1128 shader[i++] = TEX_DWORD1(DST_GPR(1), 1129 DST_REL(ABSOLUTE), 1130 DST_SEL_X(SQ_SEL_MASK), 1131 DST_SEL_Y(SQ_SEL_X), 1132 DST_SEL_Z(SQ_SEL_Y), 1133 DST_SEL_W(SQ_SEL_MASK), 1134 LOD_BIAS(0), 1135 COORD_TYPE_X(TEX_NORMALIZED), 1136 COORD_TYPE_Y(TEX_NORMALIZED), 1137 COORD_TYPE_Z(TEX_NORMALIZED), 1138 COORD_TYPE_W(TEX_NORMALIZED)); 1139 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1140 OFFSET_Y(0), 1141 OFFSET_Z(0), 1142 SAMPLER_ID(1), 1143 SRC_SEL_X(SQ_SEL_X), 1144 SRC_SEL_Y(SQ_SEL_Y), 1145 SRC_SEL_Z(SQ_SEL_0), 1146 SRC_SEL_W(SQ_SEL_1)); 1147 shader[i++] = TEX_DWORD_PAD; 1148 1149 return i; 1150} 1151 1152/* comp mask ps --------------------------------------- */ 1153int R600_comp_mask_ps(RADEONChipFamily ChipSet, uint32_t* shader) 1154{ 1155 int i = 0; 1156 1157 /* 0 */ 1158 shader[i++] = CF_DWORD0(ADDR(8)); 1159 shader[i++] = CF_DWORD1(POP_COUNT(0), 1160 CF_CONST(0), 1161 COND(SQ_CF_COND_ACTIVE), 1162 I_COUNT(2), 1163 CALL_COUNT(0), 1164 END_OF_PROGRAM(0), 1165 VALID_PIXEL_MODE(0), 1166 CF_INST(SQ_CF_INST_TEX), 1167 WHOLE_QUAD_MODE(0), 1168 BARRIER(1)); 1169 1170 /* 1 */ 1171 shader[i++] = CF_ALU_DWORD0(ADDR(3), 1172 KCACHE_BANK0(0), 1173 KCACHE_BANK1(0), 1174 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 1175 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 1176 KCACHE_ADDR0(0), 1177 KCACHE_ADDR1(0), 1178 I_COUNT(4), 1179 USES_WATERFALL(0), 1180 CF_INST(SQ_CF_INST_ALU), 1181 WHOLE_QUAD_MODE(0), 1182 BARRIER(1)); 1183 1184 /* 2 */ 1185 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 1186 TYPE(SQ_EXPORT_PIXEL), 1187 RW_GPR(2), 1188 RW_REL(ABSOLUTE), 1189 INDEX_GPR(0), 1190 ELEM_SIZE(1)); 1191 1192 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1193 SRC_SEL_Y(SQ_SEL_Y), 1194 SRC_SEL_Z(SQ_SEL_Z), 1195 SRC_SEL_W(SQ_SEL_W), 1196 R6xx_ELEM_LOOP(0), 1197 BURST_COUNT(1), 1198 END_OF_PROGRAM(1), 1199 VALID_PIXEL_MODE(0), 1200 CF_INST(SQ_CF_INST_EXPORT_DONE), 1201 WHOLE_QUAD_MODE(0), 1202 BARRIER(1)); 1203 1204 /* 3 - alu 0 */ 1205 /* MUL gpr[2].x gpr[1].x gpr[0].x */ 1206 shader[i++] = ALU_DWORD0(SRC0_SEL(1), 1207 SRC0_REL(ABSOLUTE), 1208 SRC0_ELEM(ELEM_X), 1209 SRC0_NEG(0), 1210 SRC1_SEL(0), 1211 SRC1_REL(ABSOLUTE), 1212 SRC1_ELEM(ELEM_X), 1213 SRC1_NEG(0), 1214 INDEX_MODE(SQ_INDEX_LOOP), 1215 PRED_SEL(SQ_PRED_SEL_OFF), 1216 LAST(0)); 1217 shader[i++] = ALU_DWORD1_OP2(ChipSet, 1218 SRC0_ABS(0), 1219 SRC1_ABS(0), 1220 UPDATE_EXECUTE_MASK(0), 1221 UPDATE_PRED(0), 1222 WRITE_MASK(1), 1223 FOG_MERGE(0), 1224 OMOD(SQ_ALU_OMOD_OFF), 1225 ALU_INST(SQ_OP2_INST_MUL), 1226 BANK_SWIZZLE(SQ_ALU_VEC_012), 1227 DST_GPR(2), 1228 DST_REL(ABSOLUTE), 1229 DST_ELEM(ELEM_X), 1230 CLAMP(1)); 1231 /* 4 - alu 1 */ 1232 /* MUL gpr[2].y gpr[1].y gpr[0].y */ 1233 shader[i++] = ALU_DWORD0(SRC0_SEL(1), 1234 SRC0_REL(ABSOLUTE), 1235 SRC0_ELEM(ELEM_Y), 1236 SRC0_NEG(0), 1237 SRC1_SEL(0), 1238 SRC1_REL(ABSOLUTE), 1239 SRC1_ELEM(ELEM_Y), 1240 SRC1_NEG(0), 1241 INDEX_MODE(SQ_INDEX_LOOP), 1242 PRED_SEL(SQ_PRED_SEL_OFF), 1243 LAST(0)); 1244 shader[i++] = ALU_DWORD1_OP2(ChipSet, 1245 SRC0_ABS(0), 1246 SRC1_ABS(0), 1247 UPDATE_EXECUTE_MASK(0), 1248 UPDATE_PRED(0), 1249 WRITE_MASK(1), 1250 FOG_MERGE(0), 1251 OMOD(SQ_ALU_OMOD_OFF), 1252 ALU_INST(SQ_OP2_INST_MUL), 1253 BANK_SWIZZLE(SQ_ALU_VEC_012), 1254 DST_GPR(2), 1255 DST_REL(ABSOLUTE), 1256 DST_ELEM(ELEM_Y), 1257 CLAMP(1)); 1258 /* 5 - alu 2 */ 1259 /* MUL gpr[2].z gpr[1].z gpr[0].z */ 1260 shader[i++] = ALU_DWORD0(SRC0_SEL(1), 1261 SRC0_REL(ABSOLUTE), 1262 SRC0_ELEM(ELEM_Z), 1263 SRC0_NEG(0), 1264 SRC1_SEL(0), 1265 SRC1_REL(ABSOLUTE), 1266 SRC1_ELEM(ELEM_Z), 1267 SRC1_NEG(0), 1268 INDEX_MODE(SQ_INDEX_LOOP), 1269 PRED_SEL(SQ_PRED_SEL_OFF), 1270 LAST(0)); 1271 shader[i++] = ALU_DWORD1_OP2(ChipSet, 1272 SRC0_ABS(0), 1273 SRC1_ABS(0), 1274 UPDATE_EXECUTE_MASK(0), 1275 UPDATE_PRED(0), 1276 WRITE_MASK(1), 1277 FOG_MERGE(0), 1278 OMOD(SQ_ALU_OMOD_OFF), 1279 ALU_INST(SQ_OP2_INST_MUL), 1280 BANK_SWIZZLE(SQ_ALU_VEC_012), 1281 DST_GPR(2), 1282 DST_REL(ABSOLUTE), 1283 DST_ELEM(ELEM_Z), 1284 CLAMP(1)); 1285 /* 6 - alu 3 */ 1286 /* MUL gpr[2].w gpr[1].w gpr[0].w */ 1287 shader[i++] = ALU_DWORD0(SRC0_SEL(1), 1288 SRC0_REL(ABSOLUTE), 1289 SRC0_ELEM(ELEM_W), 1290 SRC0_NEG(0), 1291 SRC1_SEL(0), 1292 SRC1_REL(ABSOLUTE), 1293 SRC1_ELEM(ELEM_W), 1294 SRC1_NEG(0), 1295 INDEX_MODE(SQ_INDEX_LOOP), 1296 PRED_SEL(SQ_PRED_SEL_OFF), 1297 LAST(1)); 1298 shader[i++] = ALU_DWORD1_OP2(ChipSet, 1299 SRC0_ABS(0), 1300 SRC1_ABS(0), 1301 UPDATE_EXECUTE_MASK(0), 1302 UPDATE_PRED(0), 1303 WRITE_MASK(1), 1304 FOG_MERGE(0), 1305 OMOD(SQ_ALU_OMOD_OFF), 1306 ALU_INST(SQ_OP2_INST_MUL), 1307 BANK_SWIZZLE(SQ_ALU_VEC_012), 1308 DST_GPR(2), 1309 DST_REL(ABSOLUTE), 1310 DST_ELEM(ELEM_W), 1311 CLAMP(1)); 1312 /* 7 */ 1313 shader[i++] = 0x00000000; 1314 shader[i++] = 0x00000000; 1315 1316 /* 8/9 - src */ 1317 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1318 BC_FRAC_MODE(0), 1319 FETCH_WHOLE_QUAD(0), 1320 RESOURCE_ID(0), 1321 SRC_GPR(0), 1322 SRC_REL(ABSOLUTE), 1323 R7xx_ALT_CONST(0)); 1324 shader[i++] = TEX_DWORD1(DST_GPR(0), 1325 DST_REL(ABSOLUTE), 1326 DST_SEL_X(SQ_SEL_X), 1327 DST_SEL_Y(SQ_SEL_Y), 1328 DST_SEL_Z(SQ_SEL_Z), 1329 DST_SEL_W(SQ_SEL_W), 1330 LOD_BIAS(0), 1331 COORD_TYPE_X(TEX_NORMALIZED), 1332 COORD_TYPE_Y(TEX_NORMALIZED), 1333 COORD_TYPE_Z(TEX_NORMALIZED), 1334 COORD_TYPE_W(TEX_NORMALIZED)); 1335 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1336 OFFSET_Y(0), 1337 OFFSET_Z(0), 1338 SAMPLER_ID(0), 1339 SRC_SEL_X(SQ_SEL_X), 1340 SRC_SEL_Y(SQ_SEL_Y), 1341 SRC_SEL_Z(SQ_SEL_0), 1342 SRC_SEL_W(SQ_SEL_1)); 1343 shader[i++] = TEX_DWORD_PAD; 1344 /* 10/11 - mask */ 1345 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 1346 BC_FRAC_MODE(0), 1347 FETCH_WHOLE_QUAD(0), 1348 RESOURCE_ID(1), 1349 SRC_GPR(1), 1350 SRC_REL(ABSOLUTE), 1351 R7xx_ALT_CONST(0)); 1352 shader[i++] = TEX_DWORD1(DST_GPR(1), 1353 DST_REL(ABSOLUTE), 1354 DST_SEL_X(SQ_SEL_X), 1355 DST_SEL_Y(SQ_SEL_Y), 1356 DST_SEL_Z(SQ_SEL_Z), 1357 DST_SEL_W(SQ_SEL_W), 1358 LOD_BIAS(0), 1359 COORD_TYPE_X(TEX_NORMALIZED), 1360 COORD_TYPE_Y(TEX_NORMALIZED), 1361 COORD_TYPE_Z(TEX_NORMALIZED), 1362 COORD_TYPE_W(TEX_NORMALIZED)); 1363 shader[i++] = TEX_DWORD2(OFFSET_X(0), 1364 OFFSET_Y(0), 1365 OFFSET_Z(0), 1366 SAMPLER_ID(1), 1367 SRC_SEL_X(SQ_SEL_X), 1368 SRC_SEL_Y(SQ_SEL_Y), 1369 SRC_SEL_Z(SQ_SEL_0), 1370 SRC_SEL_W(SQ_SEL_1)); 1371 shader[i++] = TEX_DWORD_PAD; 1372 1373 return i; 1374} 1375 1376/* comp vs --------------------------------------- */ 1377int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) 1378{ 1379 int i = 0; 1380 1381 /* 0 */ 1382 shader[i++] = CF_DWORD0(ADDR(3)); 1383 shader[i++] = CF_DWORD1(POP_COUNT(0), 1384 CF_CONST(0), 1385 COND(SQ_CF_COND_BOOL), 1386 I_COUNT(0), 1387 CALL_COUNT(0), 1388 END_OF_PROGRAM(0), 1389 VALID_PIXEL_MODE(0), 1390 CF_INST(SQ_CF_INST_CALL), 1391 WHOLE_QUAD_MODE(0), 1392 BARRIER(0)); 1393 /* 1 */ 1394 shader[i++] = CF_DWORD0(ADDR(28)); 1395 shader[i++] = CF_DWORD1(POP_COUNT(0), 1396 CF_CONST(0), 1397 COND(SQ_CF_COND_NOT_BOOL), 1398 I_COUNT(0), 1399 CALL_COUNT(0), 1400 END_OF_PROGRAM(0), 1401 VALID_PIXEL_MODE(0), 1402 CF_INST(SQ_CF_INST_CALL), 1403 WHOLE_QUAD_MODE(0), 1404 BARRIER(0)); 1405 /* 2 */ 1406 shader[i++] = CF_DWORD0(ADDR(0)); 1407 shader[i++] = CF_DWORD1(POP_COUNT(0), 1408 CF_CONST(0), 1409 COND(SQ_CF_COND_ACTIVE), 1410 I_COUNT(0), 1411 CALL_COUNT(0), 1412 END_OF_PROGRAM(1), 1413 VALID_PIXEL_MODE(0), 1414 CF_INST(SQ_CF_INST_NOP), 1415 WHOLE_QUAD_MODE(0), 1416 BARRIER(1)); 1417 /* 3 - mask sub */ 1418 shader[i++] = CF_DWORD0(ADDR(22)); 1419 shader[i++] = CF_DWORD1(POP_COUNT(0), 1420 CF_CONST(0), 1421 COND(SQ_CF_COND_ACTIVE), 1422 I_COUNT(3), 1423 CALL_COUNT(0), 1424 END_OF_PROGRAM(0), 1425 VALID_PIXEL_MODE(0), 1426 CF_INST(SQ_CF_INST_VTX), 1427 WHOLE_QUAD_MODE(0), 1428 BARRIER(1)); 1429 1430 /* 4 - ALU */ 1431 shader[i++] = CF_ALU_DWORD0(ADDR(9), 1432 KCACHE_BANK0(0), 1433 KCACHE_BANK1(0), 1434 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 1435 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 1436 KCACHE_ADDR0(0), 1437 KCACHE_ADDR1(0), 1438 I_COUNT(12), 1439 USES_WATERFALL(0), 1440 CF_INST(SQ_CF_INST_ALU), 1441 WHOLE_QUAD_MODE(0), 1442 BARRIER(1)); 1443 1444 /* 5 - dst */ 1445 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 1446 TYPE(SQ_EXPORT_POS), 1447 RW_GPR(2), 1448 RW_REL(ABSOLUTE), 1449 INDEX_GPR(0), 1450 ELEM_SIZE(0)); 1451 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1452 SRC_SEL_Y(SQ_SEL_Y), 1453 SRC_SEL_Z(SQ_SEL_0), 1454 SRC_SEL_W(SQ_SEL_1), 1455 R6xx_ELEM_LOOP(0), 1456 BURST_COUNT(1), 1457 END_OF_PROGRAM(0), 1458 VALID_PIXEL_MODE(0), 1459 CF_INST(SQ_CF_INST_EXPORT_DONE), 1460 WHOLE_QUAD_MODE(0), 1461 BARRIER(1)); 1462 /* 6 - src */ 1463 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 1464 TYPE(SQ_EXPORT_PARAM), 1465 RW_GPR(1), 1466 RW_REL(ABSOLUTE), 1467 INDEX_GPR(0), 1468 ELEM_SIZE(0)); 1469 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1470 SRC_SEL_Y(SQ_SEL_Y), 1471 SRC_SEL_Z(SQ_SEL_0), 1472 SRC_SEL_W(SQ_SEL_1), 1473 R6xx_ELEM_LOOP(0), 1474 BURST_COUNT(1), 1475 END_OF_PROGRAM(0), 1476 VALID_PIXEL_MODE(0), 1477 CF_INST(SQ_CF_INST_EXPORT), 1478 WHOLE_QUAD_MODE(0), 1479 BARRIER(0)); 1480 /* 7 - mask */ 1481 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1), 1482 TYPE(SQ_EXPORT_PARAM), 1483 RW_GPR(0), 1484 RW_REL(ABSOLUTE), 1485 INDEX_GPR(0), 1486 ELEM_SIZE(0)); 1487 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1488 SRC_SEL_Y(SQ_SEL_Y), 1489 SRC_SEL_Z(SQ_SEL_0), 1490 SRC_SEL_W(SQ_SEL_1), 1491 R6xx_ELEM_LOOP(0), 1492 BURST_COUNT(1), 1493 END_OF_PROGRAM(0), 1494 VALID_PIXEL_MODE(0), 1495 CF_INST(SQ_CF_INST_EXPORT_DONE), 1496 WHOLE_QUAD_MODE(0), 1497 BARRIER(0)); 1498 /* 8 */ 1499 shader[i++] = CF_DWORD0(ADDR(0)); 1500 shader[i++] = CF_DWORD1(POP_COUNT(0), 1501 CF_CONST(0), 1502 COND(SQ_CF_COND_ACTIVE), 1503 I_COUNT(0), 1504 CALL_COUNT(0), 1505 END_OF_PROGRAM(0), 1506 VALID_PIXEL_MODE(0), 1507 CF_INST(SQ_CF_INST_RETURN), 1508 WHOLE_QUAD_MODE(0), 1509 BARRIER(1)); 1510 1511 1512 /* 9 srcX MAD */ 1513 shader[i++] = ALU_DWORD0(SRC0_SEL(256), 1514 SRC0_REL(ABSOLUTE), 1515 SRC0_ELEM(ELEM_Y), 1516 SRC0_NEG(0), 1517 SRC1_SEL(1), 1518 SRC1_REL(ABSOLUTE), 1519 SRC1_ELEM(ELEM_Y), 1520 SRC1_NEG(0), 1521 INDEX_MODE(SQ_INDEX_LOOP), 1522 PRED_SEL(SQ_PRED_SEL_OFF), 1523 LAST(1)); 1524 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256), 1525 SRC2_REL(ABSOLUTE), 1526 SRC2_ELEM(ELEM_Z), 1527 SRC2_NEG(0), 1528 ALU_INST(SQ_OP3_INST_MULADD), 1529 BANK_SWIZZLE(SQ_ALU_VEC_012), 1530 DST_GPR(1), 1531 DST_REL(ABSOLUTE), 1532 DST_ELEM(ELEM_Z), 1533 CLAMP(0)); 1534 /* 10 srcY MAD */ 1535 shader[i++] = ALU_DWORD0(SRC0_SEL(257), 1536 SRC0_REL(ABSOLUTE), 1537 SRC0_ELEM(ELEM_Y), 1538 SRC0_NEG(0), 1539 SRC1_SEL(1), 1540 SRC1_REL(ABSOLUTE), 1541 SRC1_ELEM(ELEM_Y), 1542 SRC1_NEG(0), 1543 INDEX_MODE(SQ_INDEX_LOOP), 1544 PRED_SEL(SQ_PRED_SEL_OFF), 1545 LAST(1)); 1546 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(257), 1547 SRC2_REL(ABSOLUTE), 1548 SRC2_ELEM(ELEM_Z), 1549 SRC2_NEG(0), 1550 ALU_INST(SQ_OP3_INST_MULADD), 1551 BANK_SWIZZLE(SQ_ALU_VEC_012), 1552 DST_GPR(1), 1553 DST_REL(ABSOLUTE), 1554 DST_ELEM(ELEM_W), 1555 CLAMP(0)); 1556 1557 /* 11 srcX MAD */ 1558 shader[i++] = ALU_DWORD0(SRC0_SEL(256), 1559 SRC0_REL(ABSOLUTE), 1560 SRC0_ELEM(ELEM_X), 1561 SRC0_NEG(0), 1562 SRC1_SEL(1), 1563 SRC1_REL(ABSOLUTE), 1564 SRC1_ELEM(ELEM_X), 1565 SRC1_NEG(0), 1566 INDEX_MODE(SQ_INDEX_LOOP), 1567 PRED_SEL(SQ_PRED_SEL_OFF), 1568 LAST(0)); 1569 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(1), 1570 SRC2_REL(ABSOLUTE), 1571 SRC2_ELEM(ELEM_Z), 1572 SRC2_NEG(0), 1573 ALU_INST(SQ_OP3_INST_MULADD), 1574 BANK_SWIZZLE(SQ_ALU_VEC_012), 1575 DST_GPR(1), 1576 DST_REL(ABSOLUTE), 1577 DST_ELEM(ELEM_X), 1578 CLAMP(0)); 1579 /* 12 srcY MAD */ 1580 shader[i++] = ALU_DWORD0(SRC0_SEL(257), 1581 SRC0_REL(ABSOLUTE), 1582 SRC0_ELEM(ELEM_X), 1583 SRC0_NEG(0), 1584 SRC1_SEL(1), 1585 SRC1_REL(ABSOLUTE), 1586 SRC1_ELEM(ELEM_X), 1587 SRC1_NEG(0), 1588 INDEX_MODE(SQ_INDEX_LOOP), 1589 PRED_SEL(SQ_PRED_SEL_OFF), 1590 LAST(1)); 1591 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(1), 1592 SRC2_REL(ABSOLUTE), 1593 SRC2_ELEM(ELEM_W), 1594 SRC2_NEG(0), 1595 ALU_INST(SQ_OP3_INST_MULADD), 1596 BANK_SWIZZLE(SQ_ALU_VEC_012), 1597 DST_GPR(1), 1598 DST_REL(ABSOLUTE), 1599 DST_ELEM(ELEM_Y), 1600 CLAMP(0)); 1601 1602 /* 13 maskX MAD */ 1603 shader[i++] = ALU_DWORD0(SRC0_SEL(258), 1604 SRC0_REL(ABSOLUTE), 1605 SRC0_ELEM(ELEM_Y), 1606 SRC0_NEG(0), 1607 SRC1_SEL(0), 1608 SRC1_REL(ABSOLUTE), 1609 SRC1_ELEM(ELEM_Y), 1610 SRC1_NEG(0), 1611 INDEX_MODE(SQ_INDEX_LOOP), 1612 PRED_SEL(SQ_PRED_SEL_OFF), 1613 LAST(1)); 1614 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(258), 1615 SRC2_REL(ABSOLUTE), 1616 SRC2_ELEM(ELEM_Z), 1617 SRC2_NEG(0), 1618 ALU_INST(SQ_OP3_INST_MULADD), 1619 BANK_SWIZZLE(SQ_ALU_VEC_012), 1620 DST_GPR(0), 1621 DST_REL(ABSOLUTE), 1622 DST_ELEM(ELEM_Z), 1623 CLAMP(0)); 1624 1625 /* 14 maskY MAD */ 1626 shader[i++] = ALU_DWORD0(SRC0_SEL(259), 1627 SRC0_REL(ABSOLUTE), 1628 SRC0_ELEM(ELEM_Y), 1629 SRC0_NEG(0), 1630 SRC1_SEL(0), 1631 SRC1_REL(ABSOLUTE), 1632 SRC1_ELEM(ELEM_Y), 1633 SRC1_NEG(0), 1634 INDEX_MODE(SQ_INDEX_LOOP), 1635 PRED_SEL(SQ_PRED_SEL_OFF), 1636 LAST(1)); 1637 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259), 1638 SRC2_REL(ABSOLUTE), 1639 SRC2_ELEM(ELEM_Z), 1640 SRC2_NEG(0), 1641 ALU_INST(SQ_OP3_INST_MULADD), 1642 BANK_SWIZZLE(SQ_ALU_VEC_012), 1643 DST_GPR(0), 1644 DST_REL(ABSOLUTE), 1645 DST_ELEM(ELEM_W), 1646 CLAMP(0)); 1647 1648 /* 15 srcX MAD */ 1649 shader[i++] = ALU_DWORD0(SRC0_SEL(258), 1650 SRC0_REL(ABSOLUTE), 1651 SRC0_ELEM(ELEM_X), 1652 SRC0_NEG(0), 1653 SRC1_SEL(0), 1654 SRC1_REL(ABSOLUTE), 1655 SRC1_ELEM(ELEM_X), 1656 SRC1_NEG(0), 1657 INDEX_MODE(SQ_INDEX_LOOP), 1658 PRED_SEL(SQ_PRED_SEL_OFF), 1659 LAST(0)); 1660 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0), 1661 SRC2_REL(ABSOLUTE), 1662 SRC2_ELEM(ELEM_Z), 1663 SRC2_NEG(0), 1664 ALU_INST(SQ_OP3_INST_MULADD), 1665 BANK_SWIZZLE(SQ_ALU_VEC_012), 1666 DST_GPR(0), 1667 DST_REL(ABSOLUTE), 1668 DST_ELEM(ELEM_X), 1669 CLAMP(0)); 1670 /* 16 srcY MAD */ 1671 shader[i++] = ALU_DWORD0(SRC0_SEL(259), 1672 SRC0_REL(ABSOLUTE), 1673 SRC0_ELEM(ELEM_X), 1674 SRC0_NEG(0), 1675 SRC1_SEL(0), 1676 SRC1_REL(ABSOLUTE), 1677 SRC1_ELEM(ELEM_X), 1678 SRC1_NEG(0), 1679 INDEX_MODE(SQ_INDEX_LOOP), 1680 PRED_SEL(SQ_PRED_SEL_OFF), 1681 LAST(1)); 1682 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0), 1683 SRC2_REL(ABSOLUTE), 1684 SRC2_ELEM(ELEM_W), 1685 SRC2_NEG(0), 1686 ALU_INST(SQ_OP3_INST_MULADD), 1687 BANK_SWIZZLE(SQ_ALU_VEC_012), 1688 DST_GPR(0), 1689 DST_REL(ABSOLUTE), 1690 DST_ELEM(ELEM_Y), 1691 CLAMP(0)); 1692 1693 /* 17 srcX / w */ 1694 shader[i++] = ALU_DWORD0(SRC0_SEL(1), 1695 SRC0_REL(ABSOLUTE), 1696 SRC0_ELEM(ELEM_X), 1697 SRC0_NEG(0), 1698 SRC1_SEL(256), 1699 SRC1_REL(ABSOLUTE), 1700 SRC1_ELEM(ELEM_W), 1701 SRC1_NEG(0), 1702 INDEX_MODE(SQ_INDEX_AR_X), 1703 PRED_SEL(SQ_PRED_SEL_OFF), 1704 LAST(1)); 1705 shader[i++] = ALU_DWORD1_OP2(ChipSet, 1706 SRC0_ABS(0), 1707 SRC1_ABS(0), 1708 UPDATE_EXECUTE_MASK(0), 1709 UPDATE_PRED(0), 1710 WRITE_MASK(1), 1711 FOG_MERGE(0), 1712 OMOD(SQ_ALU_OMOD_OFF), 1713 ALU_INST(SQ_OP2_INST_MUL), 1714 BANK_SWIZZLE(SQ_ALU_VEC_012), 1715 DST_GPR(1), 1716 DST_REL(ABSOLUTE), 1717 DST_ELEM(ELEM_X), 1718 CLAMP(0)); 1719 1720 /* 18 srcY / h */ 1721 shader[i++] = ALU_DWORD0(SRC0_SEL(1), 1722 SRC0_REL(ABSOLUTE), 1723 SRC0_ELEM(ELEM_Y), 1724 SRC0_NEG(0), 1725 SRC1_SEL(257), 1726 SRC1_REL(ABSOLUTE), 1727 SRC1_ELEM(ELEM_W), 1728 SRC1_NEG(0), 1729 INDEX_MODE(SQ_INDEX_AR_X), 1730 PRED_SEL(SQ_PRED_SEL_OFF), 1731 LAST(1)); 1732 shader[i++] = ALU_DWORD1_OP2(ChipSet, 1733 SRC0_ABS(0), 1734 SRC1_ABS(0), 1735 UPDATE_EXECUTE_MASK(0), 1736 UPDATE_PRED(0), 1737 WRITE_MASK(1), 1738 FOG_MERGE(0), 1739 OMOD(SQ_ALU_OMOD_OFF), 1740 ALU_INST(SQ_OP2_INST_MUL), 1741 BANK_SWIZZLE(SQ_ALU_VEC_012), 1742 DST_GPR(1), 1743 DST_REL(ABSOLUTE), 1744 DST_ELEM(ELEM_Y), 1745 CLAMP(0)); 1746 1747 /* 19 maskX / w */ 1748 shader[i++] = ALU_DWORD0(SRC0_SEL(0), 1749 SRC0_REL(ABSOLUTE), 1750 SRC0_ELEM(ELEM_X), 1751 SRC0_NEG(0), 1752 SRC1_SEL(258), 1753 SRC1_REL(ABSOLUTE), 1754 SRC1_ELEM(ELEM_W), 1755 SRC1_NEG(0), 1756 INDEX_MODE(SQ_INDEX_AR_X), 1757 PRED_SEL(SQ_PRED_SEL_OFF), 1758 LAST(1)); 1759 shader[i++] = ALU_DWORD1_OP2(ChipSet, 1760 SRC0_ABS(0), 1761 SRC1_ABS(0), 1762 UPDATE_EXECUTE_MASK(0), 1763 UPDATE_PRED(0), 1764 WRITE_MASK(1), 1765 FOG_MERGE(0), 1766 OMOD(SQ_ALU_OMOD_OFF), 1767 ALU_INST(SQ_OP2_INST_MUL), 1768 BANK_SWIZZLE(SQ_ALU_VEC_012), 1769 DST_GPR(0), 1770 DST_REL(ABSOLUTE), 1771 DST_ELEM(ELEM_X), 1772 CLAMP(0)); 1773 1774 /* 20 maskY / h */ 1775 shader[i++] = ALU_DWORD0(SRC0_SEL(0), 1776 SRC0_REL(ABSOLUTE), 1777 SRC0_ELEM(ELEM_Y), 1778 SRC0_NEG(0), 1779 SRC1_SEL(259), 1780 SRC1_REL(ABSOLUTE), 1781 SRC1_ELEM(ELEM_W), 1782 SRC1_NEG(0), 1783 INDEX_MODE(SQ_INDEX_AR_X), 1784 PRED_SEL(SQ_PRED_SEL_OFF), 1785 LAST(1)); 1786 shader[i++] = ALU_DWORD1_OP2(ChipSet, 1787 SRC0_ABS(0), 1788 SRC1_ABS(0), 1789 UPDATE_EXECUTE_MASK(0), 1790 UPDATE_PRED(0), 1791 WRITE_MASK(1), 1792 FOG_MERGE(0), 1793 OMOD(SQ_ALU_OMOD_OFF), 1794 ALU_INST(SQ_OP2_INST_MUL), 1795 BANK_SWIZZLE(SQ_ALU_VEC_012), 1796 DST_GPR(0), 1797 DST_REL(ABSOLUTE), 1798 DST_ELEM(ELEM_Y), 1799 CLAMP(0)); 1800 /* 21 */ 1801 shader[i++] = 0x00000000; 1802 shader[i++] = 0x00000000; 1803 1804 /* 22/23 - dst */ 1805 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 1806 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 1807 FETCH_WHOLE_QUAD(0), 1808 BUFFER_ID(0), 1809 SRC_GPR(0), 1810 SRC_REL(ABSOLUTE), 1811 SRC_SEL_X(SQ_SEL_X), 1812 MEGA_FETCH_COUNT(24)); 1813 shader[i++] = VTX_DWORD1_GPR(DST_GPR(2), 1814 DST_REL(0), 1815 DST_SEL_X(SQ_SEL_X), 1816 DST_SEL_Y(SQ_SEL_Y), 1817 DST_SEL_Z(SQ_SEL_0), 1818 DST_SEL_W(SQ_SEL_1), 1819 USE_CONST_FIELDS(0), 1820 DATA_FORMAT(FMT_32_32_FLOAT), 1821 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 1822 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 1823 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 1824 shader[i++] = VTX_DWORD2(OFFSET(0), 1825 ENDIAN_SWAP(ENDIAN_NONE), 1826 CONST_BUF_NO_STRIDE(0), 1827 MEGA_FETCH(1)); 1828 shader[i++] = VTX_DWORD_PAD; 1829 /* 24/25 - src */ 1830 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 1831 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 1832 FETCH_WHOLE_QUAD(0), 1833 BUFFER_ID(0), 1834 SRC_GPR(0), 1835 SRC_REL(ABSOLUTE), 1836 SRC_SEL_X(SQ_SEL_X), 1837 MEGA_FETCH_COUNT(8)); 1838 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 1839 DST_REL(0), 1840 DST_SEL_X(SQ_SEL_X), 1841 DST_SEL_Y(SQ_SEL_Y), 1842 DST_SEL_Z(SQ_SEL_1), 1843 DST_SEL_W(SQ_SEL_0), 1844 USE_CONST_FIELDS(0), 1845 DATA_FORMAT(FMT_32_32_FLOAT), 1846 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 1847 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 1848 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 1849 shader[i++] = VTX_DWORD2(OFFSET(8), 1850 ENDIAN_SWAP(ENDIAN_NONE), 1851 CONST_BUF_NO_STRIDE(0), 1852 MEGA_FETCH(0)); 1853 shader[i++] = VTX_DWORD_PAD; 1854 /* 26/27 - mask */ 1855 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 1856 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 1857 FETCH_WHOLE_QUAD(0), 1858 BUFFER_ID(0), 1859 SRC_GPR(0), 1860 SRC_REL(ABSOLUTE), 1861 SRC_SEL_X(SQ_SEL_X), 1862 MEGA_FETCH_COUNT(8)); 1863 shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), 1864 DST_REL(0), 1865 DST_SEL_X(SQ_SEL_X), 1866 DST_SEL_Y(SQ_SEL_Y), 1867 DST_SEL_Z(SQ_SEL_1), 1868 DST_SEL_W(SQ_SEL_0), 1869 USE_CONST_FIELDS(0), 1870 DATA_FORMAT(FMT_32_32_FLOAT), 1871 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 1872 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 1873 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 1874 shader[i++] = VTX_DWORD2(OFFSET(16), 1875 ENDIAN_SWAP(ENDIAN_NONE), 1876 CONST_BUF_NO_STRIDE(0), 1877 MEGA_FETCH(0)); 1878 shader[i++] = VTX_DWORD_PAD; 1879 1880 /* 28 - non-mask sub */ 1881 shader[i++] = CF_DWORD0(ADDR(40)); 1882 shader[i++] = CF_DWORD1(POP_COUNT(0), 1883 CF_CONST(0), 1884 COND(SQ_CF_COND_ACTIVE), 1885 I_COUNT(2), 1886 CALL_COUNT(0), 1887 END_OF_PROGRAM(0), 1888 VALID_PIXEL_MODE(0), 1889 CF_INST(SQ_CF_INST_VTX), 1890 WHOLE_QUAD_MODE(0), 1891 BARRIER(1)); 1892 1893 /* 29 - ALU */ 1894 shader[i++] = CF_ALU_DWORD0(ADDR(33), 1895 KCACHE_BANK0(0), 1896 KCACHE_BANK1(0), 1897 KCACHE_MODE0(SQ_CF_KCACHE_NOP)); 1898 shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), 1899 KCACHE_ADDR0(0), 1900 KCACHE_ADDR1(0), 1901 I_COUNT(6), 1902 USES_WATERFALL(0), 1903 CF_INST(SQ_CF_INST_ALU), 1904 WHOLE_QUAD_MODE(0), 1905 BARRIER(1)); 1906 1907 /* 30 - dst */ 1908 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), 1909 TYPE(SQ_EXPORT_POS), 1910 RW_GPR(1), 1911 RW_REL(ABSOLUTE), 1912 INDEX_GPR(0), 1913 ELEM_SIZE(0)); 1914 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1915 SRC_SEL_Y(SQ_SEL_Y), 1916 SRC_SEL_Z(SQ_SEL_0), 1917 SRC_SEL_W(SQ_SEL_1), 1918 R6xx_ELEM_LOOP(0), 1919 BURST_COUNT(0), 1920 END_OF_PROGRAM(0), 1921 VALID_PIXEL_MODE(0), 1922 CF_INST(SQ_CF_INST_EXPORT_DONE), 1923 WHOLE_QUAD_MODE(0), 1924 BARRIER(1)); 1925 /* 31 - src */ 1926 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), 1927 TYPE(SQ_EXPORT_PARAM), 1928 RW_GPR(0), 1929 RW_REL(ABSOLUTE), 1930 INDEX_GPR(0), 1931 ELEM_SIZE(0)); 1932 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 1933 SRC_SEL_Y(SQ_SEL_Y), 1934 SRC_SEL_Z(SQ_SEL_0), 1935 SRC_SEL_W(SQ_SEL_1), 1936 R6xx_ELEM_LOOP(0), 1937 BURST_COUNT(0), 1938 END_OF_PROGRAM(0), 1939 VALID_PIXEL_MODE(0), 1940 CF_INST(SQ_CF_INST_EXPORT_DONE), 1941 WHOLE_QUAD_MODE(0), 1942 BARRIER(0)); 1943 /* 32 */ 1944 shader[i++] = CF_DWORD0(ADDR(0)); 1945 shader[i++] = CF_DWORD1(POP_COUNT(0), 1946 CF_CONST(0), 1947 COND(SQ_CF_COND_ACTIVE), 1948 I_COUNT(0), 1949 CALL_COUNT(0), 1950 END_OF_PROGRAM(0), 1951 VALID_PIXEL_MODE(0), 1952 CF_INST(SQ_CF_INST_RETURN), 1953 WHOLE_QUAD_MODE(0), 1954 BARRIER(1)); 1955 1956 1957 /* 33 srcX MAD */ 1958 shader[i++] = ALU_DWORD0(SRC0_SEL(256), 1959 SRC0_REL(ABSOLUTE), 1960 SRC0_ELEM(ELEM_Y), 1961 SRC0_NEG(0), 1962 SRC1_SEL(0), 1963 SRC1_REL(ABSOLUTE), 1964 SRC1_ELEM(ELEM_Y), 1965 SRC1_NEG(0), 1966 INDEX_MODE(SQ_INDEX_LOOP), 1967 PRED_SEL(SQ_PRED_SEL_OFF), 1968 LAST(1)); 1969 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256), 1970 SRC2_REL(ABSOLUTE), 1971 SRC2_ELEM(ELEM_Z), 1972 SRC2_NEG(0), 1973 ALU_INST(SQ_OP3_INST_MULADD), 1974 BANK_SWIZZLE(SQ_ALU_VEC_012), 1975 DST_GPR(0), 1976 DST_REL(ABSOLUTE), 1977 DST_ELEM(ELEM_Z), 1978 CLAMP(0)); 1979 /* 34 srcY MAD */ 1980 shader[i++] = ALU_DWORD0(SRC0_SEL(257), 1981 SRC0_REL(ABSOLUTE), 1982 SRC0_ELEM(ELEM_Y), 1983 SRC0_NEG(0), 1984 SRC1_SEL(0), 1985 SRC1_REL(ABSOLUTE), 1986 SRC1_ELEM(ELEM_Y), 1987 SRC1_NEG(0), 1988 INDEX_MODE(SQ_INDEX_LOOP), 1989 PRED_SEL(SQ_PRED_SEL_OFF), 1990 LAST(1)); 1991 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(257), 1992 SRC2_REL(ABSOLUTE), 1993 SRC2_ELEM(ELEM_Z), 1994 SRC2_NEG(0), 1995 ALU_INST(SQ_OP3_INST_MULADD), 1996 BANK_SWIZZLE(SQ_ALU_VEC_012), 1997 DST_GPR(0), 1998 DST_REL(ABSOLUTE), 1999 DST_ELEM(ELEM_W), 2000 CLAMP(0)); 2001 2002 /* 35 srcX MAD */ 2003 shader[i++] = ALU_DWORD0(SRC0_SEL(256), 2004 SRC0_REL(ABSOLUTE), 2005 SRC0_ELEM(ELEM_X), 2006 SRC0_NEG(0), 2007 SRC1_SEL(0), 2008 SRC1_REL(ABSOLUTE), 2009 SRC1_ELEM(ELEM_X), 2010 SRC1_NEG(0), 2011 INDEX_MODE(SQ_INDEX_LOOP), 2012 PRED_SEL(SQ_PRED_SEL_OFF), 2013 LAST(0)); 2014 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0), 2015 SRC2_REL(ABSOLUTE), 2016 SRC2_ELEM(ELEM_Z), 2017 SRC2_NEG(0), 2018 ALU_INST(SQ_OP3_INST_MULADD), 2019 BANK_SWIZZLE(SQ_ALU_VEC_012), 2020 DST_GPR(0), 2021 DST_REL(ABSOLUTE), 2022 DST_ELEM(ELEM_X), 2023 CLAMP(0)); 2024 /* 36 srcY MAD */ 2025 shader[i++] = ALU_DWORD0(SRC0_SEL(257), 2026 SRC0_REL(ABSOLUTE), 2027 SRC0_ELEM(ELEM_X), 2028 SRC0_NEG(0), 2029 SRC1_SEL(0), 2030 SRC1_REL(ABSOLUTE), 2031 SRC1_ELEM(ELEM_X), 2032 SRC1_NEG(0), 2033 INDEX_MODE(SQ_INDEX_LOOP), 2034 PRED_SEL(SQ_PRED_SEL_OFF), 2035 LAST(1)); 2036 shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0), 2037 SRC2_REL(ABSOLUTE), 2038 SRC2_ELEM(ELEM_W), 2039 SRC2_NEG(0), 2040 ALU_INST(SQ_OP3_INST_MULADD), 2041 BANK_SWIZZLE(SQ_ALU_VEC_012), 2042 DST_GPR(0), 2043 DST_REL(ABSOLUTE), 2044 DST_ELEM(ELEM_Y), 2045 CLAMP(0)); 2046 /* 37 srcX / w */ 2047 shader[i++] = ALU_DWORD0(SRC0_SEL(0), 2048 SRC0_REL(ABSOLUTE), 2049 SRC0_ELEM(ELEM_X), 2050 SRC0_NEG(0), 2051 SRC1_SEL(256), 2052 SRC1_REL(ABSOLUTE), 2053 SRC1_ELEM(ELEM_W), 2054 SRC1_NEG(0), 2055 INDEX_MODE(SQ_INDEX_AR_X), 2056 PRED_SEL(SQ_PRED_SEL_OFF), 2057 LAST(1)); 2058 shader[i++] = ALU_DWORD1_OP2(ChipSet, 2059 SRC0_ABS(0), 2060 SRC1_ABS(0), 2061 UPDATE_EXECUTE_MASK(0), 2062 UPDATE_PRED(0), 2063 WRITE_MASK(1), 2064 FOG_MERGE(0), 2065 OMOD(SQ_ALU_OMOD_OFF), 2066 ALU_INST(SQ_OP2_INST_MUL), 2067 BANK_SWIZZLE(SQ_ALU_VEC_012), 2068 DST_GPR(0), 2069 DST_REL(ABSOLUTE), 2070 DST_ELEM(ELEM_X), 2071 CLAMP(0)); 2072 2073 /* 38 srcY / h */ 2074 shader[i++] = ALU_DWORD0(SRC0_SEL(0), 2075 SRC0_REL(ABSOLUTE), 2076 SRC0_ELEM(ELEM_Y), 2077 SRC0_NEG(0), 2078 SRC1_SEL(257), 2079 SRC1_REL(ABSOLUTE), 2080 SRC1_ELEM(ELEM_W), 2081 SRC1_NEG(0), 2082 INDEX_MODE(SQ_INDEX_AR_X), 2083 PRED_SEL(SQ_PRED_SEL_OFF), 2084 LAST(1)); 2085 shader[i++] = ALU_DWORD1_OP2(ChipSet, 2086 SRC0_ABS(0), 2087 SRC1_ABS(0), 2088 UPDATE_EXECUTE_MASK(0), 2089 UPDATE_PRED(0), 2090 WRITE_MASK(1), 2091 FOG_MERGE(0), 2092 OMOD(SQ_ALU_OMOD_OFF), 2093 ALU_INST(SQ_OP2_INST_MUL), 2094 BANK_SWIZZLE(SQ_ALU_VEC_012), 2095 DST_GPR(0), 2096 DST_REL(ABSOLUTE), 2097 DST_ELEM(ELEM_Y), 2098 CLAMP(0)); 2099 2100 /* 39 */ 2101 shader[i++] = 0x00000000; 2102 shader[i++] = 0x00000000; 2103 2104 /* 40/41 - dst */ 2105 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 2106 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 2107 FETCH_WHOLE_QUAD(0), 2108 BUFFER_ID(0), 2109 SRC_GPR(0), 2110 SRC_REL(ABSOLUTE), 2111 SRC_SEL_X(SQ_SEL_X), 2112 MEGA_FETCH_COUNT(16)); 2113 shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), 2114 DST_REL(0), 2115 DST_SEL_X(SQ_SEL_X), 2116 DST_SEL_Y(SQ_SEL_Y), 2117 DST_SEL_Z(SQ_SEL_0), 2118 DST_SEL_W(SQ_SEL_1), 2119 USE_CONST_FIELDS(0), 2120 DATA_FORMAT(FMT_32_32_FLOAT), 2121 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 2122 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 2123 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 2124 shader[i++] = VTX_DWORD2(OFFSET(0), 2125 ENDIAN_SWAP(ENDIAN_NONE), 2126 CONST_BUF_NO_STRIDE(0), 2127 MEGA_FETCH(1)); 2128 shader[i++] = VTX_DWORD_PAD; 2129 /* 42/43 - src */ 2130 shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 2131 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), 2132 FETCH_WHOLE_QUAD(0), 2133 BUFFER_ID(0), 2134 SRC_GPR(0), 2135 SRC_REL(ABSOLUTE), 2136 SRC_SEL_X(SQ_SEL_X), 2137 MEGA_FETCH_COUNT(8)); 2138 shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), 2139 DST_REL(0), 2140 DST_SEL_X(SQ_SEL_X), 2141 DST_SEL_Y(SQ_SEL_Y), 2142 DST_SEL_Z(SQ_SEL_1), 2143 DST_SEL_W(SQ_SEL_0), 2144 USE_CONST_FIELDS(0), 2145 DATA_FORMAT(FMT_32_32_FLOAT), 2146 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), 2147 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), 2148 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); 2149 shader[i++] = VTX_DWORD2(OFFSET(8), 2150 ENDIAN_SWAP(ENDIAN_NONE), 2151 CONST_BUF_NO_STRIDE(0), 2152 MEGA_FETCH(0)); 2153 shader[i++] = VTX_DWORD_PAD; 2154 2155 return i; 2156} 2157 2158/* comp ps --------------------------------------- */ 2159int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) 2160{ 2161 int i = 0; 2162 2163 /* 0 */ 2164 shader[i++] = CF_DWORD0(ADDR(2)); 2165 shader[i++] = CF_DWORD1(POP_COUNT(0), 2166 CF_CONST(0), 2167 COND(SQ_CF_COND_ACTIVE), 2168 I_COUNT(1), 2169 CALL_COUNT(0), 2170 END_OF_PROGRAM(0), 2171 VALID_PIXEL_MODE(0), 2172 CF_INST(SQ_CF_INST_TEX), 2173 WHOLE_QUAD_MODE(0), 2174 BARRIER(1)); 2175 /* 1 */ 2176 shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), 2177 TYPE(SQ_EXPORT_PIXEL), 2178 RW_GPR(0), 2179 RW_REL(ABSOLUTE), 2180 INDEX_GPR(0), 2181 ELEM_SIZE(1)); 2182 2183 shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), 2184 SRC_SEL_Y(SQ_SEL_Y), 2185 SRC_SEL_Z(SQ_SEL_Z), 2186 SRC_SEL_W(SQ_SEL_W), 2187 R6xx_ELEM_LOOP(0), 2188 BURST_COUNT(1), 2189 END_OF_PROGRAM(1), 2190 VALID_PIXEL_MODE(0), 2191 CF_INST(SQ_CF_INST_EXPORT_DONE), 2192 WHOLE_QUAD_MODE(0), 2193 BARRIER(1)); 2194 2195 2196 /* 2/3 - src */ 2197 shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), 2198 BC_FRAC_MODE(0), 2199 FETCH_WHOLE_QUAD(0), 2200 RESOURCE_ID(0), 2201 SRC_GPR(0), 2202 SRC_REL(ABSOLUTE), 2203 R7xx_ALT_CONST(0)); 2204 shader[i++] = TEX_DWORD1(DST_GPR(0), 2205 DST_REL(ABSOLUTE), 2206 DST_SEL_X(SQ_SEL_X), 2207 DST_SEL_Y(SQ_SEL_Y), 2208 DST_SEL_Z(SQ_SEL_Z), 2209 DST_SEL_W(SQ_SEL_W), 2210 LOD_BIAS(0), 2211 COORD_TYPE_X(TEX_NORMALIZED), 2212 COORD_TYPE_Y(TEX_NORMALIZED), 2213 COORD_TYPE_Z(TEX_NORMALIZED), 2214 COORD_TYPE_W(TEX_NORMALIZED)); 2215 shader[i++] = TEX_DWORD2(OFFSET_X(0), 2216 OFFSET_Y(0), 2217 OFFSET_Z(0), 2218 SAMPLER_ID(0), 2219 SRC_SEL_X(SQ_SEL_X), 2220 SRC_SEL_Y(SQ_SEL_Y), 2221 SRC_SEL_Z(SQ_SEL_0), 2222 SRC_SEL_W(SQ_SEL_1)); 2223 shader[i++] = TEX_DWORD_PAD; 2224 2225 return i; 2226} 2227